/contrib/sdk/sources/Mesa/src/gallium/drivers/ilo/Android.mk |
---|
0,0 → 1,39 |
# Mesa 3-D graphics library |
# |
# Copyright (C) 2013 LunarG Inc. |
# |
# Permission is hereby granted, free of charge, to any person obtaining a |
# copy of this software and associated documentation files (the "Software"), |
# to deal in the Software without restriction, including without limitation |
# the rights to use, copy, modify, merge, publish, distribute, sublicense, |
# and/or sell copies of the Software, and to permit persons to whom the |
# Software is furnished to do so, subject to the following conditions: |
# |
# The above copyright notice and this permission notice shall be included |
# in all copies or substantial portions of the Software. |
# |
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING |
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER |
# DEALINGS IN THE SOFTWARE. |
# Android (ndk-build) rules for the gallium "ilo" driver: compiles the
# driver's C sources (list supplied by Makefile.sources as C_SOURCES)
# into the libmesa_pipe_ilo static convenience library.
LOCAL_PATH := $(call my-dir) |
# get C_SOURCES |
include $(LOCAL_PATH)/Makefile.sources |
include $(CLEAR_VARS) |
# Driver-private headers plus the shared Intel winsys interface the
# driver is built against.
LOCAL_C_INCLUDES := \ |
$(LOCAL_PATH)/include \ |
$(GALLIUM_TOP)/winsys/intel |
LOCAL_SRC_FILES := $(C_SOURCES) |
LOCAL_MODULE := libmesa_pipe_ilo |
# Pull in the gallium-wide common flags, then emit static-library rules.
include $(GALLIUM_COMMON_MK) |
include $(BUILD_STATIC_LIBRARY) |
/contrib/sdk/sources/Mesa/src/gallium/drivers/ilo/Makefile.am |
---|
0,0 → 1,37 |
# Copyright © 2012 Intel Corporation |
# Copyright (C) 2013 LunarG, Inc. |
# |
# Permission is hereby granted, free of charge, to any person obtaining a |
# copy of this software and associated documentation files (the "Software"), |
# to deal in the Software without restriction, including without limitation |
# the rights to use, copy, modify, merge, publish, distribute, sublicense, |
# and/or sell copies of the Software, and to permit persons to whom the |
# Software is furnished to do so, subject to the following conditions: |
# |
# The above copyright notice and this permission notice (including the next |
# paragraph) shall be included in all copies or substantial portions of the |
# Software. |
# |
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, |
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF |
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND |
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT |
# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, |
# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER |
# DEALINGS IN THE SOFTWARE. |
# Automake rules for the gallium "ilo" driver: builds $(C_SOURCES)
# (defined in Makefile.sources) into the libilo.la libtool library.
include Makefile.sources |
include $(top_srcdir)/src/gallium/Automake.inc |
# noinst_: convenience library only — built but never installed.
noinst_LTLIBRARIES = libilo.la |
# Preprocessor search path: driver-private headers, the shared Intel
# winsys interface, and the common gallium include flags.
AM_CPPFLAGS = \ |
-Iinclude \ |
-I$(top_srcdir)/src/gallium/winsys/intel \ |
$(GALLIUM_CFLAGS) |
# Hide non-exported symbols (e.g. -fvisibility=hidden where supported).
AM_CFLAGS = \ |
$(VISIBILITY_CFLAGS) |
libilo_la_SOURCES = $(C_SOURCES) |
/contrib/sdk/sources/Mesa/src/gallium/drivers/ilo/Makefile.in |
---|
0,0 → 1,936 |
# Makefile.in generated by automake 1.14 from Makefile.am. |
# @configure_input@ |
# Copyright (C) 1994-2013 Free Software Foundation, Inc. |
# This Makefile.in is free software; the Free Software Foundation |
# gives unlimited permission to copy and/or distribute it, |
# with or without modifications, as long as this notice is preserved. |
# This program is distributed in the hope that it will be useful, |
# but WITHOUT ANY WARRANTY, to the extent permitted by law; without |
# even the implied warranty of MERCHANTABILITY or FITNESS FOR A |
# PARTICULAR PURPOSE. |
@SET_MAKE@ |
# Copyright © 2012 Intel Corporation |
# Copyright (C) 2013 LunarG, Inc. |
# |
# Permission is hereby granted, free of charge, to any person obtaining a |
# copy of this software and associated documentation files (the "Software"), |
# to deal in the Software without restriction, including without limitation |
# the rights to use, copy, modify, merge, publish, distribute, sublicense, |
# and/or sell copies of the Software, and to permit persons to whom the |
# Software is furnished to do so, subject to the following conditions: |
# |
# The above copyright notice and this permission notice (including the next |
# paragraph) shall be included in all copies or substantial portions of the |
# Software. |
# |
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, |
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF |
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND |
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT |
# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, |
# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER |
# DEALINGS IN THE SOFTWARE. |
VPATH = @srcdir@ |
am__is_gnu_make = test -n '$(MAKEFILE_LIST)' && test -n '$(MAKELEVEL)' |
am__make_running_with_option = \ |
case $${target_option-} in \ |
?) ;; \ |
*) echo "am__make_running_with_option: internal error: invalid" \ |
"target option '$${target_option-}' specified" >&2; \ |
exit 1;; \ |
esac; \ |
has_opt=no; \ |
sane_makeflags=$$MAKEFLAGS; \ |
if $(am__is_gnu_make); then \ |
sane_makeflags=$$MFLAGS; \ |
else \ |
case $$MAKEFLAGS in \ |
*\\[\ \ ]*) \ |
bs=\\; \ |
sane_makeflags=`printf '%s\n' "$$MAKEFLAGS" \ |
| sed "s/$$bs$$bs[$$bs $$bs ]*//g"`;; \ |
esac; \ |
fi; \ |
skip_next=no; \ |
strip_trailopt () \ |
{ \ |
flg=`printf '%s\n' "$$flg" | sed "s/$$1.*$$//"`; \ |
}; \ |
for flg in $$sane_makeflags; do \ |
test $$skip_next = yes && { skip_next=no; continue; }; \ |
case $$flg in \ |
*=*|--*) continue;; \ |
-*I) strip_trailopt 'I'; skip_next=yes;; \ |
-*I?*) strip_trailopt 'I';; \ |
-*O) strip_trailopt 'O'; skip_next=yes;; \ |
-*O?*) strip_trailopt 'O';; \ |
-*l) strip_trailopt 'l'; skip_next=yes;; \ |
-*l?*) strip_trailopt 'l';; \ |
-[dEDm]) skip_next=yes;; \ |
-[JT]) skip_next=yes;; \ |
esac; \ |
case $$flg in \ |
*$$target_option*) has_opt=yes; break;; \ |
esac; \ |
done; \ |
test $$has_opt = yes |
am__make_dryrun = (target_option=n; $(am__make_running_with_option)) |
am__make_keepgoing = (target_option=k; $(am__make_running_with_option)) |
pkgdatadir = $(datadir)/@PACKAGE@ |
pkgincludedir = $(includedir)/@PACKAGE@ |
pkglibdir = $(libdir)/@PACKAGE@ |
pkglibexecdir = $(libexecdir)/@PACKAGE@ |
am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd |
install_sh_DATA = $(install_sh) -c -m 644 |
install_sh_PROGRAM = $(install_sh) -c |
install_sh_SCRIPT = $(install_sh) -c |
INSTALL_HEADER = $(INSTALL_DATA) |
transform = $(program_transform_name) |
NORMAL_INSTALL = : |
PRE_INSTALL = : |
POST_INSTALL = : |
NORMAL_UNINSTALL = : |
PRE_UNINSTALL = : |
POST_UNINSTALL = : |
build_triplet = @build@ |
host_triplet = @host@ |
target_triplet = @target@ |
DIST_COMMON = $(srcdir)/Makefile.sources \ |
$(top_srcdir)/src/gallium/Automake.inc $(srcdir)/Makefile.in \ |
$(srcdir)/Makefile.am $(top_srcdir)/bin/depcomp |
subdir = src/gallium/drivers/ilo |
ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 |
am__aclocal_m4_deps = $(top_srcdir)/m4/ax_prog_bison.m4 \ |
$(top_srcdir)/m4/ax_prog_cc_for_build.m4 \ |
$(top_srcdir)/m4/ax_prog_cxx_for_build.m4 \ |
$(top_srcdir)/m4/ax_prog_flex.m4 \ |
$(top_srcdir)/m4/ax_pthread.m4 \ |
$(top_srcdir)/m4/ax_python_module.m4 \ |
$(top_srcdir)/m4/libtool.m4 $(top_srcdir)/m4/ltoptions.m4 \ |
$(top_srcdir)/m4/ltsugar.m4 $(top_srcdir)/m4/ltversion.m4 \ |
$(top_srcdir)/m4/lt~obsolete.m4 $(top_srcdir)/configure.ac |
am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \ |
$(ACLOCAL_M4) |
mkinstalldirs = $(install_sh) -d |
CONFIG_CLEAN_FILES = |
CONFIG_CLEAN_VPATH_FILES = |
LTLIBRARIES = $(noinst_LTLIBRARIES) |
libilo_la_LIBADD = |
am__objects_1 = ilo_3d.lo ilo_3d_pipeline.lo ilo_3d_pipeline_dump.lo \ |
ilo_3d_pipeline_gen6.lo ilo_3d_pipeline_gen7.lo ilo_blit.lo \ |
ilo_blitter.lo ilo_blitter_blt.lo ilo_blitter_pipe.lo \ |
ilo_context.lo ilo_cp.lo ilo_format.lo ilo_gpe_gen6.lo \ |
ilo_gpe_gen7.lo ilo_gpgpu.lo ilo_query.lo ilo_resource.lo \ |
ilo_screen.lo ilo_shader.lo ilo_state.lo ilo_transfer.lo \ |
ilo_video.lo ilo_shader_cs.lo ilo_shader_fs.lo \ |
ilo_shader_gs.lo ilo_shader_vs.lo toy_compiler.lo \ |
toy_compiler_asm.lo toy_compiler_disasm.lo toy_legalize.lo \ |
toy_legalize_ra.lo toy_optimize.lo toy_tgsi.lo |
am_libilo_la_OBJECTS = $(am__objects_1) |
libilo_la_OBJECTS = $(am_libilo_la_OBJECTS) |
AM_V_lt = $(am__v_lt_@AM_V@) |
am__v_lt_ = $(am__v_lt_@AM_DEFAULT_V@) |
am__v_lt_0 = --silent |
am__v_lt_1 = |
AM_V_P = $(am__v_P_@AM_V@) |
am__v_P_ = $(am__v_P_@AM_DEFAULT_V@) |
am__v_P_0 = false |
am__v_P_1 = : |
AM_V_GEN = $(am__v_GEN_@AM_V@) |
am__v_GEN_ = $(am__v_GEN_@AM_DEFAULT_V@) |
am__v_GEN_0 = @echo " GEN " $@; |
am__v_GEN_1 = |
AM_V_at = $(am__v_at_@AM_V@) |
am__v_at_ = $(am__v_at_@AM_DEFAULT_V@) |
am__v_at_0 = @ |
am__v_at_1 = |
DEFAULT_INCLUDES = -I.@am__isrc@ |
depcomp = $(SHELL) $(top_srcdir)/bin/depcomp |
am__depfiles_maybe = depfiles |
am__mv = mv -f |
COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \ |
$(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) |
LTCOMPILE = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ |
$(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) \ |
$(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) \ |
$(AM_CFLAGS) $(CFLAGS) |
AM_V_CC = $(am__v_CC_@AM_V@) |
am__v_CC_ = $(am__v_CC_@AM_DEFAULT_V@) |
am__v_CC_0 = @echo " CC " $@; |
am__v_CC_1 = |
CCLD = $(CC) |
LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ |
$(LIBTOOLFLAGS) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \ |
$(AM_LDFLAGS) $(LDFLAGS) -o $@ |
AM_V_CCLD = $(am__v_CCLD_@AM_V@) |
am__v_CCLD_ = $(am__v_CCLD_@AM_DEFAULT_V@) |
am__v_CCLD_0 = @echo " CCLD " $@; |
am__v_CCLD_1 = |
SOURCES = $(libilo_la_SOURCES) |
DIST_SOURCES = $(libilo_la_SOURCES) |
am__can_run_installinfo = \ |
case $$AM_UPDATE_INFO_DIR in \ |
n|no|NO) false;; \ |
*) (install-info --version) >/dev/null 2>&1;; \ |
esac |
am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) $(LISP) |
# Read a list of newline-separated strings from the standard input, |
# and print each of them once, without duplicates. Input order is |
# *not* preserved. |
am__uniquify_input = $(AWK) '\ |
BEGIN { nonempty = 0; } \ |
{ items[$$0] = 1; nonempty = 1; } \ |
END { if (nonempty) { for (i in items) print i; }; } \ |
' |
# Make sure the list of sources is unique. This is necessary because, |
# e.g., the same source file might be shared among _SOURCES variables |
# for different programs/libraries. |
am__define_uniq_tagged_files = \ |
list='$(am__tagged_files)'; \ |
unique=`for i in $$list; do \ |
if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ |
done | $(am__uniquify_input)` |
ETAGS = etags |
CTAGS = ctags |
DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) |
ACLOCAL = @ACLOCAL@ |
AMTAR = @AMTAR@ |
AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@ |
AR = @AR@ |
AUTOCONF = @AUTOCONF@ |
AUTOHEADER = @AUTOHEADER@ |
AUTOMAKE = @AUTOMAKE@ |
AWK = @AWK@ |
BUILD_EXEEXT = @BUILD_EXEEXT@ |
BUILD_OBJEXT = @BUILD_OBJEXT@ |
CC = @CC@ |
CCAS = @CCAS@ |
CCASDEPMODE = @CCASDEPMODE@ |
CCASFLAGS = @CCASFLAGS@ |
CCDEPMODE = @CCDEPMODE@ |
CC_FOR_BUILD = @CC_FOR_BUILD@ |
CFLAGS = @CFLAGS@ |
CFLAGS_FOR_BUILD = @CFLAGS_FOR_BUILD@ |
CLANG_RESOURCE_DIR = @CLANG_RESOURCE_DIR@ |
CLOCK_LIB = @CLOCK_LIB@ |
CPP = @CPP@ |
CPPFLAGS = @CPPFLAGS@ |
CPPFLAGS_FOR_BUILD = @CPPFLAGS_FOR_BUILD@ |
CPP_FOR_BUILD = @CPP_FOR_BUILD@ |
CXX = @CXX@ |
CXXCPP = @CXXCPP@ |
CXXCPPFLAGS_FOR_BUILD = @CXXCPPFLAGS_FOR_BUILD@ |
CXXCPP_FOR_BUILD = @CXXCPP_FOR_BUILD@ |
CXXDEPMODE = @CXXDEPMODE@ |
CXXFLAGS = @CXXFLAGS@ |
CXXFLAGS_FOR_BUILD = @CXXFLAGS_FOR_BUILD@ |
CXX_FOR_BUILD = @CXX_FOR_BUILD@ |
CYGPATH_W = @CYGPATH_W@ |
DEFINES = @DEFINES@ |
DEFINES_FOR_BUILD = @DEFINES_FOR_BUILD@ |
DEFS = @DEFS@ |
DEPDIR = @DEPDIR@ |
DLLTOOL = @DLLTOOL@ |
DLOPEN_LIBS = @DLOPEN_LIBS@ |
DRI2PROTO_CFLAGS = @DRI2PROTO_CFLAGS@ |
DRI2PROTO_LIBS = @DRI2PROTO_LIBS@ |
DRIGL_CFLAGS = @DRIGL_CFLAGS@ |
DRIGL_LIBS = @DRIGL_LIBS@ |
DRI_DRIVER_INSTALL_DIR = @DRI_DRIVER_INSTALL_DIR@ |
DRI_DRIVER_SEARCH_DIR = @DRI_DRIVER_SEARCH_DIR@ |
DRI_LIB_DEPS = @DRI_LIB_DEPS@ |
DRI_PC_REQ_PRIV = @DRI_PC_REQ_PRIV@ |
DSYMUTIL = @DSYMUTIL@ |
DUMPBIN = @DUMPBIN@ |
ECHO_C = @ECHO_C@ |
ECHO_N = @ECHO_N@ |
ECHO_T = @ECHO_T@ |
EGL_CFLAGS = @EGL_CFLAGS@ |
EGL_CLIENT_APIS = @EGL_CLIENT_APIS@ |
EGL_DRIVER_INSTALL_DIR = @EGL_DRIVER_INSTALL_DIR@ |
EGL_LIB_DEPS = @EGL_LIB_DEPS@ |
EGL_LIB_GLOB = @EGL_LIB_GLOB@ |
EGL_LIB_NAME = @EGL_LIB_NAME@ |
EGL_NATIVE_PLATFORM = @EGL_NATIVE_PLATFORM@ |
EGL_PLATFORMS = @EGL_PLATFORMS@ |
EGREP = @EGREP@ |
ELF_LIB = @ELF_LIB@ |
EXEEXT = @EXEEXT@ |
EXPAT_INCLUDES = @EXPAT_INCLUDES@ |
FGREP = @FGREP@ |
FREEDRENO_CFLAGS = @FREEDRENO_CFLAGS@ |
FREEDRENO_LIBS = @FREEDRENO_LIBS@ |
GALLIUM_DRI_LIB_DEPS = @GALLIUM_DRI_LIB_DEPS@ |
GALLIUM_PIPE_LOADER_DEFINES = @GALLIUM_PIPE_LOADER_DEFINES@ |
GALLIUM_PIPE_LOADER_LIBS = @GALLIUM_PIPE_LOADER_LIBS@ |
GALLIUM_PIPE_LOADER_XCB_CFLAGS = @GALLIUM_PIPE_LOADER_XCB_CFLAGS@ |
GALLIUM_PIPE_LOADER_XCB_LIBS = @GALLIUM_PIPE_LOADER_XCB_LIBS@ |
GBM_PC_LIB_PRIV = @GBM_PC_LIB_PRIV@ |
GBM_PC_REQ_PRIV = @GBM_PC_REQ_PRIV@ |
GLAPI_LIB_GLOB = @GLAPI_LIB_GLOB@ |
GLAPI_LIB_NAME = @GLAPI_LIB_NAME@ |
GLESv1_CM_LIB_DEPS = @GLESv1_CM_LIB_DEPS@ |
GLESv1_CM_LIB_GLOB = @GLESv1_CM_LIB_GLOB@ |
GLESv1_CM_LIB_NAME = @GLESv1_CM_LIB_NAME@ |
GLESv1_CM_PC_LIB_PRIV = @GLESv1_CM_PC_LIB_PRIV@ |
GLESv2_LIB_DEPS = @GLESv2_LIB_DEPS@ |
GLESv2_LIB_GLOB = @GLESv2_LIB_GLOB@ |
GLESv2_LIB_NAME = @GLESv2_LIB_NAME@ |
GLESv2_PC_LIB_PRIV = @GLESv2_PC_LIB_PRIV@ |
GLPROTO_CFLAGS = @GLPROTO_CFLAGS@ |
GLPROTO_LIBS = @GLPROTO_LIBS@ |
GLX_TLS = @GLX_TLS@ |
GL_LIB = @GL_LIB@ |
GL_LIB_DEPS = @GL_LIB_DEPS@ |
GL_LIB_GLOB = @GL_LIB_GLOB@ |
GL_LIB_NAME = @GL_LIB_NAME@ |
GL_PC_CFLAGS = @GL_PC_CFLAGS@ |
GL_PC_LIB_PRIV = @GL_PC_LIB_PRIV@ |
GL_PC_REQ_PRIV = @GL_PC_REQ_PRIV@ |
GREP = @GREP@ |
HAVE_XF86VIDMODE = @HAVE_XF86VIDMODE@ |
INDENT = @INDENT@ |
INDENT_FLAGS = @INDENT_FLAGS@ |
INSTALL = @INSTALL@ |
INSTALL_DATA = @INSTALL_DATA@ |
INSTALL_PROGRAM = @INSTALL_PROGRAM@ |
INSTALL_SCRIPT = @INSTALL_SCRIPT@ |
INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ |
INTEL_CFLAGS = @INTEL_CFLAGS@ |
INTEL_LIBS = @INTEL_LIBS@ |
LD = @LD@ |
LDFLAGS = @LDFLAGS@ |
LDFLAGS_FOR_BUILD = @LDFLAGS_FOR_BUILD@ |
LEX = @LEX@ |
LEXLIB = @LEXLIB@ |
LEX_OUTPUT_ROOT = @LEX_OUTPUT_ROOT@ |
LIBCLC_INCLUDEDIR = @LIBCLC_INCLUDEDIR@ |
LIBCLC_LIBEXECDIR = @LIBCLC_LIBEXECDIR@ |
LIBDRM_CFLAGS = @LIBDRM_CFLAGS@ |
LIBDRM_LIBS = @LIBDRM_LIBS@ |
LIBDRM_XORG_CFLAGS = @LIBDRM_XORG_CFLAGS@ |
LIBDRM_XORG_LIBS = @LIBDRM_XORG_LIBS@ |
LIBKMS_XORG_CFLAGS = @LIBKMS_XORG_CFLAGS@ |
LIBKMS_XORG_LIBS = @LIBKMS_XORG_LIBS@ |
LIBOBJS = @LIBOBJS@ |
LIBS = @LIBS@ |
LIBTOOL = @LIBTOOL@ |
LIBUDEV_CFLAGS = @LIBUDEV_CFLAGS@ |
LIBUDEV_LIBS = @LIBUDEV_LIBS@ |
LIB_DIR = @LIB_DIR@ |
LIPO = @LIPO@ |
LLVM_BINDIR = @LLVM_BINDIR@ |
LLVM_CFLAGS = @LLVM_CFLAGS@ |
LLVM_CONFIG = @LLVM_CONFIG@ |
LLVM_CPPFLAGS = @LLVM_CPPFLAGS@ |
LLVM_CXXFLAGS = @LLVM_CXXFLAGS@ |
LLVM_INCLUDEDIR = @LLVM_INCLUDEDIR@ |
LLVM_LDFLAGS = @LLVM_LDFLAGS@ |
LLVM_LIBDIR = @LLVM_LIBDIR@ |
LLVM_LIBS = @LLVM_LIBS@ |
LLVM_VERSION = @LLVM_VERSION@ |
LN_S = @LN_S@ |
LTLIBOBJS = @LTLIBOBJS@ |
MAKE = @MAKE@ |
MAKEINFO = @MAKEINFO@ |
MANIFEST_TOOL = @MANIFEST_TOOL@ |
MESA_LLVM = @MESA_LLVM@ |
MKDIR_P = @MKDIR_P@ |
NM = @NM@ |
NMEDIT = @NMEDIT@ |
NOUVEAU_CFLAGS = @NOUVEAU_CFLAGS@ |
NOUVEAU_LIBS = @NOUVEAU_LIBS@ |
OBJDUMP = @OBJDUMP@ |
OBJEXT = @OBJEXT@ |
OPENCL_LIB_INSTALL_DIR = @OPENCL_LIB_INSTALL_DIR@ |
OSMESA_LIB = @OSMESA_LIB@ |
OSMESA_LIB_DEPS = @OSMESA_LIB_DEPS@ |
OSMESA_LIB_NAME = @OSMESA_LIB_NAME@ |
OSMESA_MESA_DEPS = @OSMESA_MESA_DEPS@ |
OSMESA_PC_LIB_PRIV = @OSMESA_PC_LIB_PRIV@ |
OSMESA_PC_REQ = @OSMESA_PC_REQ@ |
OSMESA_VERSION = @OSMESA_VERSION@ |
OTOOL = @OTOOL@ |
OTOOL64 = @OTOOL64@ |
PACKAGE = @PACKAGE@ |
PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@ |
PACKAGE_NAME = @PACKAGE_NAME@ |
PACKAGE_STRING = @PACKAGE_STRING@ |
PACKAGE_TARNAME = @PACKAGE_TARNAME@ |
PACKAGE_URL = @PACKAGE_URL@ |
PACKAGE_VERSION = @PACKAGE_VERSION@ |
PATH_SEPARATOR = @PATH_SEPARATOR@ |
PERL = @PERL@ |
PKG_CONFIG = @PKG_CONFIG@ |
PKG_CONFIG_LIBDIR = @PKG_CONFIG_LIBDIR@ |
PKG_CONFIG_PATH = @PKG_CONFIG_PATH@ |
POSIX_SHELL = @POSIX_SHELL@ |
PTHREAD_CC = @PTHREAD_CC@ |
PTHREAD_CFLAGS = @PTHREAD_CFLAGS@ |
PTHREAD_LIBS = @PTHREAD_LIBS@ |
PYTHON2 = @PYTHON2@ |
RADEON_CFLAGS = @RADEON_CFLAGS@ |
RADEON_LIBS = @RADEON_LIBS@ |
RANLIB = @RANLIB@ |
SED = @SED@ |
SELINUX_LIBS = @SELINUX_LIBS@ |
SET_MAKE = @SET_MAKE@ |
SHELL = @SHELL@ |
STRIP = @STRIP@ |
VDPAU_CFLAGS = @VDPAU_CFLAGS@ |
VDPAU_LIBS = @VDPAU_LIBS@ |
VDPAU_LIB_INSTALL_DIR = @VDPAU_LIB_INSTALL_DIR@ |
VDPAU_MAJOR = @VDPAU_MAJOR@ |
VDPAU_MINOR = @VDPAU_MINOR@ |
VERSION = @VERSION@ |
VG_LIB_DEPS = @VG_LIB_DEPS@ |
VG_LIB_GLOB = @VG_LIB_GLOB@ |
VG_LIB_NAME = @VG_LIB_NAME@ |
VG_PC_LIB_PRIV = @VG_PC_LIB_PRIV@ |
VISIBILITY_CFLAGS = @VISIBILITY_CFLAGS@ |
VISIBILITY_CXXFLAGS = @VISIBILITY_CXXFLAGS@ |
WAYLAND_CFLAGS = @WAYLAND_CFLAGS@ |
WAYLAND_LIBS = @WAYLAND_LIBS@ |
WAYLAND_SCANNER = @WAYLAND_SCANNER@ |
X11_INCLUDES = @X11_INCLUDES@ |
XA_MAJOR = @XA_MAJOR@ |
XA_MINOR = @XA_MINOR@ |
XA_TINY = @XA_TINY@ |
XA_VERSION = @XA_VERSION@ |
XCB_DRI2_CFLAGS = @XCB_DRI2_CFLAGS@ |
XCB_DRI2_LIBS = @XCB_DRI2_LIBS@ |
XEXT_CFLAGS = @XEXT_CFLAGS@ |
XEXT_LIBS = @XEXT_LIBS@ |
XF86VIDMODE_CFLAGS = @XF86VIDMODE_CFLAGS@ |
XF86VIDMODE_LIBS = @XF86VIDMODE_LIBS@ |
XLIBGL_CFLAGS = @XLIBGL_CFLAGS@ |
XLIBGL_LIBS = @XLIBGL_LIBS@ |
XORG_CFLAGS = @XORG_CFLAGS@ |
XORG_DRIVER_INSTALL_DIR = @XORG_DRIVER_INSTALL_DIR@ |
XORG_LIBS = @XORG_LIBS@ |
XVMC_CFLAGS = @XVMC_CFLAGS@ |
XVMC_LIBS = @XVMC_LIBS@ |
XVMC_LIB_INSTALL_DIR = @XVMC_LIB_INSTALL_DIR@ |
XVMC_MAJOR = @XVMC_MAJOR@ |
XVMC_MINOR = @XVMC_MINOR@ |
YACC = @YACC@ |
YFLAGS = @YFLAGS@ |
abs_builddir = @abs_builddir@ |
abs_srcdir = @abs_srcdir@ |
abs_top_builddir = @abs_top_builddir@ |
abs_top_srcdir = @abs_top_srcdir@ |
ac_ct_AR = @ac_ct_AR@ |
ac_ct_CC = @ac_ct_CC@ |
ac_ct_CC_FOR_BUILD = @ac_ct_CC_FOR_BUILD@ |
ac_ct_CXX = @ac_ct_CXX@ |
ac_ct_CXX_FOR_BUILD = @ac_ct_CXX_FOR_BUILD@ |
ac_ct_DUMPBIN = @ac_ct_DUMPBIN@ |
am__include = @am__include@ |
am__leading_dot = @am__leading_dot@ |
am__quote = @am__quote@ |
am__tar = @am__tar@ |
am__untar = @am__untar@ |
ax_pthread_config = @ax_pthread_config@ |
bindir = @bindir@ |
build = @build@ |
build_alias = @build_alias@ |
build_cpu = @build_cpu@ |
build_os = @build_os@ |
build_vendor = @build_vendor@ |
builddir = @builddir@ |
datadir = @datadir@ |
datarootdir = @datarootdir@ |
docdir = @docdir@ |
dvidir = @dvidir@ |
exec_prefix = @exec_prefix@ |
host = @host@ |
host_alias = @host_alias@ |
host_cpu = @host_cpu@ |
host_os = @host_os@ |
host_vendor = @host_vendor@ |
htmldir = @htmldir@ |
includedir = @includedir@ |
infodir = @infodir@ |
install_sh = @install_sh@ |
libdir = @libdir@ |
libexecdir = @libexecdir@ |
localedir = @localedir@ |
localstatedir = @localstatedir@ |
mandir = @mandir@ |
mkdir_p = @mkdir_p@ |
oldincludedir = @oldincludedir@ |
pdfdir = @pdfdir@ |
prefix = @prefix@ |
program_transform_name = @program_transform_name@ |
psdir = @psdir@ |
sbindir = @sbindir@ |
sharedstatedir = @sharedstatedir@ |
srcdir = @srcdir@ |
sysconfdir = @sysconfdir@ |
target = @target@ |
target_alias = @target_alias@ |
target_cpu = @target_cpu@ |
target_os = @target_os@ |
target_vendor = @target_vendor@ |
top_build_prefix = @top_build_prefix@ |
top_builddir = @top_builddir@ |
top_srcdir = @top_srcdir@ |
C_SOURCES := \ |
ilo_3d.c \ |
ilo_3d_pipeline.c \ |
ilo_3d_pipeline_dump.c \ |
ilo_3d_pipeline_gen6.c \ |
ilo_3d_pipeline_gen7.c \ |
ilo_blit.c \ |
ilo_blitter.c \ |
ilo_blitter_blt.c \ |
ilo_blitter_pipe.c \ |
ilo_context.c \ |
ilo_cp.c \ |
ilo_format.c \ |
ilo_gpe_gen6.c \ |
ilo_gpe_gen7.c \ |
ilo_gpgpu.c \ |
ilo_query.c \ |
ilo_resource.c \ |
ilo_screen.c \ |
ilo_shader.c \ |
ilo_state.c \ |
ilo_transfer.c \ |
ilo_video.c \ |
shader/ilo_shader_cs.c \ |
shader/ilo_shader_fs.c \ |
shader/ilo_shader_gs.c \ |
shader/ilo_shader_vs.c \ |
shader/toy_compiler.c \ |
shader/toy_compiler_asm.c \ |
shader/toy_compiler_disasm.c \ |
shader/toy_legalize.c \ |
shader/toy_legalize_ra.c \ |
shader/toy_optimize.c \ |
shader/toy_tgsi.c |
GALLIUM_CFLAGS = \ |
-I$(top_srcdir)/include \ |
-I$(top_srcdir)/src/gallium/include \ |
-I$(top_srcdir)/src/gallium/auxiliary \ |
$(DEFINES) |
noinst_LTLIBRARIES = libilo.la |
AM_CPPFLAGS = \ |
-Iinclude \ |
-I$(top_srcdir)/src/gallium/winsys/intel \ |
$(GALLIUM_CFLAGS) |
AM_CFLAGS = \ |
$(VISIBILITY_CFLAGS) |
libilo_la_SOURCES = $(C_SOURCES) |
all: all-am |
.SUFFIXES: |
.SUFFIXES: .c .lo .o .obj |
$(srcdir)/Makefile.in: $(srcdir)/Makefile.am $(srcdir)/Makefile.sources $(top_srcdir)/src/gallium/Automake.inc $(am__configure_deps) |
@for dep in $?; do \ |
case '$(am__configure_deps)' in \ |
*$$dep*) \ |
( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \ |
&& { if test -f $@; then exit 0; else break; fi; }; \ |
exit 1;; \ |
esac; \ |
done; \ |
echo ' cd $(top_srcdir) && $(AUTOMAKE) --foreign src/gallium/drivers/ilo/Makefile'; \ |
$(am__cd) $(top_srcdir) && \ |
$(AUTOMAKE) --foreign src/gallium/drivers/ilo/Makefile |
.PRECIOUS: Makefile |
Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status |
@case '$?' in \ |
*config.status*) \ |
cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \ |
*) \ |
echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)'; \ |
cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe);; \ |
esac; |
$(srcdir)/Makefile.sources $(top_srcdir)/src/gallium/Automake.inc: |
$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) |
cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh |
$(top_srcdir)/configure: $(am__configure_deps) |
cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh |
$(ACLOCAL_M4): $(am__aclocal_m4_deps) |
cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh |
$(am__aclocal_m4_deps): |
clean-noinstLTLIBRARIES: |
-test -z "$(noinst_LTLIBRARIES)" || rm -f $(noinst_LTLIBRARIES) |
@list='$(noinst_LTLIBRARIES)'; \ |
locs=`for p in $$list; do echo $$p; done | \ |
sed 's|^[^/]*$$|.|; s|/[^/]*$$||; s|$$|/so_locations|' | \ |
sort -u`; \ |
test -z "$$locs" || { \ |
echo rm -f $${locs}; \ |
rm -f $${locs}; \ |
} |
libilo.la: $(libilo_la_OBJECTS) $(libilo_la_DEPENDENCIES) $(EXTRA_libilo_la_DEPENDENCIES) |
$(AM_V_CCLD)$(LINK) $(libilo_la_OBJECTS) $(libilo_la_LIBADD) $(LIBS) |
mostlyclean-compile: |
-rm -f *.$(OBJEXT) |
distclean-compile: |
-rm -f *.tab.c |
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/ilo_3d.Plo@am__quote@ |
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/ilo_3d_pipeline.Plo@am__quote@ |
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/ilo_3d_pipeline_dump.Plo@am__quote@ |
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/ilo_3d_pipeline_gen6.Plo@am__quote@ |
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/ilo_3d_pipeline_gen7.Plo@am__quote@ |
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/ilo_blit.Plo@am__quote@ |
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/ilo_blitter.Plo@am__quote@ |
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/ilo_blitter_blt.Plo@am__quote@ |
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/ilo_blitter_pipe.Plo@am__quote@ |
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/ilo_context.Plo@am__quote@ |
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/ilo_cp.Plo@am__quote@ |
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/ilo_format.Plo@am__quote@ |
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/ilo_gpe_gen6.Plo@am__quote@ |
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/ilo_gpe_gen7.Plo@am__quote@ |
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/ilo_gpgpu.Plo@am__quote@ |
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/ilo_query.Plo@am__quote@ |
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/ilo_resource.Plo@am__quote@ |
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/ilo_screen.Plo@am__quote@ |
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/ilo_shader.Plo@am__quote@ |
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/ilo_shader_cs.Plo@am__quote@ |
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/ilo_shader_fs.Plo@am__quote@ |
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/ilo_shader_gs.Plo@am__quote@ |
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/ilo_shader_vs.Plo@am__quote@ |
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/ilo_state.Plo@am__quote@ |
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/ilo_transfer.Plo@am__quote@ |
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/ilo_video.Plo@am__quote@ |
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/toy_compiler.Plo@am__quote@ |
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/toy_compiler_asm.Plo@am__quote@ |
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/toy_compiler_disasm.Plo@am__quote@ |
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/toy_legalize.Plo@am__quote@ |
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/toy_legalize_ra.Plo@am__quote@ |
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/toy_optimize.Plo@am__quote@ |
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/toy_tgsi.Plo@am__quote@ |
.c.o: |
@am__fastdepCC_TRUE@ $(AM_V_CC)$(COMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $< |
@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po |
@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ |
@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ |
@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ $< |
.c.obj: |
@am__fastdepCC_TRUE@ $(AM_V_CC)$(COMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ `$(CYGPATH_W) '$<'` |
@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po |
@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ |
@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ |
@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ `$(CYGPATH_W) '$<'` |
.c.lo: |
@am__fastdepCC_TRUE@ $(AM_V_CC)$(LTCOMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $< |
@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Plo |
@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=yes @AMDEPBACKSLASH@ |
@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ |
@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LTCOMPILE) -c -o $@ $< |
ilo_shader_cs.lo: shader/ilo_shader_cs.c |
@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT ilo_shader_cs.lo -MD -MP -MF $(DEPDIR)/ilo_shader_cs.Tpo -c -o ilo_shader_cs.lo `test -f 'shader/ilo_shader_cs.c' || echo '$(srcdir)/'`shader/ilo_shader_cs.c |
@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/ilo_shader_cs.Tpo $(DEPDIR)/ilo_shader_cs.Plo |
@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='shader/ilo_shader_cs.c' object='ilo_shader_cs.lo' libtool=yes @AMDEPBACKSLASH@ |
@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ |
@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o ilo_shader_cs.lo `test -f 'shader/ilo_shader_cs.c' || echo '$(srcdir)/'`shader/ilo_shader_cs.c |
ilo_shader_fs.lo: shader/ilo_shader_fs.c |
@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT ilo_shader_fs.lo -MD -MP -MF $(DEPDIR)/ilo_shader_fs.Tpo -c -o ilo_shader_fs.lo `test -f 'shader/ilo_shader_fs.c' || echo '$(srcdir)/'`shader/ilo_shader_fs.c |
@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/ilo_shader_fs.Tpo $(DEPDIR)/ilo_shader_fs.Plo |
@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='shader/ilo_shader_fs.c' object='ilo_shader_fs.lo' libtool=yes @AMDEPBACKSLASH@ |
@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ |
@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o ilo_shader_fs.lo `test -f 'shader/ilo_shader_fs.c' || echo '$(srcdir)/'`shader/ilo_shader_fs.c |
ilo_shader_gs.lo: shader/ilo_shader_gs.c |
@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT ilo_shader_gs.lo -MD -MP -MF $(DEPDIR)/ilo_shader_gs.Tpo -c -o ilo_shader_gs.lo `test -f 'shader/ilo_shader_gs.c' || echo '$(srcdir)/'`shader/ilo_shader_gs.c |
@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/ilo_shader_gs.Tpo $(DEPDIR)/ilo_shader_gs.Plo |
@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='shader/ilo_shader_gs.c' object='ilo_shader_gs.lo' libtool=yes @AMDEPBACKSLASH@ |
@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ |
@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o ilo_shader_gs.lo `test -f 'shader/ilo_shader_gs.c' || echo '$(srcdir)/'`shader/ilo_shader_gs.c |
# Automake-generated per-source libtool compile rules for the shader/
# subdirectory.  Each rule has a fast-dependency variant (gcc -MD -MP)
# and a depcomp fallback, selected at configure time via @am__fastdepCC_*@.
ilo_shader_vs.lo: shader/ilo_shader_vs.c
@am__fastdepCC_TRUE@	$(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT ilo_shader_vs.lo -MD -MP -MF $(DEPDIR)/ilo_shader_vs.Tpo -c -o ilo_shader_vs.lo `test -f 'shader/ilo_shader_vs.c' || echo '$(srcdir)/'`shader/ilo_shader_vs.c
@am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/ilo_shader_vs.Tpo $(DEPDIR)/ilo_shader_vs.Plo
@AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='shader/ilo_shader_vs.c' object='ilo_shader_vs.lo' libtool=yes @AMDEPBACKSLASH@
@AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCC_FALSE@	$(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o ilo_shader_vs.lo `test -f 'shader/ilo_shader_vs.c' || echo '$(srcdir)/'`shader/ilo_shader_vs.c

toy_compiler.lo: shader/toy_compiler.c
@am__fastdepCC_TRUE@	$(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT toy_compiler.lo -MD -MP -MF $(DEPDIR)/toy_compiler.Tpo -c -o toy_compiler.lo `test -f 'shader/toy_compiler.c' || echo '$(srcdir)/'`shader/toy_compiler.c
@am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/toy_compiler.Tpo $(DEPDIR)/toy_compiler.Plo
@AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='shader/toy_compiler.c' object='toy_compiler.lo' libtool=yes @AMDEPBACKSLASH@
@AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCC_FALSE@	$(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o toy_compiler.lo `test -f 'shader/toy_compiler.c' || echo '$(srcdir)/'`shader/toy_compiler.c

toy_compiler_asm.lo: shader/toy_compiler_asm.c
@am__fastdepCC_TRUE@	$(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT toy_compiler_asm.lo -MD -MP -MF $(DEPDIR)/toy_compiler_asm.Tpo -c -o toy_compiler_asm.lo `test -f 'shader/toy_compiler_asm.c' || echo '$(srcdir)/'`shader/toy_compiler_asm.c
@am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/toy_compiler_asm.Tpo $(DEPDIR)/toy_compiler_asm.Plo
@AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='shader/toy_compiler_asm.c' object='toy_compiler_asm.lo' libtool=yes @AMDEPBACKSLASH@
@AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCC_FALSE@	$(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o toy_compiler_asm.lo `test -f 'shader/toy_compiler_asm.c' || echo '$(srcdir)/'`shader/toy_compiler_asm.c

toy_compiler_disasm.lo: shader/toy_compiler_disasm.c
@am__fastdepCC_TRUE@	$(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT toy_compiler_disasm.lo -MD -MP -MF $(DEPDIR)/toy_compiler_disasm.Tpo -c -o toy_compiler_disasm.lo `test -f 'shader/toy_compiler_disasm.c' || echo '$(srcdir)/'`shader/toy_compiler_disasm.c
@am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/toy_compiler_disasm.Tpo $(DEPDIR)/toy_compiler_disasm.Plo
@AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='shader/toy_compiler_disasm.c' object='toy_compiler_disasm.lo' libtool=yes @AMDEPBACKSLASH@
@AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCC_FALSE@	$(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o toy_compiler_disasm.lo `test -f 'shader/toy_compiler_disasm.c' || echo '$(srcdir)/'`shader/toy_compiler_disasm.c

toy_legalize.lo: shader/toy_legalize.c
@am__fastdepCC_TRUE@	$(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT toy_legalize.lo -MD -MP -MF $(DEPDIR)/toy_legalize.Tpo -c -o toy_legalize.lo `test -f 'shader/toy_legalize.c' || echo '$(srcdir)/'`shader/toy_legalize.c
@am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/toy_legalize.Tpo $(DEPDIR)/toy_legalize.Plo
@AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='shader/toy_legalize.c' object='toy_legalize.lo' libtool=yes @AMDEPBACKSLASH@
@AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCC_FALSE@	$(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o toy_legalize.lo `test -f 'shader/toy_legalize.c' || echo '$(srcdir)/'`shader/toy_legalize.c

toy_legalize_ra.lo: shader/toy_legalize_ra.c
@am__fastdepCC_TRUE@	$(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT toy_legalize_ra.lo -MD -MP -MF $(DEPDIR)/toy_legalize_ra.Tpo -c -o toy_legalize_ra.lo `test -f 'shader/toy_legalize_ra.c' || echo '$(srcdir)/'`shader/toy_legalize_ra.c
@am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/toy_legalize_ra.Tpo $(DEPDIR)/toy_legalize_ra.Plo
@AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='shader/toy_legalize_ra.c' object='toy_legalize_ra.lo' libtool=yes @AMDEPBACKSLASH@
@AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCC_FALSE@	$(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o toy_legalize_ra.lo `test -f 'shader/toy_legalize_ra.c' || echo '$(srcdir)/'`shader/toy_legalize_ra.c

toy_optimize.lo: shader/toy_optimize.c
@am__fastdepCC_TRUE@	$(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT toy_optimize.lo -MD -MP -MF $(DEPDIR)/toy_optimize.Tpo -c -o toy_optimize.lo `test -f 'shader/toy_optimize.c' || echo '$(srcdir)/'`shader/toy_optimize.c
@am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/toy_optimize.Tpo $(DEPDIR)/toy_optimize.Plo
@AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='shader/toy_optimize.c' object='toy_optimize.lo' libtool=yes @AMDEPBACKSLASH@
@AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCC_FALSE@	$(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o toy_optimize.lo `test -f 'shader/toy_optimize.c' || echo '$(srcdir)/'`shader/toy_optimize.c

toy_tgsi.lo: shader/toy_tgsi.c
@am__fastdepCC_TRUE@	$(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT toy_tgsi.lo -MD -MP -MF $(DEPDIR)/toy_tgsi.Tpo -c -o toy_tgsi.lo `test -f 'shader/toy_tgsi.c' || echo '$(srcdir)/'`shader/toy_tgsi.c
@am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/toy_tgsi.Tpo $(DEPDIR)/toy_tgsi.Plo
@AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='shader/toy_tgsi.c' object='toy_tgsi.lo' libtool=yes @AMDEPBACKSLASH@
@AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCC_FALSE@	$(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o toy_tgsi.lo `test -f 'shader/toy_tgsi.c' || echo '$(srcdir)/'`shader/toy_tgsi.c
# remove libtool object files / libtool output directories
mostlyclean-libtool:
	-rm -f *.lo

clean-libtool:
	-rm -rf .libs _libs
# generated source-tagging rules: mkid database and emacs-style TAGS
ID: $(am__tagged_files)
	$(am__define_uniq_tagged_files); mkid -fID $$unique
tags: tags-am
TAGS: tags

tags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files)
	set x; \
	here=`pwd`; \
	$(am__define_uniq_tagged_files); \
	shift; \
	if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \
	  test -n "$$unique" || unique=$$empty_fix; \
	  if test $$# -gt 0; then \
	    $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
	      "$$@" $$unique; \
	  else \
	    $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
	      $$unique; \
	  fi; \
	fi
# vi-style ctags and GNU GLOBAL (gtags) database generation
ctags: ctags-am
CTAGS: ctags

ctags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files)
	$(am__define_uniq_tagged_files); \
	test -z "$(CTAGS_ARGS)$$unique" \
	  || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \
	     $$unique

GTAGS:
	here=`$(am__cd) $(top_builddir) && pwd` \
	  && $(am__cd) $(top_srcdir) \
	  && gtags -i $(GTAGS_ARGS) "$$here"
# append this subdirectory's tagged files to the top-level cscope file list
cscopelist: cscopelist-am

cscopelist-am: $(am__tagged_files)
	list='$(am__tagged_files)'; \
	case "$(srcdir)" in \
	  [\\/]* | ?:[\\/]*) sdir="$(srcdir)" ;; \
	  *) sdir=$(subdir)/$(srcdir) ;; \
	esac; \
	for i in $$list; do \
	  if test -f "$$i"; then \
	    echo "$(subdir)/$$i"; \
	  else \
	    echo "$$sdir/$$i"; \
	  fi; \
	done >> $(top_builddir)/cscope.files

# remove all generated tag databases
distclean-tags:
	-rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags
# copy this directory's DISTFILES into $(distdir) for "make dist"
distdir: $(DISTFILES)
	@srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
	topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
	list='$(DISTFILES)'; \
	  dist_files=`for file in $$list; do echo $$file; done | \
	  sed -e "s|^$$srcdirstrip/||;t" \
	      -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \
	case $$dist_files in \
	  */*) $(MKDIR_P) `echo "$$dist_files" | \
	       sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \
	       sort -u` ;; \
	esac; \
	for file in $$dist_files; do \
	  if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \
	  if test -d $$d/$$file; then \
	    dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \
	    if test -d "$(distdir)/$$file"; then \
	      find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
	    fi; \
	    if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \
	      cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \
	      find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
	    fi; \
	    cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \
	  else \
	    test -f "$(distdir)/$$file" \
	      || cp -p $$d/$$file "$(distdir)/$$file" \
	      || exit 1; \
	  fi; \
	done
# standard automake check/install entry points; this convenience library
# installs nothing itself, so most of these are simple aliases
check-am: all-am
check: check-am
all-am: Makefile $(LTLIBRARIES)
installdirs:
install: install-am
install-exec: install-exec-am
install-data: install-data-am
uninstall: uninstall-am

install-am: all-am
	@$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am

installcheck: installcheck-am
# re-run install with the strip program wired into INSTALL_PROGRAM
install-strip:
	if test -z '$(STRIP)'; then \
	  $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
	    install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
	      install; \
	else \
	  $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
	    install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
	    "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \
	fi
# generated clean/dist/doc stub targets; empty ones exist so the standard
# GNU target names always resolve in every subdirectory
mostlyclean-generic:

clean-generic:

distclean-generic:
	-test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES)
	-test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES)

maintainer-clean-generic:
	@echo "This command is intended for maintainers to use"
	@echo "it deletes files that may require special tools to rebuild."
clean: clean-am

clean-am: clean-generic clean-libtool clean-noinstLTLIBRARIES \
	mostlyclean-am

distclean: distclean-am
	-rm -rf ./$(DEPDIR)
	-rm -f Makefile
distclean-am: clean-am distclean-compile distclean-generic \
	distclean-tags

dvi: dvi-am

dvi-am:

html: html-am

html-am:

info: info-am

info-am:

install-data-am:

install-dvi: install-dvi-am

install-dvi-am:

install-exec-am:

install-html: install-html-am

install-html-am:

install-info: install-info-am

install-info-am:

install-man:

install-pdf: install-pdf-am

install-pdf-am:

install-ps: install-ps-am

install-ps-am:

installcheck-am:

maintainer-clean: maintainer-clean-am
	-rm -rf ./$(DEPDIR)
	-rm -f Makefile
maintainer-clean-am: distclean-am maintainer-clean-generic

mostlyclean: mostlyclean-am

mostlyclean-am: mostlyclean-compile mostlyclean-generic \
	mostlyclean-libtool

pdf: pdf-am

pdf-am:

ps: ps-am

ps-am:

uninstall-am:
# declare recursive-make and phony targets so make never mistakes them
# for real files
.MAKE: install-am install-strip

.PHONY: CTAGS GTAGS TAGS all all-am check check-am clean clean-generic \
	clean-libtool clean-noinstLTLIBRARIES cscopelist-am ctags \
	ctags-am distclean distclean-compile distclean-generic \
	distclean-libtool distclean-tags distdir dvi dvi-am html \
	html-am info info-am install install-am install-data \
	install-data-am install-dvi install-dvi-am install-exec \
	install-exec-am install-html install-html-am install-info \
	install-info-am install-man install-pdf install-pdf-am \
	install-ps install-ps-am install-strip installcheck \
	installcheck-am installdirs maintainer-clean \
	maintainer-clean-generic mostlyclean mostlyclean-compile \
	mostlyclean-generic mostlyclean-libtool pdf pdf-am ps ps-am \
	tags tags-am uninstall uninstall-am

# Tell versions [3.59,3.63) of GNU make to not export all variables.
# Otherwise a system limit (for SysV at least) may be exceeded.
.NOEXPORT:
/contrib/sdk/sources/Mesa/src/gallium/drivers/ilo/Makefile.sources |
---|
0,0 → 1,34 |
# C sources of the ilo gallium driver; shared by the Android and automake
# builds (both include this file).  No comments may be placed inside the
# backslash-continued list below.
C_SOURCES := \
	ilo_3d.c \
	ilo_3d_pipeline.c \
	ilo_3d_pipeline_dump.c \
	ilo_3d_pipeline_gen6.c \
	ilo_3d_pipeline_gen7.c \
	ilo_blit.c \
	ilo_blitter.c \
	ilo_blitter_blt.c \
	ilo_blitter_pipe.c \
	ilo_context.c \
	ilo_cp.c \
	ilo_format.c \
	ilo_gpe_gen6.c \
	ilo_gpe_gen7.c \
	ilo_gpgpu.c \
	ilo_query.c \
	ilo_resource.c \
	ilo_screen.c \
	ilo_shader.c \
	ilo_state.c \
	ilo_transfer.c \
	ilo_video.c \
	shader/ilo_shader_cs.c \
	shader/ilo_shader_fs.c \
	shader/ilo_shader_gs.c \
	shader/ilo_shader_vs.c \
	shader/toy_compiler.c \
	shader/toy_compiler_asm.c \
	shader/toy_compiler_disasm.c \
	shader/toy_legalize.c \
	shader/toy_legalize_ra.c \
	shader/toy_optimize.c \
	shader/toy_tgsi.c
/contrib/sdk/sources/Mesa/src/gallium/drivers/ilo/ilo_3d.c |
---|
0,0 → 1,796 |
/* |
* Mesa 3-D graphics library |
* |
* Copyright (C) 2012-2013 LunarG, Inc. |
* |
* Permission is hereby granted, free of charge, to any person obtaining a |
* copy of this software and associated documentation files (the "Software"), |
* to deal in the Software without restriction, including without limitation |
* the rights to use, copy, modify, merge, publish, distribute, sublicense, |
* and/or sell copies of the Software, and to permit persons to whom the |
* Software is furnished to do so, subject to the following conditions: |
* |
* The above copyright notice and this permission notice shall be included |
* in all copies or substantial portions of the Software. |
* |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING |
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER |
* DEALINGS IN THE SOFTWARE. |
* |
* Authors: |
* Chia-I Wu <olv@lunarg.com> |
*/ |
#include "intel_winsys.h" |
#include "ilo_3d_pipeline.h" |
#include "ilo_context.h" |
#include "ilo_cp.h" |
#include "ilo_query.h" |
#include "ilo_shader.h" |
#include "ilo_state.h" |
#include "ilo_3d.h" |
/**
 * Fold the depth-count snapshots collected in \p q->bo into the query
 * result and reset the snapshot count so the bo can be reused.
 */
static void
process_query_for_occlusion_counter(struct ilo_3d *hw3d,
                                    struct ilo_query *q)
{
   uint64_t *vals, depth_count = 0;
   int i;

   /* in pairs: snapshots are written as (begin, end) pairs */
   assert(q->reg_read % 2 == 0);

   intel_bo_map(q->bo, false);
   vals = intel_bo_get_virtual(q->bo);

   /* sum the difference of each (begin, end) snapshot pair */
   for (i = 1; i < q->reg_read; i += 2)
      depth_count += vals[i] - vals[i - 1];

   intel_bo_unmap(q->bo);

   /* accumulate so that the query can be resumed if wanted */
   q->data.u64 += depth_count;
   q->reg_read = 0;
}
/**
 * Convert a raw hardware timestamp to nanoseconds.
 *
 * Only the low 32 bits of the counter are used, and each tick is 80ns
 * (see ilo_get_timestamp()).
 */
static uint64_t
timestamp_to_ns(uint64_t timestamp)
{
   const uint64_t ticks = timestamp & 0xffffffff;

   return ticks * 80;
}
/**
 * Read back the single TIMESTAMP snapshot in \p q->bo and store it,
 * converted to nanoseconds, as the query result.
 */
static void
process_query_for_timestamp(struct ilo_3d *hw3d, struct ilo_query *q)
{
   uint64_t *vals, timestamp;

   /* exactly one snapshot is written for a timestamp query */
   assert(q->reg_read == 1);

   intel_bo_map(q->bo, false);
   vals = intel_bo_get_virtual(q->bo);
   timestamp = vals[0];
   intel_bo_unmap(q->bo);

   q->data.u64 = timestamp_to_ns(timestamp);
   q->reg_read = 0;
}
/**
 * Fold the timestamp snapshots collected in \p q->bo into the elapsed-time
 * result (in nanoseconds) and reset the snapshot count.
 */
static void
process_query_for_time_elapsed(struct ilo_3d *hw3d, struct ilo_query *q)
{
   uint64_t *vals, elapsed = 0;
   int i;

   /* in pairs: snapshots are written as (begin, end) pairs */
   assert(q->reg_read % 2 == 0);

   intel_bo_map(q->bo, false);
   vals = intel_bo_get_virtual(q->bo);

   /* sum the difference of each (begin, end) snapshot pair */
   for (i = 1; i < q->reg_read; i += 2)
      elapsed += vals[i] - vals[i - 1];

   intel_bo_unmap(q->bo);

   /* accumulate so that the query can be resumed if wanted */
   q->data.u64 += timestamp_to_ns(elapsed);
   q->reg_read = 0;
}
/**
 * Emit snapshot writes to resume all outstanding queries, called when the
 * context (re)gains ownership of the render ring.
 */
static void
ilo_3d_resume_queries(struct ilo_3d *hw3d)
{
   struct ilo_query *q;

   /* resume occlusion queries */
   LIST_FOR_EACH_ENTRY(q, &hw3d->occlusion_queries, list) {
      /* accumulate the result if the bo is already full */
      if (q->reg_read >= q->reg_total)
         process_query_for_occlusion_counter(hw3d, q);

      ilo_3d_pipeline_emit_write_depth_count(hw3d->pipeline,
            q->bo, q->reg_read++);
   }

   /* resume timer queries */
   LIST_FOR_EACH_ENTRY(q, &hw3d->time_elapsed_queries, list) {
      /* accumulate the result if the bo is already full */
      if (q->reg_read >= q->reg_total)
         process_query_for_time_elapsed(hw3d, q);

      ilo_3d_pipeline_emit_write_timestamp(hw3d->pipeline,
            q->bo, q->reg_read++);
   }
}
/**
 * Emit snapshot writes to pause all outstanding queries, called right
 * before the context loses ownership of the render ring.
 */
static void
ilo_3d_pause_queries(struct ilo_3d *hw3d)
{
   struct ilo_query *q;

   /* pause occlusion queries */
   LIST_FOR_EACH_ENTRY(q, &hw3d->occlusion_queries, list) {
      /* space for the pausing write was reserved in begin_query */
      assert(q->reg_read < q->reg_total);
      ilo_3d_pipeline_emit_write_depth_count(hw3d->pipeline,
            q->bo, q->reg_read++);
   }

   /* pause timer queries */
   LIST_FOR_EACH_ENTRY(q, &hw3d->time_elapsed_queries, list) {
      /* space for the pausing write was reserved in begin_query */
      assert(q->reg_read < q->reg_total);
      ilo_3d_pipeline_emit_write_timestamp(hw3d->pipeline,
            q->bo, q->reg_read++);
   }
}
/**
 * Ring-release callback: pause the outstanding queries so that no
 * snapshot pairs are left open when another owner takes the ring.
 */
static void
ilo_3d_release_render_ring(struct ilo_cp *cp, void *data)
{
   ilo_3d_pause_queries((struct ilo_3d *) data);
}
/**
 * Switch to the render ring and take ownership of it.  When ownership
 * actually changes hands, queries paused by the previous owner's release
 * callback are resumed.
 */
static void
ilo_3d_own_render_ring(struct ilo_3d *hw3d)
{
   ilo_cp_set_ring(hw3d->cp, ILO_CP_RING_RENDER);

   /* ilo_cp_set_owner() returning true presumably means the owner changed
    * -- TODO confirm against ilo_cp.h */
   if (ilo_cp_set_owner(hw3d->cp, &hw3d->owner, hw3d->owner_reserve))
      ilo_3d_resume_queries(hw3d);
}
/**
 * Begin a query.
 *
 * For bo-backed query types, command space for the eventual pausing write
 * is reserved up front and the query is added to the matching active list.
 */
void
ilo_3d_begin_query(struct ilo_context *ilo, struct ilo_query *q)
{
   struct ilo_3d *hw3d = ilo->hw3d;

   ilo_3d_own_render_ring(hw3d);

   switch (q->type) {
   case PIPE_QUERY_OCCLUSION_COUNTER:
      /* reserve some space for pausing the query */
      q->reg_cmd_size = ilo_3d_pipeline_estimate_size(hw3d->pipeline,
            ILO_3D_PIPELINE_WRITE_DEPTH_COUNT, NULL);
      hw3d->owner_reserve += q->reg_cmd_size;
      ilo_cp_set_owner(hw3d->cp, &hw3d->owner, hw3d->owner_reserve);

      q->data.u64 = 0;

      if (ilo_query_alloc_bo(q, 2, -1, hw3d->cp->winsys)) {
         /* XXX we should check the aperture size */
         /* write the opening depth-count snapshot */
         ilo_3d_pipeline_emit_write_depth_count(hw3d->pipeline,
               q->bo, q->reg_read++);

         list_add(&q->list, &hw3d->occlusion_queries);
      }
      break;
   case PIPE_QUERY_TIMESTAMP:
      /* nop: only the ending snapshot matters, written in end_query */
      break;
   case PIPE_QUERY_TIME_ELAPSED:
      /* reserve some space for pausing the query */
      q->reg_cmd_size = ilo_3d_pipeline_estimate_size(hw3d->pipeline,
            ILO_3D_PIPELINE_WRITE_TIMESTAMP, NULL);
      hw3d->owner_reserve += q->reg_cmd_size;
      ilo_cp_set_owner(hw3d->cp, &hw3d->owner, hw3d->owner_reserve);

      q->data.u64 = 0;

      if (ilo_query_alloc_bo(q, 2, -1, hw3d->cp->winsys)) {
         /* XXX we should check the aperture size */
         /* write the opening timestamp snapshot */
         ilo_3d_pipeline_emit_write_timestamp(hw3d->pipeline,
               q->bo, q->reg_read++);

         list_add(&q->list, &hw3d->time_elapsed_queries);
      }
      break;
   case PIPE_QUERY_PRIMITIVES_GENERATED:
      /* software-counted: accumulated in update_prim_count() */
      q->data.u64 = 0;
      list_add(&q->list, &hw3d->prim_generated_queries);
      break;
   case PIPE_QUERY_PRIMITIVES_EMITTED:
      /* software-counted: accumulated in update_prim_count() */
      q->data.u64 = 0;
      list_add(&q->list, &hw3d->prim_emitted_queries);
      break;
   default:
      assert(!"unknown query type");
      break;
   }
}
/**
 * End a query.
 *
 * Bo-backed queries are removed from their active list, the reserved pause
 * space is returned, and the closing snapshot is written.
 */
void
ilo_3d_end_query(struct ilo_context *ilo, struct ilo_query *q)
{
   struct ilo_3d *hw3d = ilo->hw3d;

   ilo_3d_own_render_ring(hw3d);

   switch (q->type) {
   case PIPE_QUERY_OCCLUSION_COUNTER:
      list_del(&q->list);

      assert(q->reg_read < q->reg_total);
      /* pause space is no longer needed once the query has ended */
      hw3d->owner_reserve -= q->reg_cmd_size;
      ilo_cp_set_owner(hw3d->cp, &hw3d->owner, hw3d->owner_reserve);
      ilo_3d_pipeline_emit_write_depth_count(hw3d->pipeline,
            q->bo, q->reg_read++);
      break;
   case PIPE_QUERY_TIMESTAMP:
      q->data.u64 = 0;

      /* a single snapshot is enough for a timestamp query */
      if (ilo_query_alloc_bo(q, 1, 1, hw3d->cp->winsys)) {
         ilo_3d_pipeline_emit_write_timestamp(hw3d->pipeline,
               q->bo, q->reg_read++);
      }
      break;
   case PIPE_QUERY_TIME_ELAPSED:
      list_del(&q->list);

      assert(q->reg_read < q->reg_total);
      /* pause space is no longer needed once the query has ended */
      hw3d->owner_reserve -= q->reg_cmd_size;
      ilo_cp_set_owner(hw3d->cp, &hw3d->owner, hw3d->owner_reserve);
      ilo_3d_pipeline_emit_write_timestamp(hw3d->pipeline,
            q->bo, q->reg_read++);
      break;
   case PIPE_QUERY_PRIMITIVES_GENERATED:
   case PIPE_QUERY_PRIMITIVES_EMITTED:
      /* software-counted: nothing to flush, just deactivate */
      list_del(&q->list);
      break;
   default:
      assert(!"unknown query type");
      break;
   }
}
/** |
* Process the raw query data. |
*/ |
void |
ilo_3d_process_query(struct ilo_context *ilo, struct ilo_query *q) |
{ |
struct ilo_3d *hw3d = ilo->hw3d; |
switch (q->type) { |
case PIPE_QUERY_OCCLUSION_COUNTER: |
if (q->bo) |
process_query_for_occlusion_counter(hw3d, q); |
break; |
case PIPE_QUERY_TIMESTAMP: |
if (q->bo) |
process_query_for_timestamp(hw3d, q); |
break; |
case PIPE_QUERY_TIME_ELAPSED: |
if (q->bo) |
process_query_for_time_elapsed(hw3d, q); |
break; |
case PIPE_QUERY_PRIMITIVES_GENERATED: |
case PIPE_QUERY_PRIMITIVES_EMITTED: |
break; |
default: |
assert(!"unknown query type"); |
break; |
} |
} |
/**
 * Hook for CP new-batch: invalidate state that referenced the old batch
 * and mark that a fresh batch has begun.
 */
void
ilo_3d_cp_flushed(struct ilo_3d *hw3d)
{
   if (ilo_debug & ILO_DEBUG_3D)
      ilo_3d_pipeline_dump(hw3d->pipeline);

   /* invalidate the pipeline: batch and state bos are gone */
   ilo_3d_pipeline_invalidate(hw3d->pipeline,
         ILO_3D_PIPELINE_INVALIDATE_BATCH_BO |
         ILO_3D_PIPELINE_INVALIDATE_STATE_BO);
   /* without a hardware context, all hardware state is lost across
    * batches as well */
   if (!hw3d->cp->render_ctx) {
      ilo_3d_pipeline_invalidate(hw3d->pipeline,
            ILO_3D_PIPELINE_INVALIDATE_HW);
   }

   hw3d->new_batch = true;
}
/**
 * Create a 3D context.
 *
 * Returns NULL on allocation failure.  The caller owns the returned
 * context and must destroy it with ilo_3d_destroy().
 */
struct ilo_3d *
ilo_3d_create(struct ilo_cp *cp, const struct ilo_dev_info *dev)
{
   struct ilo_3d *hw3d;

   hw3d = CALLOC_STRUCT(ilo_3d);
   if (!hw3d)
      return NULL;

   /* register ourselves so queries are paused when the ring is taken away */
   hw3d->cp = cp;
   hw3d->owner.release_callback = ilo_3d_release_render_ring;
   hw3d->owner.release_data = hw3d;

   hw3d->new_batch = true;

   list_inithead(&hw3d->occlusion_queries);
   list_inithead(&hw3d->time_elapsed_queries);
   list_inithead(&hw3d->prim_generated_queries);
   list_inithead(&hw3d->prim_emitted_queries);

   hw3d->pipeline = ilo_3d_pipeline_create(cp, dev);
   if (!hw3d->pipeline) {
      FREE(hw3d);
      return NULL;
   }

   return hw3d;
}
/**
 * Destroy a 3D context.
 *
 * NOTE(review): active queries are not released here; presumably callers
 * are expected to have destroyed them already -- confirm.
 */
void
ilo_3d_destroy(struct ilo_3d *hw3d)
{
   ilo_3d_pipeline_destroy(hw3d->pipeline);

   if (hw3d->kernel.bo)
      intel_bo_unreference(hw3d->kernel.bo);

   FREE(hw3d);
}
/**
 * Emit a draw to the pipeline, flushing first when the batch may still
 * reference resources the draw could sample from.  Returns the result of
 * ilo_3d_pipeline_emit_draw().
 */
static bool
draw_vbo(struct ilo_3d *hw3d, const struct ilo_context *ilo,
         int *prim_generated, int *prim_emitted)
{
   bool need_flush = false;
   int max_len;

   ilo_3d_own_render_ring(hw3d);

   if (!hw3d->new_batch) {
      /*
       * Without a better tracking mechanism, when the framebuffer changes, we
       * have to assume that the old framebuffer may be sampled from.  If that
       * happens in the middle of a batch buffer, we need to insert manual
       * flushes.
       */
      need_flush = (ilo->dirty & ILO_DIRTY_FB);

      /* same for SO target changes */
      need_flush |= (ilo->dirty & ILO_DIRTY_SO);
   }

   /* make sure there is enough room first */
   max_len = ilo_3d_pipeline_estimate_size(hw3d->pipeline,
         ILO_3D_PIPELINE_DRAW, ilo);
   if (need_flush) {
      max_len += ilo_3d_pipeline_estimate_size(hw3d->pipeline,
            ILO_3D_PIPELINE_FLUSH, NULL);
   }

   if (max_len > ilo_cp_space(hw3d->cp)) {
      ilo_cp_flush(hw3d->cp);
      /* a fresh batch has nothing to sample from, so no manual flush */
      need_flush = false;
      assert(max_len <= ilo_cp_space(hw3d->cp));
   }

   if (need_flush)
      ilo_3d_pipeline_emit_flush(hw3d->pipeline);

   return ilo_3d_pipeline_emit_draw(hw3d->pipeline, ilo,
         prim_generated, prim_emitted);
}
/**
 * Add the primitive counts of the last draw to all active
 * PRIMITIVES_GENERATED / PRIMITIVES_EMITTED queries.
 */
static void
update_prim_count(struct ilo_3d *hw3d, int generated, int emitted)
{
   struct ilo_query *q;

   LIST_FOR_EACH_ENTRY(q, &hw3d->prim_generated_queries, list)
      q->data.u64 += generated;

   LIST_FOR_EACH_ENTRY(q, &hw3d->prim_emitted_queries, list)
      q->data.u64 += emitted;
}
/**
 * Evaluate the current render condition.
 *
 * Returns true when rendering should proceed (no condition set, the query
 * result is not yet available in no-wait mode, or the result matches the
 * requested condition).
 */
bool
ilo_3d_pass_render_condition(struct ilo_context *ilo)
{
   struct ilo_3d *hw3d = ilo->hw3d;
   uint64_t result;
   bool wait;

   if (!hw3d->render_condition.query)
      return true;

   switch (hw3d->render_condition.mode) {
   case PIPE_RENDER_COND_WAIT:
   case PIPE_RENDER_COND_BY_REGION_WAIT:
      wait = true;
      break;
   case PIPE_RENDER_COND_NO_WAIT:
   case PIPE_RENDER_COND_BY_REGION_NO_WAIT:
   default:
      wait = false;
      break;
   }

   /* render when the zero-ness of the result matches the requested
    * condition; render unconditionally when the result is unavailable */
   if (ilo->base.get_query_result(&ilo->base, hw3d->render_condition.query,
            wait, (union pipe_query_result *) &result))
      return (!result == hw3d->render_condition.cond);
   else
      return true;
}
#define UPDATE_MIN2(a, b) (a) = MIN2((a), (b)) |
#define UPDATE_MAX2(a, b) (a) = MAX2((a), (b)) |
/**
 * Split an indexed draw that uses primitive restart into restart-free
 * sub-draws.
 *
 * \param elements      the index buffer contents
 * \param element_size  size of one index in bytes (1, 2 or 4)
 * \param orig_info     the original draw
 * \param info          output array of sub-draws; the caller must provide
 *                      room for the worst case of (count + 1) / 2 entries
 * \return the number of sub-draws written to \p info
 *
 * \see find_sub_primitives() from core mesa
 */
static int
ilo_find_sub_primitives(const void *elements, unsigned element_size,
                        const struct pipe_draw_info *orig_info,
                        struct pipe_draw_info *info)
{
   const unsigned max_prims = orig_info->count - orig_info->start;
   unsigned i, cur_start, cur_count;
   int scan_index;
   unsigned scan_num;

   cur_start = orig_info->start;
   cur_count = 0;
   scan_num = 0;

#define IB_INDEX_READ(TYPE, INDEX) (((const TYPE *) elements)[INDEX])

/*
 * Walk the index range once: every restart index closes the current
 * sub-draw (if non-empty) and opens the next one; other indices extend the
 * current sub-draw and keep its min/max index bounds up to date.
 */
#define SCAN_ELEMENTS(TYPE) \
   info[scan_num] = *orig_info; \
   info[scan_num].primitive_restart = false; \
   for (i = orig_info->start; i < orig_info->count; i++) { \
      scan_index = IB_INDEX_READ(TYPE, i); \
      if (scan_index == orig_info->restart_index) { \
         if (cur_count > 0) { \
            assert(scan_num < max_prims); \
            info[scan_num].start = cur_start; \
            info[scan_num].count = cur_count; \
            scan_num++; \
            info[scan_num] = *orig_info; \
            info[scan_num].primitive_restart = false; \
         } \
         cur_start = i + 1; \
         cur_count = 0; \
      } \
      else { \
         UPDATE_MIN2(info[scan_num].min_index, scan_index); \
         UPDATE_MAX2(info[scan_num].max_index, scan_index); \
         cur_count++; \
      } \
   } \
   if (cur_count > 0) { \
      assert(scan_num < max_prims); \
      info[scan_num].start = cur_start; \
      info[scan_num].count = cur_count; \
      scan_num++; \
   }

   switch (element_size) {
   case 1:
      SCAN_ELEMENTS(uint8_t);
      break;
   case 2:
      SCAN_ELEMENTS(uint16_t);
      break;
   case 4:
      SCAN_ELEMENTS(uint32_t);
      break;
   default:
      assert(0 && "bad index_size in find_sub_primitives()");
   }

#undef SCAN_ELEMENTS

   return scan_num;
}
static inline bool |
ilo_check_restart_index(const struct ilo_context *ilo, unsigned restart_index) |
{ |
/* |
* Haswell (GEN(7.5)) supports an arbitrary cut index, check everything |
* older. |
*/ |
if (ilo->dev->gen >= ILO_GEN(7.5)) |
return true; |
/* Note: indices must be unsigned byte, unsigned short or unsigned int */ |
switch (ilo->ib.index_size) { |
case 1: |
return ((restart_index & 0xff) == 0xff); |
break; |
case 2: |
return ((restart_index & 0xffff) == 0xffff); |
break; |
case 4: |
return (restart_index == 0xffffffff); |
break; |
} |
return false; |
} |
static inline bool |
ilo_check_restart_prim_type(const struct ilo_context *ilo, unsigned prim) |
{ |
switch (prim) { |
case PIPE_PRIM_POINTS: |
case PIPE_PRIM_LINES: |
case PIPE_PRIM_LINE_STRIP: |
case PIPE_PRIM_TRIANGLES: |
case PIPE_PRIM_TRIANGLE_STRIP: |
/* All 965 GEN graphics support a cut index for these primitive types */ |
return true; |
break; |
case PIPE_PRIM_LINE_LOOP: |
case PIPE_PRIM_POLYGON: |
case PIPE_PRIM_QUAD_STRIP: |
case PIPE_PRIM_QUADS: |
case PIPE_PRIM_TRIANGLE_FAN: |
if (ilo->dev->gen >= ILO_GEN(7.5)) { |
/* Haswell and newer parts can handle these prim types. */ |
return true; |
} |
break; |
} |
return false; |
} |
/* |
* Handle VBOs using primitive restart. |
* Verify that restart index and primitive type can be handled by the HW. |
* Return true if this routine did the rendering |
* Return false if this routine did NOT render because restart can be handled |
* in HW. |
*/ |
static void |
ilo_draw_vbo_with_sw_restart(struct pipe_context *pipe, |
const struct pipe_draw_info *info) |
{ |
struct ilo_context *ilo = ilo_context(pipe); |
struct pipe_draw_info *restart_info = NULL; |
int sub_prim_count = 1; |
/* |
* We have to break up the primitive into chunks manually |
* Worst case, every other index could be a restart index so |
* need to have space for that many primitives |
*/ |
restart_info = MALLOC(((info->count + 1) / 2) * sizeof(*info)); |
if (NULL == restart_info) { |
/* If we can't get memory for this, bail out */ |
ilo_err("%s:%d - Out of memory", __FILE__, __LINE__); |
return; |
} |
if (ilo->ib.buffer) { |
struct pipe_transfer *transfer; |
const void *map; |
map = pipe_buffer_map(pipe, ilo->ib.buffer, |
PIPE_TRANSFER_READ, &transfer); |
sub_prim_count = ilo_find_sub_primitives(map + ilo->ib.offset, |
ilo->ib.index_size, info, restart_info); |
pipe_buffer_unmap(pipe, transfer); |
} |
else { |
sub_prim_count = ilo_find_sub_primitives(ilo->ib.user_buffer, |
ilo->ib.index_size, info, restart_info); |
} |
info = restart_info; |
while (sub_prim_count > 0) { |
pipe->draw_vbo(pipe, info); |
sub_prim_count--; |
info++; |
} |
FREE(restart_info); |
} |
/**
 * Upload the shader cache to the kernel bo, (re)allocating the bo when it
 * is too small or may still be in use by the previous batch.
 *
 * Returns false on allocation or upload failure.
 */
static bool
upload_shaders(struct ilo_3d *hw3d, struct ilo_shader_cache *shc)
{
   bool incremental = true;
   int upload;

   /* with a NULL bo this appears to be a dry run that only returns the
    * size to be uploaded -- TODO confirm against ilo_shader.c */
   upload = ilo_shader_cache_upload(shc,
         NULL, hw3d->kernel.used, incremental);
   if (!upload)
      return true;

   /*
    * Allocate a new bo.  When this is a new batch, assume the bo is still in
    * use by the previous batch and force allocation.
    *
    * Does it help to make shader cache upload with unsynchronized mapping,
    * and remove the check for new batch here?
    */
   if (hw3d->kernel.used + upload > hw3d->kernel.size || hw3d->new_batch) {
      /* grow by doubling, starting from 8 KiB */
      unsigned new_size = (hw3d->kernel.size) ?
         hw3d->kernel.size : (8 * 1024);

      while (hw3d->kernel.used + upload > new_size)
         new_size *= 2;

      if (hw3d->kernel.bo)
         intel_bo_unreference(hw3d->kernel.bo);

      hw3d->kernel.bo = intel_winsys_alloc_buffer(hw3d->cp->winsys,
            "kernel bo", new_size, 0);
      if (!hw3d->kernel.bo) {
         ilo_err("failed to allocate kernel bo\n");
         return false;
      }

      /* a fresh bo requires a full, non-incremental upload */
      hw3d->kernel.used = 0;
      hw3d->kernel.size = new_size;
      incremental = false;

      /* NOTE(review): this size re-query is skipped under NDEBUG;
       * presumably it has no side effects -- confirm */
      assert(new_size >= ilo_shader_cache_upload(shc,
            NULL, hw3d->kernel.used, incremental));

      ilo_3d_pipeline_invalidate(hw3d->pipeline,
            ILO_3D_PIPELINE_INVALIDATE_KERNEL_BO);
   }

   upload = ilo_shader_cache_upload(shc,
         hw3d->kernel.bo, hw3d->kernel.used, incremental);
   if (upload < 0) {
      ilo_err("failed to upload shaders\n");
      return false;
   }

   hw3d->kernel.used += upload;

   assert(hw3d->kernel.used <= hw3d->kernel.size);

   return true;
}
/**
 * pipe_context::draw_vbo hook: perform a draw, falling back to software
 * primitive-restart splitting when the HW cannot handle the request.
 */
static void
ilo_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
{
   struct ilo_context *ilo = ilo_context(pipe);
   struct ilo_3d *hw3d = ilo->hw3d;
   int prim_generated, prim_emitted;

   if (!ilo_3d_pass_render_condition(ilo))
      return;

   if (info->primitive_restart && info->indexed) {
      /*
       * Want to draw an indexed primitive using primitive restart
       * Check that HW can handle the request and fall to SW if not.
       */
      if (!ilo_check_restart_index(ilo, info->restart_index) ||
          !ilo_check_restart_prim_type(ilo, info->mode)) {
         ilo_draw_vbo_with_sw_restart(pipe, info);
         return;
      }
   }

   ilo_finalize_3d_states(ilo, info);

   if (!upload_shaders(hw3d, ilo->shader_cache))
      return;

   /* If draw_vbo ever fails, return immediately. */
   if (!draw_vbo(hw3d, ilo, &prim_generated, &prim_emitted))
      return;

   /* clear dirty status */
   ilo->dirty = 0x0;
   hw3d->new_batch = false;

   /* avoid dangling pointer reference */
   ilo->draw = NULL;

   update_prim_count(hw3d, prim_generated, prim_emitted);

   if (ilo_debug & ILO_DEBUG_NOCACHE)
      ilo_3d_pipeline_emit_flush(hw3d->pipeline);
}
static void |
ilo_render_condition(struct pipe_context *pipe, |
struct pipe_query *query, |
boolean condition, |
uint mode) |
{ |
struct ilo_context *ilo = ilo_context(pipe); |
struct ilo_3d *hw3d = ilo->hw3d; |
/* reference count? */ |
hw3d->render_condition.query = query; |
hw3d->render_condition.mode = mode; |
hw3d->render_condition.cond = condition; |
} |
/**
 * pipe_context::texture_barrier hook: flush the pipeline so prior renders
 * become visible to subsequent texture fetches.
 */
static void
ilo_texture_barrier(struct pipe_context *pipe)
{
   struct ilo_context *ilo = ilo_context(pipe);
   struct ilo_3d *hw3d = ilo->hw3d;

   /* only the render ring renders */
   if (ilo->cp->ring != ILO_CP_RING_RENDER)
      return;

   ilo_3d_pipeline_emit_flush(hw3d->pipeline);

   /* don't know why */
   if (ilo->dev->gen >= ILO_GEN(7))
      ilo_cp_flush(hw3d->cp);
}
static void |
ilo_get_sample_position(struct pipe_context *pipe, |
unsigned sample_count, |
unsigned sample_index, |
float *out_value) |
{ |
struct ilo_context *ilo = ilo_context(pipe); |
struct ilo_3d *hw3d = ilo->hw3d; |
ilo_3d_pipeline_get_sample_position(hw3d->pipeline, |
sample_count, sample_index, |
&out_value[0], &out_value[1]); |
} |
/** |
* Initialize 3D-related functions. |
*/ |
void |
ilo_init_3d_functions(struct ilo_context *ilo) |
{ |
ilo->base.draw_vbo = ilo_draw_vbo; |
ilo->base.render_condition = ilo_render_condition; |
ilo->base.texture_barrier = ilo_texture_barrier; |
ilo->base.get_sample_position = ilo_get_sample_position; |
} |
/contrib/sdk/sources/Mesa/src/gallium/drivers/ilo/ilo_3d.h |
---|
0,0 → 1,91 |
/* |
* Mesa 3-D graphics library |
* |
* Copyright (C) 2012-2013 LunarG, Inc. |
* |
* Permission is hereby granted, free of charge, to any person obtaining a |
* copy of this software and associated documentation files (the "Software"), |
* to deal in the Software without restriction, including without limitation |
* the rights to use, copy, modify, merge, publish, distribute, sublicense, |
* and/or sell copies of the Software, and to permit persons to whom the |
* Software is furnished to do so, subject to the following conditions: |
* |
* The above copyright notice and this permission notice shall be included |
* in all copies or substantial portions of the Software. |
* |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING |
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER |
* DEALINGS IN THE SOFTWARE. |
* |
* Authors: |
* Chia-I Wu <olv@lunarg.com> |
*/ |
#ifndef ILO_3D_H
#define ILO_3D_H
#include "ilo_common.h"
#include "ilo_cp.h"
struct ilo_3d_pipeline;
struct ilo_context;
struct ilo_query;
/**
 * 3D context.
 */
struct ilo_3d {
   /* command parser used to build and submit batches */
   struct ilo_cp *cp;
   struct ilo_cp_owner owner;
   int owner_reserve;
   /* cleared after a draw completes successfully (see ilo_draw_vbo()) */
   bool new_batch;
   /* shader kernel upload area; 'used' bytes of 'size' are filled */
   struct {
      struct intel_bo *bo;
      unsigned used, size;
   } kernel;
   /* current render condition, set by ilo_render_condition() */
   struct {
      struct pipe_query *query;
      unsigned mode;
      bool cond;
   } render_condition;
   /* active queries, one list per query type */
   struct list_head occlusion_queries;
   struct list_head time_elapsed_queries;
   struct list_head prim_generated_queries;
   struct list_head prim_emitted_queries;
   /* GEN-specific state emission backend */
   struct ilo_3d_pipeline *pipeline;
};
struct ilo_3d *
ilo_3d_create(struct ilo_cp *cp, const struct ilo_dev_info *dev);
void
ilo_3d_destroy(struct ilo_3d *hw3d);
void
ilo_3d_cp_flushed(struct ilo_3d *hw3d);
void
ilo_3d_begin_query(struct ilo_context *ilo, struct ilo_query *q);
void
ilo_3d_end_query(struct ilo_context *ilo, struct ilo_query *q);
void
ilo_3d_process_query(struct ilo_context *ilo, struct ilo_query *q);
bool
ilo_3d_pass_render_condition(struct ilo_context *ilo);
void
ilo_init_3d_functions(struct ilo_context *ilo);
#endif /* ILO_3D_H */
/contrib/sdk/sources/Mesa/src/gallium/drivers/ilo/ilo_3d_pipeline.c |
---|
0,0 → 1,291 |
/* |
* Mesa 3-D graphics library |
* |
* Copyright (C) 2013 LunarG, Inc. |
* |
* Permission is hereby granted, free of charge, to any person obtaining a |
* copy of this software and associated documentation files (the "Software"), |
* to deal in the Software without restriction, including without limitation |
* the rights to use, copy, modify, merge, publish, distribute, sublicense, |
* and/or sell copies of the Software, and to permit persons to whom the |
* Software is furnished to do so, subject to the following conditions: |
* |
* The above copyright notice and this permission notice shall be included |
* in all copies or substantial portions of the Software. |
* |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING |
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER |
* DEALINGS IN THE SOFTWARE. |
* |
* Authors: |
* Chia-I Wu <olv@lunarg.com> |
*/ |
#include "util/u_prim.h" |
#include "intel_winsys.h" |
#include "ilo_context.h" |
#include "ilo_cp.h" |
#include "ilo_state.h" |
#include "ilo_3d_pipeline_gen6.h" |
#include "ilo_3d_pipeline_gen7.h" |
#include "ilo_3d_pipeline.h" |
/* in U0.4 */
struct sample_position {
   uint8_t x, y;
};
/* MSAA sample positions on the 16x16 (U0.4) subpixel grid; converted to
 * floats by ilo_3d_pipeline_get_sample_position() */
/* \see gen6_get_sample_position() */
static const struct sample_position sample_position_1x[1] = {
   { 8, 8 }, /* pixel center */
};
static const struct sample_position sample_position_4x[4] = {
   { 6, 2 }, /* distance from the center is sqrt(40) */
   { 14, 6 }, /* distance from the center is sqrt(40) */
   { 2, 10 }, /* distance from the center is sqrt(40) */
   { 10, 14 }, /* distance from the center is sqrt(40) */
};
static const struct sample_position sample_position_8x[8] = {
   { 7, 9 }, /* distance from the center is sqrt(2) */
   { 9, 13 }, /* distance from the center is sqrt(26) */
   { 11, 3 }, /* distance from the center is sqrt(34) */
   { 13, 11 }, /* distance from the center is sqrt(34) */
   { 1, 7 }, /* distance from the center is sqrt(50) */
   { 5, 1 }, /* distance from the center is sqrt(58) */
   { 15, 5 }, /* distance from the center is sqrt(58) */
   { 3, 15 }, /* distance from the center is sqrt(74) */
};
struct ilo_3d_pipeline * |
ilo_3d_pipeline_create(struct ilo_cp *cp, const struct ilo_dev_info *dev) |
{ |
struct ilo_3d_pipeline *p; |
int i; |
p = CALLOC_STRUCT(ilo_3d_pipeline); |
if (!p) |
return NULL; |
p->cp = cp; |
p->dev = dev; |
switch (p->dev->gen) { |
case ILO_GEN(6): |
ilo_3d_pipeline_init_gen6(p); |
break; |
case ILO_GEN(7): |
ilo_3d_pipeline_init_gen7(p); |
break; |
default: |
assert(!"unsupported GEN"); |
FREE(p); |
return NULL; |
break; |
} |
p->invalidate_flags = ILO_3D_PIPELINE_INVALIDATE_ALL; |
p->workaround_bo = intel_winsys_alloc_buffer(p->cp->winsys, |
"PIPE_CONTROL workaround", 4096, 0); |
if (!p->workaround_bo) { |
ilo_warn("failed to allocate PIPE_CONTROL workaround bo\n"); |
FREE(p); |
return NULL; |
} |
p->packed_sample_position_1x = |
sample_position_1x[0].x << 4 | |
sample_position_1x[0].y; |
/* pack into dwords */ |
for (i = 0; i < 4; i++) { |
p->packed_sample_position_4x |= |
sample_position_4x[i].x << (8 * i + 4) | |
sample_position_4x[i].y << (8 * i); |
p->packed_sample_position_8x[0] |= |
sample_position_8x[i].x << (8 * i + 4) | |
sample_position_8x[i].y << (8 * i); |
p->packed_sample_position_8x[1] |= |
sample_position_8x[4 + i].x << (8 * i + 4) | |
sample_position_8x[4 + i].y << (8 * i); |
} |
return p; |
} |
/* Free the pipeline and release the PIPE_CONTROL workaround bo, if any. */
void
ilo_3d_pipeline_destroy(struct ilo_3d_pipeline *p)
{
   if (p->workaround_bo)
      intel_bo_unreference(p->workaround_bo);
   FREE(p);
}
static void |
handle_invalid_batch_bo(struct ilo_3d_pipeline *p, bool unset) |
{ |
if (p->invalidate_flags & ILO_3D_PIPELINE_INVALIDATE_BATCH_BO) { |
if (p->dev->gen == ILO_GEN(6)) |
p->state.has_gen6_wa_pipe_control = false; |
if (unset) |
p->invalidate_flags &= ~ILO_3D_PIPELINE_INVALIDATE_BATCH_BO; |
} |
} |
/**
 * Emit context states and 3DPRIMITIVE.
 *
 * Returns false only when the draw does not fit even in an empty batch.
 * \param prim_generated  if non-NULL, receives the number of primitives
 *                        generated by this draw
 * \param prim_emitted    if non-NULL, receives the number of primitives
 *                        written out to the stream output targets
 */
bool
ilo_3d_pipeline_emit_draw(struct ilo_3d_pipeline *p,
                          const struct ilo_context *ilo,
                          int *prim_generated, int *prim_emitted)
{
   bool success;
   if (ilo->dirty & ILO_DIRTY_SO &&
       ilo->so.enabled && !ilo->so.append_bitmask) {
      /*
       * We keep track of the SVBI in the driver, so that we can restore it
       * when the HW context is invalidated (by another process).  The value
       * needs to be reset when stream output is enabled and the targets are
       * changed.
       */
      p->state.so_num_vertices = 0;
      /* on GEN7+, we need SOL_RESET to reset the SO write offsets */
      if (p->dev->gen >= ILO_GEN(7))
         ilo_cp_set_one_off_flags(p->cp, INTEL_EXEC_GEN7_SOL_RESET);
   }
   while (true) {
      struct ilo_cp_jmp_buf jmp;
      int err;
      /* we will rewind if aperture check below fails */
      ilo_cp_setjmp(p->cp, &jmp);
      handle_invalid_batch_bo(p, false);
      /* draw!  (implicit flushes are forbidden while emitting) */
      ilo_cp_assert_no_implicit_flush(p->cp, true);
      p->emit_draw(p, ilo);
      ilo_cp_assert_no_implicit_flush(p->cp, false);
      /* would all the referenced bos still fit in the aperture? */
      err = intel_winsys_check_aperture_space(ilo->winsys, &p->cp->bo, 1);
      if (!err) {
         success = true;
         break;
      }
      /* rewind */
      ilo_cp_longjmp(p->cp, &jmp);
      /* the batch was already empty and still does not fit: give up */
      if (ilo_cp_empty(p->cp)) {
         success = false;
         break;
      }
      else {
         /* flush and try again */
         ilo_cp_flush(p->cp);
      }
   }
   if (success) {
      /* update the software SVBI and report primitive statistics */
      const int num_verts =
         u_vertices_per_prim(u_reduced_prim(ilo->draw->mode));
      const int max_emit =
         (p->state.so_max_vertices - p->state.so_num_vertices) / num_verts;
      const int generated =
         u_reduced_prims_for_vertices(ilo->draw->mode, ilo->draw->count);
      const int emitted = MIN2(generated, max_emit);
      p->state.so_num_vertices += emitted * num_verts;
      if (prim_generated)
         *prim_generated = generated;
      if (prim_emitted)
         *prim_emitted = emitted;
   }
   /* everything that was flagged for re-emission has been re-emitted */
   p->invalidate_flags = 0x0;
   return success;
}
/**
 * Emit PIPE_CONTROL to flush all caches.
 */
void
ilo_3d_pipeline_emit_flush(struct ilo_3d_pipeline *p)
{
   /* apply (and clear) any pending batch-bo invalidation first */
   handle_invalid_batch_bo(p, true);
   p->emit_flush(p);
}
/**
 * Emit PIPE_CONTROL with PIPE_CONTROL_WRITE_TIMESTAMP post-sync op.
 *
 * The timestamp is written into \p bo at slot \p index.
 */
void
ilo_3d_pipeline_emit_write_timestamp(struct ilo_3d_pipeline *p,
                                     struct intel_bo *bo, int index)
{
   /* apply (and clear) any pending batch-bo invalidation first */
   handle_invalid_batch_bo(p, true);
   p->emit_write_timestamp(p, bo, index);
}
/**
 * Emit PIPE_CONTROL with PIPE_CONTROL_WRITE_DEPTH_COUNT post-sync op.
 *
 * The depth count is written into \p bo at slot \p index.
 */
void
ilo_3d_pipeline_emit_write_depth_count(struct ilo_3d_pipeline *p,
                                       struct intel_bo *bo, int index)
{
   /* apply (and clear) any pending batch-bo invalidation first */
   handle_invalid_batch_bo(p, true);
   p->emit_write_depth_count(p, bo, index);
}
void |
ilo_3d_pipeline_get_sample_position(struct ilo_3d_pipeline *p, |
unsigned sample_count, |
unsigned sample_index, |
float *x, float *y) |
{ |
const struct sample_position *pos; |
switch (sample_count) { |
case 1: |
assert(sample_index < Elements(sample_position_1x)); |
pos = sample_position_1x; |
break; |
case 4: |
assert(sample_index < Elements(sample_position_4x)); |
pos = sample_position_4x; |
break; |
case 8: |
assert(sample_index < Elements(sample_position_8x)); |
pos = sample_position_8x; |
break; |
default: |
assert(!"unknown sample count"); |
*x = 0.5f; |
*y = 0.5f; |
return; |
break; |
} |
*x = (float) pos[sample_index].x / 16.0f; |
*y = (float) pos[sample_index].y / 16.0f; |
} |
/contrib/sdk/sources/Mesa/src/gallium/drivers/ilo/ilo_3d_pipeline.h |
---|
0,0 → 1,281 |
/* |
* Mesa 3-D graphics library |
* |
* Copyright (C) 2013 LunarG, Inc. |
* |
* Permission is hereby granted, free of charge, to any person obtaining a |
* copy of this software and associated documentation files (the "Software"), |
* to deal in the Software without restriction, including without limitation |
* the rights to use, copy, modify, merge, publish, distribute, sublicense, |
* and/or sell copies of the Software, and to permit persons to whom the |
* Software is furnished to do so, subject to the following conditions: |
* |
* The above copyright notice and this permission notice shall be included |
* in all copies or substantial portions of the Software. |
* |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING |
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER |
* DEALINGS IN THE SOFTWARE. |
* |
* Authors: |
* Chia-I Wu <olv@lunarg.com> |
*/ |
#ifndef ILO_3D_PIPELINE_H |
#define ILO_3D_PIPELINE_H |
#include "ilo_common.h" |
#include "ilo_context.h" |
#include "ilo_gpe_gen6.h" |
#include "ilo_gpe_gen7.h" |
struct intel_bo; |
struct ilo_cp; |
struct ilo_context; |
/* bits accumulated in ilo_3d_pipeline::invalidate_flags */
enum ilo_3d_pipeline_invalidate_flags {
   ILO_3D_PIPELINE_INVALIDATE_HW = 1 << 0,
   ILO_3D_PIPELINE_INVALIDATE_BATCH_BO = 1 << 1,
   ILO_3D_PIPELINE_INVALIDATE_STATE_BO = 1 << 2,
   ILO_3D_PIPELINE_INVALIDATE_KERNEL_BO = 1 << 3,
   ILO_3D_PIPELINE_INVALIDATE_ALL = 0xffffffff,
};
/* actions whose batch-space cost estimate_size() can report */
enum ilo_3d_pipeline_action {
   ILO_3D_PIPELINE_DRAW,
   ILO_3D_PIPELINE_FLUSH,
   ILO_3D_PIPELINE_WRITE_TIMESTAMP,
   ILO_3D_PIPELINE_WRITE_DEPTH_COUNT,
};
/**
 * 3D pipeline.
 */
struct ilo_3d_pipeline {
   struct ilo_cp *cp;
   const struct ilo_dev_info *dev;
   /* pending ILO_3D_PIPELINE_INVALIDATE_* bits; cleared after a draw */
   uint32_t invalidate_flags;
   /* scratch bo for the PIPE_CONTROL workaround */
   struct intel_bo *workaround_bo;
   /* U0.4 sample positions packed one byte per sample (x high nibble,
    * y low nibble); filled in by ilo_3d_pipeline_create() */
   uint32_t packed_sample_position_1x;
   uint32_t packed_sample_position_4x;
   uint32_t packed_sample_position_8x[2];
   /* GEN-specific entry points, set by ilo_3d_pipeline_init_gen6/gen7() */
   int (*estimate_size)(struct ilo_3d_pipeline *pipeline,
                        enum ilo_3d_pipeline_action action,
                        const void *arg);
   void (*emit_draw)(struct ilo_3d_pipeline *pipeline,
                     const struct ilo_context *ilo);
   void (*emit_flush)(struct ilo_3d_pipeline *pipeline);
   void (*emit_write_timestamp)(struct ilo_3d_pipeline *pipeline,
                                struct intel_bo *bo, int index);
   void (*emit_write_depth_count)(struct ilo_3d_pipeline *pipeline,
                                  struct intel_bo *bo, int index);
   /**
    * all GPE functions of all GENs
    */
#define GEN6_EMIT(name) ilo_gpe_gen6_ ## name gen6_ ## name
   GEN6_EMIT(STATE_BASE_ADDRESS);
   GEN6_EMIT(STATE_SIP);
   GEN6_EMIT(PIPELINE_SELECT);
   GEN6_EMIT(3DSTATE_BINDING_TABLE_POINTERS);
   GEN6_EMIT(3DSTATE_SAMPLER_STATE_POINTERS);
   GEN6_EMIT(3DSTATE_URB);
   GEN6_EMIT(3DSTATE_VERTEX_BUFFERS);
   GEN6_EMIT(3DSTATE_VERTEX_ELEMENTS);
   GEN6_EMIT(3DSTATE_INDEX_BUFFER);
   GEN6_EMIT(3DSTATE_VF_STATISTICS);
   GEN6_EMIT(3DSTATE_VIEWPORT_STATE_POINTERS);
   GEN6_EMIT(3DSTATE_CC_STATE_POINTERS);
   GEN6_EMIT(3DSTATE_SCISSOR_STATE_POINTERS);
   GEN6_EMIT(3DSTATE_VS);
   GEN6_EMIT(3DSTATE_GS);
   GEN6_EMIT(3DSTATE_CLIP);
   GEN6_EMIT(3DSTATE_SF);
   GEN6_EMIT(3DSTATE_WM);
   GEN6_EMIT(3DSTATE_CONSTANT_VS);
   GEN6_EMIT(3DSTATE_CONSTANT_GS);
   GEN6_EMIT(3DSTATE_CONSTANT_PS);
   GEN6_EMIT(3DSTATE_SAMPLE_MASK);
   GEN6_EMIT(3DSTATE_DRAWING_RECTANGLE);
   GEN6_EMIT(3DSTATE_DEPTH_BUFFER);
   GEN6_EMIT(3DSTATE_POLY_STIPPLE_OFFSET);
   GEN6_EMIT(3DSTATE_POLY_STIPPLE_PATTERN);
   GEN6_EMIT(3DSTATE_LINE_STIPPLE);
   GEN6_EMIT(3DSTATE_AA_LINE_PARAMETERS);
   GEN6_EMIT(3DSTATE_GS_SVB_INDEX);
   GEN6_EMIT(3DSTATE_MULTISAMPLE);
   GEN6_EMIT(3DSTATE_STENCIL_BUFFER);
   GEN6_EMIT(3DSTATE_HIER_DEPTH_BUFFER);
   GEN6_EMIT(3DSTATE_CLEAR_PARAMS);
   GEN6_EMIT(PIPE_CONTROL);
   GEN6_EMIT(3DPRIMITIVE);
   GEN6_EMIT(INTERFACE_DESCRIPTOR_DATA);
   GEN6_EMIT(SF_VIEWPORT);
   GEN6_EMIT(CLIP_VIEWPORT);
   GEN6_EMIT(CC_VIEWPORT);
   GEN6_EMIT(COLOR_CALC_STATE);
   GEN6_EMIT(BLEND_STATE);
   GEN6_EMIT(DEPTH_STENCIL_STATE);
   GEN6_EMIT(SCISSOR_RECT);
   GEN6_EMIT(BINDING_TABLE_STATE);
   GEN6_EMIT(SURFACE_STATE);
   GEN6_EMIT(so_SURFACE_STATE);
   GEN6_EMIT(SAMPLER_STATE);
   GEN6_EMIT(SAMPLER_BORDER_COLOR_STATE);
   GEN6_EMIT(push_constant_buffer);
#undef GEN6_EMIT
#define GEN7_EMIT(name) ilo_gpe_gen7_ ## name gen7_ ## name
   GEN7_EMIT(3DSTATE_DEPTH_BUFFER);
   GEN7_EMIT(3DSTATE_CC_STATE_POINTERS);
   GEN7_EMIT(3DSTATE_GS);
   GEN7_EMIT(3DSTATE_SF);
   GEN7_EMIT(3DSTATE_WM);
   GEN7_EMIT(3DSTATE_SAMPLE_MASK);
   GEN7_EMIT(3DSTATE_CONSTANT_HS);
   GEN7_EMIT(3DSTATE_CONSTANT_DS);
   GEN7_EMIT(3DSTATE_HS);
   GEN7_EMIT(3DSTATE_TE);
   GEN7_EMIT(3DSTATE_DS);
   GEN7_EMIT(3DSTATE_STREAMOUT);
   GEN7_EMIT(3DSTATE_SBE);
   GEN7_EMIT(3DSTATE_PS);
   GEN7_EMIT(3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP);
   GEN7_EMIT(3DSTATE_VIEWPORT_STATE_POINTERS_CC);
   GEN7_EMIT(3DSTATE_BLEND_STATE_POINTERS);
   GEN7_EMIT(3DSTATE_DEPTH_STENCIL_STATE_POINTERS);
   GEN7_EMIT(3DSTATE_BINDING_TABLE_POINTERS_VS);
   GEN7_EMIT(3DSTATE_BINDING_TABLE_POINTERS_HS);
   GEN7_EMIT(3DSTATE_BINDING_TABLE_POINTERS_DS);
   GEN7_EMIT(3DSTATE_BINDING_TABLE_POINTERS_GS);
   GEN7_EMIT(3DSTATE_BINDING_TABLE_POINTERS_PS);
   GEN7_EMIT(3DSTATE_SAMPLER_STATE_POINTERS_VS);
   GEN7_EMIT(3DSTATE_SAMPLER_STATE_POINTERS_HS);
   GEN7_EMIT(3DSTATE_SAMPLER_STATE_POINTERS_DS);
   GEN7_EMIT(3DSTATE_SAMPLER_STATE_POINTERS_GS);
   GEN7_EMIT(3DSTATE_SAMPLER_STATE_POINTERS_PS);
   GEN7_EMIT(3DSTATE_URB_VS);
   GEN7_EMIT(3DSTATE_URB_HS);
   GEN7_EMIT(3DSTATE_URB_DS);
   GEN7_EMIT(3DSTATE_URB_GS);
   GEN7_EMIT(3DSTATE_PUSH_CONSTANT_ALLOC_VS);
   GEN7_EMIT(3DSTATE_PUSH_CONSTANT_ALLOC_HS);
   GEN7_EMIT(3DSTATE_PUSH_CONSTANT_ALLOC_DS);
   GEN7_EMIT(3DSTATE_PUSH_CONSTANT_ALLOC_GS);
   GEN7_EMIT(3DSTATE_PUSH_CONSTANT_ALLOC_PS);
   GEN7_EMIT(3DSTATE_SO_DECL_LIST);
   GEN7_EMIT(3DSTATE_SO_BUFFER);
   GEN7_EMIT(SF_CLIP_VIEWPORT);
#undef GEN7_EMIT
   /**
    * HW states.
    */
   struct ilo_3d_pipeline_state {
      bool has_gen6_wa_pipe_control;
      bool primitive_restart;
      int reduced_prim;
      /* software copy of the SVBI (see ilo_3d_pipeline_emit_draw()) */
      int so_num_vertices, so_max_vertices;
      /* NOTE(review): presumably batch offsets of the last-emitted state
       * atoms — confirm against the gen6/gen7 emit code */
      uint32_t SF_VIEWPORT;
      uint32_t CLIP_VIEWPORT;
      uint32_t SF_CLIP_VIEWPORT; /* GEN7+ */
      uint32_t CC_VIEWPORT;
      uint32_t COLOR_CALC_STATE;
      uint32_t BLEND_STATE;
      uint32_t DEPTH_STENCIL_STATE;
      uint32_t SCISSOR_RECT;
      /* per-stage state */
      struct {
         uint32_t BINDING_TABLE_STATE;
         int BINDING_TABLE_STATE_size;
         uint32_t SURFACE_STATE[ILO_MAX_VS_SURFACES];
         uint32_t SAMPLER_STATE;
         uint32_t SAMPLER_BORDER_COLOR_STATE[ILO_MAX_SAMPLERS];
         uint32_t PUSH_CONSTANT_BUFFER;
         int PUSH_CONSTANT_BUFFER_size;
      } vs;
      struct {
         uint32_t BINDING_TABLE_STATE;
         int BINDING_TABLE_STATE_size;
         uint32_t SURFACE_STATE[ILO_MAX_GS_SURFACES];
         bool active;
      } gs;
      struct {
         uint32_t BINDING_TABLE_STATE;
         int BINDING_TABLE_STATE_size;
         uint32_t SURFACE_STATE[ILO_MAX_WM_SURFACES];
         uint32_t SAMPLER_STATE;
         uint32_t SAMPLER_BORDER_COLOR_STATE[ILO_MAX_SAMPLERS];
      } wm;
   } state;
};
struct ilo_3d_pipeline * |
ilo_3d_pipeline_create(struct ilo_cp *cp, const struct ilo_dev_info *dev); |
void |
ilo_3d_pipeline_destroy(struct ilo_3d_pipeline *pipeline); |
/**
 * Accumulate ILO_3D_PIPELINE_INVALIDATE_* bits to be honored by the next
 * emit call.
 */
static inline void
ilo_3d_pipeline_invalidate(struct ilo_3d_pipeline *p, uint32_t flags)
{
   p->invalidate_flags |= flags;
}
/**
 * Estimate the size of an action.
 */
static inline int
ilo_3d_pipeline_estimate_size(struct ilo_3d_pipeline *pipeline,
                              enum ilo_3d_pipeline_action action,
                              const void *arg)
{
   /* dispatch to the GEN-specific estimator */
   return pipeline->estimate_size(pipeline, action, arg);
}
bool |
ilo_3d_pipeline_emit_draw(struct ilo_3d_pipeline *p, |
const struct ilo_context *ilo, |
int *prim_generated, int *prim_emitted); |
void |
ilo_3d_pipeline_emit_flush(struct ilo_3d_pipeline *p); |
void |
ilo_3d_pipeline_emit_write_timestamp(struct ilo_3d_pipeline *p, |
struct intel_bo *bo, int index); |
void |
ilo_3d_pipeline_emit_write_depth_count(struct ilo_3d_pipeline *p, |
struct intel_bo *bo, int index); |
void |
ilo_3d_pipeline_get_sample_position(struct ilo_3d_pipeline *p, |
unsigned sample_count, |
unsigned sample_index, |
float *x, float *y); |
void |
ilo_3d_pipeline_dump(struct ilo_3d_pipeline *p); |
#endif /* ILO_3D_PIPELINE_H */ |
/contrib/sdk/sources/Mesa/src/gallium/drivers/ilo/ilo_3d_pipeline_dump.c |
---|
0,0 → 1,643 |
/* |
* Copyright © 2007 Intel Corporation |
* Copyright (C) 2012-2013 LunarG, Inc. |
* |
* Permission is hereby granted, free of charge, to any person obtaining a |
* copy of this software and associated documentation files (the "Software"), |
* to deal in the Software without restriction, including without limitation |
* the rights to use, copy, modify, merge, publish, distribute, sublicense, |
* and/or sell copies of the Software, and to permit persons to whom the |
* Software is furnished to do so, subject to the following conditions: |
* |
* The above copyright notice and this permission notice (including the next |
* paragraph) shall be included in all copies or substantial portions of the |
* Software. |
* |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING |
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS |
* IN THE SOFTWARE. |
* |
* Authors: |
* Eric Anholt <eric@anholt.net> |
* Chia-I Wu <olv@lunarg.com> |
*/ |
#include "intel_winsys.h" |
#include "ilo_cp.h" |
#include "ilo_3d_pipeline.h" |
/* let the compiler check batch_out()'s format string against its args */
#define PRINTFLIKE(f, a) _util_printf_format(f, a)
/* GL typedefs expected by brw_structs.h */
typedef short GLshort;
typedef int GLint;
typedef unsigned char GLubyte;
typedef unsigned int GLuint;
typedef float GLfloat;
#include <stdint.h>
#include <stdarg.h>
#include <stdio.h>
#include "brw_structs.h"
#include "brw_defines.h"
/* just enough of the i965 context types for the borrowed dump helpers
 * below, which only read intel.gen and intel.batch.bo->virtual */
struct intel_context {
   int gen;
   struct {
      struct {
         void *virtual;
      } *bo, bo_dst;
   } batch;
};
struct brw_context {
   struct intel_context intel;
};
/* print one decoded dword to stderr as "<addr>: <raw dword>: <name>: "
 * followed by the printf-formatted description */
static void
batch_out(struct brw_context *brw, const char *name, uint32_t offset,
          int index, char *fmt, ...) PRINTFLIKE(5, 6);
static void
batch_out(struct brw_context *brw, const char *name, uint32_t offset,
          int index, char *fmt, ...)
{
   struct intel_context *intel = &brw->intel;
   /* the bo is assumed mapped; read dword 'index' at 'offset' */
   uint32_t *data = intel->batch.bo->virtual + offset;
   va_list va;
   fprintf(stderr, "0x%08x: 0x%08x: %8s: ",
           offset + index * 4, data[index], name);
   va_start(va, fmt);
   vfprintf(stderr, fmt, va);
   va_end(va);
}
/* map a BRW_SURFACE_* type encoding to a printable name */
static const char *
get_965_surfacetype(unsigned int surfacetype)
{
   static const char *const names[8] = {
      [0] = "1D",
      [1] = "2D",
      [2] = "3D",
      [3] = "CUBE",
      [4] = "BUFFER",
      [7] = "NULL",
   };

   if (surfacetype < 8 && names[surfacetype])
      return names[surfacetype];

   return "unknown";
}
/* map a BRW_SURFACEFORMAT_* encoding to a printable name; only the few
 * formats this decoder cares about are spelled out */
static const char *
get_965_surface_format(unsigned int surface_format)
{
   if (surface_format == 0x000)
      return "r32g32b32a32_float";
   if (surface_format == 0x0c1)
      return "b8g8r8a8_unorm";
   if (surface_format == 0x100)
      return "b5g6r5_unorm";
   if (surface_format == 0x102)
      return "b5g5r5a1_unorm";
   if (surface_format == 0x104)
      return "b4g4r4a4_unorm";
   return "unknown";
}
/* decode a VS unit state structure for debug output */
static void dump_vs_state(struct brw_context *brw, uint32_t offset)
{
   struct intel_context *intel = &brw->intel;
   const char *name = "VS_STATE";
   struct brw_vs_unit_state *vs = intel->batch.bo->virtual + offset;
   /* dwords 0-3 are thread setup; only dword 4's thread count is decoded */
   batch_out(brw, name, offset, 0, "thread0\n");
   batch_out(brw, name, offset, 1, "thread1\n");
   batch_out(brw, name, offset, 2, "thread2\n");
   batch_out(brw, name, offset, 3, "thread3\n");
   batch_out(brw, name, offset, 4, "thread4: %d threads\n",
             vs->thread4.max_threads + 1);
   batch_out(brw, name, offset, 5, "vs5\n");
   batch_out(brw, name, offset, 6, "vs6\n");
}
static void dump_gs_state(struct brw_context *brw, uint32_t offset) |
{ |
struct intel_context *intel = &brw->intel; |
const char *name = "GS_STATE"; |
struct brw_gs_unit_state *gs = intel->batch.bo->virtual + offset; |
batch_out(brw, name, offset, 0, "thread0\n"); |
batch_out(brw, name, offset, 1, "thread1\n"); |
batch_out(brw, name, offset, 2, "thread2\n"); |
batch_out(brw, name, offset, 3, "thread3\n"); |
batch_out(brw, name, offset, 4, "thread4: %d threads\n", |
gs->thread4.max_threads + 1); |
batch_out(brw, name, offset, 5, "vs5\n"); |
batch_out(brw, name, offset, 6, "vs6\n"); |
} |
/* decode a CLIP unit state structure, including its viewport extents */
static void dump_clip_state(struct brw_context *brw, uint32_t offset)
{
   struct intel_context *intel = &brw->intel;
   const char *name = "CLIP_STATE";
   struct brw_clip_unit_state *clip = intel->batch.bo->virtual + offset;
   batch_out(brw, name, offset, 0, "thread0\n");
   batch_out(brw, name, offset, 1, "thread1\n");
   batch_out(brw, name, offset, 2, "thread2\n");
   batch_out(brw, name, offset, 3, "thread3\n");
   batch_out(brw, name, offset, 4, "thread4: %d threads\n",
             clip->thread4.max_threads + 1);
   batch_out(brw, name, offset, 5, "clip5\n");
   batch_out(brw, name, offset, 6, "clip6\n");
   /* guardband/viewport extents stored in the unit state */
   batch_out(brw, name, offset, 7, "vp xmin %f\n", clip->viewport_xmin);
   batch_out(brw, name, offset, 8, "vp xmax %f\n", clip->viewport_xmax);
   batch_out(brw, name, offset, 9, "vp ymin %f\n", clip->viewport_ymin);
   batch_out(brw, name, offset, 10, "vp ymax %f\n", clip->viewport_ymax);
}
/* decode an SF unit state structure for debug output */
static void dump_sf_state(struct brw_context *brw, uint32_t offset)
{
   struct intel_context *intel = &brw->intel;
   const char *name = "SF_STATE";
   struct brw_sf_unit_state *sf = intel->batch.bo->virtual + offset;
   batch_out(brw, name, offset, 0, "thread0\n");
   batch_out(brw, name, offset, 1, "thread1\n");
   batch_out(brw, name, offset, 2, "thread2\n");
   batch_out(brw, name, offset, 3, "thread3\n");
   batch_out(brw, name, offset, 4, "thread4: %d threads\n",
             sf->thread4.max_threads + 1);
   batch_out(brw, name, offset, 5, "sf5: viewport offset\n");
   batch_out(brw, name, offset, 6, "sf6\n");
   batch_out(brw, name, offset, 7, "sf7\n");
}
/* decode a WM (pixel shader) unit state structure for debug output */
static void dump_wm_state(struct brw_context *brw, uint32_t offset)
{
   struct intel_context *intel = &brw->intel;
   const char *name = "WM_STATE";
   struct brw_wm_unit_state *wm = intel->batch.bo->virtual + offset;
   batch_out(brw, name, offset, 0, "thread0\n");
   batch_out(brw, name, offset, 1, "thread1\n");
   batch_out(brw, name, offset, 2, "thread2\n");
   batch_out(brw, name, offset, 3, "thread3\n");
   batch_out(brw, name, offset, 4, "wm4\n");
   /* dword 5 packs the dispatch-width/depth/kill flags and thread count */
   batch_out(brw, name, offset, 5, "wm5: %s%s%s%s%s%s, %d threads\n",
             wm->wm5.enable_8_pix ? "8pix" : "",
             wm->wm5.enable_16_pix ? "16pix" : "",
             wm->wm5.program_uses_depth ? ", uses depth" : "",
             wm->wm5.program_computes_depth ? ", computes depth" : "",
             wm->wm5.program_uses_killpixel ? ", kills" : "",
             wm->wm5.thread_dispatch_enable ? "" : ", no dispatch",
             wm->wm5.max_threads + 1);
   batch_out(brw, name, offset, 6, "depth offset constant %f\n",
             wm->global_depth_offset_constant);
   batch_out(brw, name, offset, 7, "depth offset scale %f\n",
             wm->global_depth_offset_scale);
   batch_out(brw, name, offset, 8, "wm8: kernel 1 (gen5+)\n");
   batch_out(brw, name, offset, 9, "wm9: kernel 2 (gen5+)\n");
   batch_out(brw, name, offset, 10, "wm10: kernel 3 (gen5+)\n");
}
/* decode a pre-gen7 SURFACE_STATE for debug output */
static void dump_surface_state(struct brw_context *brw, uint32_t offset)
{
   const char *name = "SURF";
   uint32_t *surf = brw->intel.batch.bo->virtual + offset;
   batch_out(brw, name, offset, 0, "%s %s\n",
             get_965_surfacetype(GET_FIELD(surf[0], BRW_SURFACE_TYPE)),
             get_965_surface_format(GET_FIELD(surf[0], BRW_SURFACE_FORMAT)));
   batch_out(brw, name, offset, 1, "offset\n");
   batch_out(brw, name, offset, 2, "%dx%d size, %d mips\n",
             GET_FIELD(surf[2], BRW_SURFACE_WIDTH) + 1,
             GET_FIELD(surf[2], BRW_SURFACE_HEIGHT) + 1,
             GET_FIELD(surf[2], BRW_SURFACE_LOD));
   /* pitch field is pitch-1, hence the +1 */
   batch_out(brw, name, offset, 3, "pitch %d, %s tiled\n",
             GET_FIELD(surf[3], BRW_SURFACE_PITCH) + 1,
             (surf[3] & BRW_SURFACE_TILED) ?
             ((surf[3] & BRW_SURFACE_TILED_Y) ? "Y" : "X") : "not");
   batch_out(brw, name, offset, 4, "mip base %d\n",
             GET_FIELD(surf[4], BRW_SURFACE_MIN_LOD));
   batch_out(brw, name, offset, 5, "x,y offset: %d,%d\n",
             GET_FIELD(surf[5], BRW_SURFACE_X_OFFSET),
             GET_FIELD(surf[5], BRW_SURFACE_Y_OFFSET));
}
/* decode a gen7 SURFACE_STATE (different field layout than pre-gen7) */
static void dump_gen7_surface_state(struct brw_context *brw, uint32_t offset)
{
   const char *name = "SURF";
   uint32_t *surf = brw->intel.batch.bo->virtual + offset;
   batch_out(brw, name, offset, 0, "%s %s\n",
             get_965_surfacetype(GET_FIELD(surf[0], BRW_SURFACE_TYPE)),
             get_965_surface_format(GET_FIELD(surf[0], BRW_SURFACE_FORMAT)));
   batch_out(brw, name, offset, 1, "offset\n");
   batch_out(brw, name, offset, 2, "%dx%d size, %d mips\n",
             GET_FIELD(surf[2], GEN7_SURFACE_WIDTH) + 1,
             GET_FIELD(surf[2], GEN7_SURFACE_HEIGHT) + 1,
             surf[5] & INTEL_MASK(3, 0));
   /* gen7 keeps the pitch in dword 3 bits 17:0 and tiling in dword 0 */
   batch_out(brw, name, offset, 3, "pitch %d, %stiled\n",
             (surf[3] & INTEL_MASK(17, 0)) + 1,
             (surf[0] & (1 << 14)) ? "" : "not ");
   batch_out(brw, name, offset, 4, "mip base %d\n",
             GET_FIELD(surf[5], GEN7_SURFACE_MIN_LOD));
   batch_out(brw, name, offset, 5, "x,y offset: %d,%d\n",
             GET_FIELD(surf[5], BRW_SURFACE_X_OFFSET),
             GET_FIELD(surf[5], BRW_SURFACE_Y_OFFSET));
}
/* decode a sampler default (border) color block for debug output */
static void
dump_sdc(struct brw_context *brw, uint32_t offset)
{
   const char *name = "SDC";
   struct intel_context *intel = &brw->intel;
   if (intel->gen >= 5 && intel->gen <= 6) {
      struct gen5_sampler_default_color *sdc = (intel->batch.bo->virtual +
                                                offset);
      batch_out(brw, name, offset, 0, "unorm rgba\n");
      batch_out(brw, name, offset, 1, "r %f\n", sdc->f[0]);
      /* NOTE(review): labeling f[1] "b" and f[2] "g" looks swapped relative
       * to an RGBA float array — verify against the gen5 border color
       * layout in the PRM before trusting these labels */
      batch_out(brw, name, offset, 2, "b %f\n", sdc->f[1]);
      batch_out(brw, name, offset, 3, "g %f\n", sdc->f[2]);
      batch_out(brw, name, offset, 4, "a %f\n", sdc->f[3]);
      batch_out(brw, name, offset, 5, "half float rg\n");
      batch_out(brw, name, offset, 6, "half float ba\n");
      batch_out(brw, name, offset, 7, "u16 rg\n");
      batch_out(brw, name, offset, 8, "u16 ba\n");
      batch_out(brw, name, offset, 9, "s16 rg\n");
      batch_out(brw, name, offset, 10, "s16 ba\n");
      batch_out(brw, name, offset, 11, "s8 rgba\n");
   } else {
      /* other gens store just four floats */
      struct brw_sampler_default_color *sdc = (intel->batch.bo->virtual +
                                               offset);
      batch_out(brw, name, offset, 0, "r %f\n", sdc->color[0]);
      batch_out(brw, name, offset, 1, "g %f\n", sdc->color[1]);
      batch_out(brw, name, offset, 2, "b %f\n", sdc->color[2]);
      batch_out(brw, name, offset, 3, "a %f\n", sdc->color[3]);
   }
}
static void dump_sampler_state(struct brw_context *brw, |
uint32_t offset, uint32_t size) |
{ |
struct intel_context *intel = &brw->intel; |
int i; |
struct brw_sampler_state *samp = intel->batch.bo->virtual + offset; |
assert(intel->gen < 7); |
for (i = 0; i < size / sizeof(*samp); i++) { |
char name[20]; |
sprintf(name, "WM SAMP%d", i); |
batch_out(brw, name, offset, 0, "filtering\n"); |
batch_out(brw, name, offset, 1, "wrapping, lod\n"); |
batch_out(brw, name, offset, 2, "default color pointer\n"); |
batch_out(brw, name, offset, 3, "chroma key, aniso\n"); |
samp++; |
offset += sizeof(*samp); |
} |
} |
static void dump_gen7_sampler_state(struct brw_context *brw, |
uint32_t offset, uint32_t size) |
{ |
struct intel_context *intel = &brw->intel; |
struct gen7_sampler_state *samp = intel->batch.bo->virtual + offset; |
int i; |
assert(intel->gen >= 7); |
for (i = 0; i < size / sizeof(*samp); i++) { |
char name[20]; |
sprintf(name, "WM SAMP%d", i); |
batch_out(brw, name, offset, 0, "filtering\n"); |
batch_out(brw, name, offset, 1, "wrapping, lod\n"); |
batch_out(brw, name, offset, 2, "default color pointer\n"); |
batch_out(brw, name, offset, 3, "chroma key, aniso\n"); |
samp++; |
offset += sizeof(*samp); |
} |
} |
/* decode a pre-gen7 SF_VIEWPORT (viewport matrix + scissor) */
static void dump_sf_viewport_state(struct brw_context *brw,
                                   uint32_t offset)
{
   struct intel_context *intel = &brw->intel;
   const char *name = "SF VP";
   struct brw_sf_viewport *vp = intel->batch.bo->virtual + offset;
   /* SF_VIEWPORT was merged into SF_CLIP_VIEWPORT on gen7 */
   assert(intel->gen < 7);
   batch_out(brw, name, offset, 0, "m00 = %f\n", vp->viewport.m00);
   batch_out(brw, name, offset, 1, "m11 = %f\n", vp->viewport.m11);
   batch_out(brw, name, offset, 2, "m22 = %f\n", vp->viewport.m22);
   batch_out(brw, name, offset, 3, "m30 = %f\n", vp->viewport.m30);
   batch_out(brw, name, offset, 4, "m31 = %f\n", vp->viewport.m31);
   batch_out(brw, name, offset, 5, "m32 = %f\n", vp->viewport.m32);
   batch_out(brw, name, offset, 6, "top left = %d,%d\n",
             vp->scissor.xmin, vp->scissor.ymin);
   batch_out(brw, name, offset, 7, "bottom right = %d,%d\n",
             vp->scissor.xmax, vp->scissor.ymax);
}
static void dump_clip_viewport_state(struct brw_context *brw, |
uint32_t offset) |
{ |
struct intel_context *intel = &brw->intel; |
const char *name = "CLIP VP"; |
struct brw_clipper_viewport *vp = intel->batch.bo->virtual + offset; |
assert(intel->gen < 7); |
batch_out(brw, name, offset, 0, "xmin = %f\n", vp->xmin); |
batch_out(brw, name, offset, 1, "xmax = %f\n", vp->xmax); |
batch_out(brw, name, offset, 2, "ymin = %f\n", vp->ymin); |
batch_out(brw, name, offset, 3, "ymax = %f\n", vp->ymax); |
} |
/*
 * Decode the GEN7+ combined SF_CLIP_VIEWPORT at `offset`: the viewport
 * transform followed by the guard-band extents.
 */
static void dump_sf_clip_viewport_state(struct brw_context *brw,
                                        uint32_t offset)
{
   struct intel_context *intel = &brw->intel;
   const char *name = "SF_CLIP VP";
   struct gen7_sf_clip_viewport *vp = intel->batch.bo->virtual + offset;

   assert(intel->gen >= 7);

   batch_out(brw, name, offset, 0, "m00 = %f\n", vp->viewport.m00);
   batch_out(brw, name, offset, 1, "m11 = %f\n", vp->viewport.m11);
   batch_out(brw, name, offset, 2, "m22 = %f\n", vp->viewport.m22);
   batch_out(brw, name, offset, 3, "m30 = %f\n", vp->viewport.m30);
   batch_out(brw, name, offset, 4, "m31 = %f\n", vp->viewport.m31);
   batch_out(brw, name, offset, 5, "m32 = %f\n", vp->viewport.m32);
   batch_out(brw, name, offset, 6, "guardband xmin = %f\n", vp->guardband.xmin);
   batch_out(brw, name, offset, 7, "guardband xmax = %f\n", vp->guardband.xmax);
   batch_out(brw, name, offset, 8, "guardband ymin = %f\n", vp->guardband.ymin);
   batch_out(brw, name, offset, 9, "guardband ymax = %f\n", vp->guardband.ymax);
}
/* Decode the CC_VIEWPORT at `offset`: the depth range used for clamping. */
static void dump_cc_viewport_state(struct brw_context *brw, uint32_t offset)
{
   const char *name = "CC VP";
   struct brw_cc_viewport *vp = brw->intel.batch.bo->virtual + offset;

   batch_out(brw, name, offset, 0, "min_depth = %f\n", vp->min_depth);
   batch_out(brw, name, offset, 1, "max_depth = %f\n", vp->max_depth);
}
/*
 * Decode the GEN6-layout DEPTH_STENCIL_STATE at `offset`: stencil
 * enable/func/write, stencil masks, and depth test/write settings.
 */
static void dump_depth_stencil_state(struct brw_context *brw, uint32_t offset)
{
   const char *name = "D_S";
   struct gen6_depth_stencil_state *ds = brw->intel.batch.bo->virtual + offset;

   batch_out(brw, name, offset, 0,
             "stencil %sable, func %d, write %sable\n",
             ds->ds0.stencil_enable ? "en" : "dis",
             ds->ds0.stencil_func,
             ds->ds0.stencil_write_enable ? "en" : "dis");
   batch_out(brw, name, offset, 1,
             "stencil test mask 0x%x, write mask 0x%x\n",
             ds->ds1.stencil_test_mask, ds->ds1.stencil_write_mask);
   batch_out(brw, name, offset, 2,
             "depth test %sable, func %d, write %sable\n",
             ds->ds2.depth_test_enable ? "en" : "dis",
             ds->ds2.depth_test_func,
             ds->ds2.depth_write_enable ? "en" : "dis");
}
/*
 * Dump the eight dwords of the GEN4-layout CC state at `offset`.  The
 * contents are not decoded; only dword 4 (the viewport pointer) carries a
 * descriptive label.
 */
static void dump_cc_state_gen4(struct brw_context *brw, uint32_t offset)
{
   static const char *const labels[8] = {
      "cc0\n", "cc1\n", "cc2\n", "cc3\n",
      "cc4: viewport offset\n", "cc5\n", "cc6\n", "cc7\n",
   };
   const char *name = "CC";
   int dw;

   for (dw = 0; dw < 8; dw++)
      batch_out(brw, name, offset, dw, labels[dw]);
}
/*
 * Decode the GEN6-layout COLOR_CALC_STATE at `offset`: alpha test format,
 * stencil reference values, and the blend constant color.
 */
static void dump_cc_state_gen6(struct brw_context *brw, uint32_t offset)
{
   const char *name = "CC";
   struct gen6_color_calc_state *cc = brw->intel.batch.bo->virtual + offset;

   batch_out(brw, name, offset, 0,
             "alpha test format %s, round disable %d, stencil ref %d, "
             "bf stencil ref %d\n",
             cc->cc0.alpha_test_format ? "FLOAT32" : "UNORM8",
             cc->cc0.round_disable,
             cc->cc0.stencil_ref,
             cc->cc0.bf_stencil_ref);
   /* dword 1 is not decoded */
   batch_out(brw, name, offset, 1, "\n");
   batch_out(brw, name, offset, 2, "constant red %f\n", cc->constant_r);
   batch_out(brw, name, offset, 3, "constant green %f\n", cc->constant_g);
   batch_out(brw, name, offset, 4, "constant blue %f\n", cc->constant_b);
   batch_out(brw, name, offset, 5, "constant alpha %f\n", cc->constant_a);
}
/* Dump the two dwords of BLEND_STATE at `offset`; contents not decoded. */
static void dump_blend_state(struct brw_context *brw, uint32_t offset)
{
   const char *name = "BLEND";
   int dw;

   for (dw = 0; dw < 2; dw++)
      batch_out(brw, name, offset, dw, "\n");
}
/* Decode the SCISSOR_RECT at `offset`: the min/max screen coordinates. */
static void
dump_scissor(struct brw_context *brw, uint32_t offset)
{
   const char *name = "SCISSOR";
   struct intel_context *intel = &brw->intel;
   struct gen6_scissor_rect *scissor = intel->batch.bo->virtual + offset;

   batch_out(brw, name, offset, 0, "xmin %d, ymin %d\n",
             scissor->xmin, scissor->ymin);
   batch_out(brw, name, offset, 1, "xmax %d, ymax %d\n",
             scissor->xmax, scissor->ymax);
}
static void |
dump_vs_constants(struct brw_context *brw, uint32_t offset, uint32_t size) |
{ |
const char *name = "VS_CONST"; |
struct intel_context *intel = &brw->intel; |
uint32_t *as_uint = intel->batch.bo->virtual + offset; |
float *as_float = intel->batch.bo->virtual + offset; |
int i; |
for (i = 0; i < size / 4; i += 4) { |
batch_out(brw, name, offset, i, "%3d: (% f % f % f % f) (0x%08x 0x%08x 0x%08x 0x%08x)\n", |
i / 4, |
as_float[i], as_float[i + 1], as_float[i + 2], as_float[i + 3], |
as_uint[i], as_uint[i + 1], as_uint[i + 2], as_uint[i + 3]); |
} |
} |
static void |
dump_wm_constants(struct brw_context *brw, uint32_t offset, uint32_t size) |
{ |
const char *name = "WM_CONST"; |
struct intel_context *intel = &brw->intel; |
uint32_t *as_uint = intel->batch.bo->virtual + offset; |
float *as_float = intel->batch.bo->virtual + offset; |
int i; |
for (i = 0; i < size / 4; i += 4) { |
batch_out(brw, name, offset, i, "%3d: (% f % f % f % f) (0x%08x 0x%08x 0x%08x 0x%08x)\n", |
i / 4, |
as_float[i], as_float[i + 1], as_float[i + 2], as_float[i + 3], |
as_uint[i], as_uint[i + 1], as_uint[i + 2], as_uint[i + 3]); |
} |
} |
static void dump_binding_table(struct brw_context *brw, uint32_t offset, |
uint32_t size) |
{ |
char name[20]; |
int i; |
uint32_t *data = brw->intel.batch.bo->virtual + offset; |
for (i = 0; i < size / 4; i++) { |
if (data[i] == 0) |
continue; |
sprintf(name, "BIND%d", i); |
batch_out(brw, name, offset, i, "surface state address\n"); |
} |
} |
/*
 * Fill in a minimal brw_context so the classic-i965-style dump helpers
 * above can decode ilo's batch.  Only the fields the dumpers read are
 * set: the major gen number and the CPU mapping of the batch bo.
 */
static void
init_brw(struct brw_context *brw, struct ilo_3d_pipeline *p)
{
   brw->intel.gen = ILO_GEN_GET_MAJOR(p->dev->gen);
   /* point batch.bo at a local wrapper holding the mapped pointer */
   brw->intel.batch.bo_dst.virtual = intel_bo_get_virtual(p->cp->bo);
   brw->intel.batch.bo = &brw->intel.batch.bo_dst;
}
/*
 * Decode every state buffer the pipeline currently references: viewports,
 * blend/CC/depth-stencil states, then per-stage (VS, GS, WM) surface
 * states, binding tables, border colors, samplers, and push constants,
 * and finally the scissor rectangle.  Requires the batch bo to be mapped
 * (see ilo_3d_pipeline_dump()).
 */
static void
dump_3d_state(struct ilo_3d_pipeline *p)
{
   struct brw_context brw;
   int num_states, i;

   init_brw(&brw, p);

   /* GEN7 merged SF and CLIP viewports into SF_CLIP_VIEWPORT */
   if (brw.intel.gen >= 7) {
      dump_cc_viewport_state(&brw, p->state.CC_VIEWPORT);
      dump_sf_clip_viewport_state(&brw, p->state.SF_CLIP_VIEWPORT);
   }
   else {
      dump_clip_viewport_state(&brw, p->state.CLIP_VIEWPORT);
      dump_sf_viewport_state(&brw, p->state.SF_VIEWPORT);
      dump_cc_viewport_state(&brw, p->state.CC_VIEWPORT);
   }

   dump_blend_state(&brw, p->state.BLEND_STATE);
   dump_cc_state_gen6(&brw, p->state.COLOR_CALC_STATE);
   dump_depth_stencil_state(&brw, p->state.DEPTH_STENCIL_STATE);

   /* VS */
   num_states = p->state.vs.BINDING_TABLE_STATE_size;
   /* NOTE(review): unlike the GS and WM loops below, this one does not
    * skip zero SURFACE_STATE entries — confirm that is intentional */
   for (i = 0; i < num_states; i++) {
      if (brw.intel.gen < 7)
         dump_surface_state(&brw, p->state.vs.SURFACE_STATE[i]);
      else
         dump_gen7_surface_state(&brw, p->state.vs.SURFACE_STATE[i]);
   }
   dump_binding_table(&brw, p->state.vs.BINDING_TABLE_STATE, num_states * 4);

   /* count and dump only the border colors that are present */
   num_states = 0;
   for (i = 0; i < Elements(p->state.vs.SAMPLER_BORDER_COLOR_STATE); i++) {
      if (!p->state.vs.SAMPLER_BORDER_COLOR_STATE[i])
         continue;

      dump_sdc(&brw, p->state.vs.SAMPLER_BORDER_COLOR_STATE[i]);
      num_states++;
   }
   /* 16 bytes per sampler state entry */
   if (brw.intel.gen < 7)
      dump_sampler_state(&brw, p->state.vs.SAMPLER_STATE, num_states * 16);
   else
      dump_gen7_sampler_state(&brw, p->state.vs.SAMPLER_STATE, num_states * 16);

   if (p->state.vs.PUSH_CONSTANT_BUFFER_size) {
      dump_vs_constants(&brw, p->state.vs.PUSH_CONSTANT_BUFFER,
            p->state.vs.PUSH_CONSTANT_BUFFER_size);
   }

   /* GS */
   num_states = p->state.gs.BINDING_TABLE_STATE_size;
   for (i = 0; i < num_states; i++) {
      if (!p->state.gs.SURFACE_STATE[i])
         continue;

      if (brw.intel.gen < 7)
         dump_surface_state(&brw, p->state.gs.SURFACE_STATE[i]);
      else
         dump_gen7_surface_state(&brw, p->state.gs.SURFACE_STATE[i]);
   }
   dump_binding_table(&brw, p->state.gs.BINDING_TABLE_STATE, num_states * 4);

   /* WM */
   num_states = p->state.wm.BINDING_TABLE_STATE_size;
   for (i = 0; i < num_states; i++) {
      if (!p->state.wm.SURFACE_STATE[i])
         continue;

      if (brw.intel.gen < 7)
         dump_surface_state(&brw, p->state.wm.SURFACE_STATE[i]);
      else
         dump_gen7_surface_state(&brw, p->state.wm.SURFACE_STATE[i]);
   }
   dump_binding_table(&brw, p->state.wm.BINDING_TABLE_STATE, num_states * 4);

   num_states = 0;
   for (i = 0; i < Elements(p->state.wm.SAMPLER_BORDER_COLOR_STATE); i++) {
      if (!p->state.wm.SAMPLER_BORDER_COLOR_STATE[i])
         continue;

      dump_sdc(&brw, p->state.wm.SAMPLER_BORDER_COLOR_STATE[i]);
      num_states++;
   }
   if (brw.intel.gen < 7)
      dump_sampler_state(&brw, p->state.wm.SAMPLER_STATE, num_states * 16);
   else
      dump_gen7_sampler_state(&brw, p->state.wm.SAMPLER_STATE, num_states * 16);

   dump_scissor(&brw, p->state.SCISSOR_RECT);

   /* silence unused-function warnings for dumpers not wired up here */
   (void) dump_vs_state;
   (void) dump_gs_state;
   (void) dump_clip_state;
   (void) dump_sf_state;
   (void) dump_wm_state;
   (void) dump_cc_state_gen4;
   (void) dump_wm_constants;
}
/** |
* Dump the pipeline. |
*/ |
void |
ilo_3d_pipeline_dump(struct ilo_3d_pipeline *p) |
{ |
int err; |
ilo_cp_dump(p->cp); |
err = intel_bo_map(p->cp->bo, false); |
if (!err) { |
dump_3d_state(p); |
intel_bo_unmap(p->cp->bo); |
} |
} |
/contrib/sdk/sources/Mesa/src/gallium/drivers/ilo/ilo_3d_pipeline_gen6.c |
---|
0,0 → 1,1670 |
/* |
* Mesa 3-D graphics library |
* |
* Copyright (C) 2013 LunarG, Inc. |
* |
* Permission is hereby granted, free of charge, to any person obtaining a |
* copy of this software and associated documentation files (the "Software"), |
* to deal in the Software without restriction, including without limitation |
* the rights to use, copy, modify, merge, publish, distribute, sublicense, |
* and/or sell copies of the Software, and to permit persons to whom the |
* Software is furnished to do so, subject to the following conditions: |
* |
* The above copyright notice and this permission notice shall be included |
* in all copies or substantial portions of the Software. |
* |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING |
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER |
* DEALINGS IN THE SOFTWARE. |
* |
* Authors: |
* Chia-I Wu <olv@lunarg.com> |
*/ |
#include "util/u_dual_blend.h" |
#include "util/u_prim.h" |
#include "intel_reg.h" |
#include "ilo_3d.h" |
#include "ilo_context.h" |
#include "ilo_cp.h" |
#include "ilo_gpe_gen6.h" |
#include "ilo_shader.h" |
#include "ilo_state.h" |
#include "ilo_3d_pipeline.h" |
#include "ilo_3d_pipeline_gen6.h" |
/**
 * This should be called before any depth stall flush (including those
 * produced by non-pipelined state commands) or cache flush on GEN6.
 *
 * \param caller_post_sync  true when the caller itself emits a
 *                          PIPE_CONTROL with a non-zero post-sync op, in
 *                          which case the second write below is skipped.
 *
 * \see intel_emit_post_sync_nonzero_flush()
 */
static void
gen6_wa_pipe_control_post_sync(struct ilo_3d_pipeline *p,
                               bool caller_post_sync)
{
   assert(p->dev->gen == ILO_GEN(6));

   /* emit once per batch; the flag is cleared after each 3DPRIMITIVE */
   if (p->state.has_gen6_wa_pipe_control)
      return;

   p->state.has_gen6_wa_pipe_control = true;

   /*
    * From the Sandy Bridge PRM, volume 2 part 1, page 60:
    *
    *     "Pipe-control with CS-stall bit set must be sent BEFORE the
    *      pipe-control with a post-sync op and no write-cache flushes."
    *
    * The workaround below necessitates this workaround.
    */
   p->gen6_PIPE_CONTROL(p->dev,
         PIPE_CONTROL_CS_STALL |
         PIPE_CONTROL_STALL_AT_SCOREBOARD,
         NULL, 0, false, p->cp);

   /* the caller will emit the post-sync op */
   if (caller_post_sync)
      return;

   /*
    * From the Sandy Bridge PRM, volume 2 part 1, page 60:
    *
    *     "Before any depth stall flush (including those produced by
    *      non-pipelined state commands), software needs to first send a
    *      PIPE_CONTROL with no bits set except Post-Sync Operation != 0."
    *
    *     "Before a PIPE_CONTROL with Write Cache Flush Enable =1, a
    *      PIPE_CONTROL with any non-zero post-sync-op is required."
    */
   p->gen6_PIPE_CONTROL(p->dev,
         PIPE_CONTROL_WRITE_IMMEDIATE,
         p->workaround_bo, 0, false, p->cp);
}
static void |
gen6_wa_pipe_control_wm_multisample_flush(struct ilo_3d_pipeline *p) |
{ |
assert(p->dev->gen == ILO_GEN(6)); |
gen6_wa_pipe_control_post_sync(p, false); |
/* |
* From the Sandy Bridge PRM, volume 2 part 1, page 305: |
* |
* "Driver must guarentee that all the caches in the depth pipe are |
* flushed before this command (3DSTATE_MULTISAMPLE) is parsed. This |
* requires driver to send a PIPE_CONTROL with a CS stall along with a |
* Depth Flush prior to this command." |
*/ |
p->gen6_PIPE_CONTROL(p->dev, |
PIPE_CONTROL_DEPTH_CACHE_FLUSH | |
PIPE_CONTROL_CS_STALL, |
0, 0, false, p->cp); |
} |
/*
 * GEN6 workaround: emit the stall/flush/stall PIPE_CONTROL sequence
 * required before depth-related commands.  Must itself be preceded by the
 * post-sync workaround.
 */
static void
gen6_wa_pipe_control_wm_depth_flush(struct ilo_3d_pipeline *p)
{
   assert(p->dev->gen == ILO_GEN(6));

   gen6_wa_pipe_control_post_sync(p, false);

   /*
    * According to intel_emit_depth_stall_flushes() of classic i965, we need
    * to emit a sequence of PIPE_CONTROLs prior to emitting depth related
    * commands.
    */
   p->gen6_PIPE_CONTROL(p->dev,
         PIPE_CONTROL_DEPTH_STALL,
         NULL, 0, false, p->cp);

   p->gen6_PIPE_CONTROL(p->dev,
         PIPE_CONTROL_DEPTH_CACHE_FLUSH,
         NULL, 0, false, p->cp);

   p->gen6_PIPE_CONTROL(p->dev,
         PIPE_CONTROL_DEPTH_STALL,
         NULL, 0, false, p->cp);
}
/*
 * GEN6 workaround: stall at the pixel scoreboard before changing the
 * maximum number of WM threads in 3DSTATE_WM.
 */
static void
gen6_wa_pipe_control_wm_max_threads_stall(struct ilo_3d_pipeline *p)
{
   assert(p->dev->gen == ILO_GEN(6));

   /* the post-sync workaround should cover this already */
   if (p->state.has_gen6_wa_pipe_control)
      return;

   /*
    * From the Sandy Bridge PRM, volume 2 part 1, page 274:
    *
    *     "A PIPE_CONTROL command, with only the Stall At Pixel Scoreboard
    *      field set (DW1 Bit 1), must be issued prior to any change to the
    *      value in this field (Maximum Number of Threads in 3DSTATE_WM)"
    */
   p->gen6_PIPE_CONTROL(p->dev,
         PIPE_CONTROL_STALL_AT_SCOREBOARD,
         NULL, 0, false, p->cp);
}
/*
 * GEN6 workaround: flush after 3DSTATE_CONSTANT_VS so the command is not
 * left buffered by the VS fixed function.  Must itself be preceded by the
 * post-sync workaround.
 */
static void
gen6_wa_pipe_control_vs_const_flush(struct ilo_3d_pipeline *p)
{
   assert(p->dev->gen == ILO_GEN(6));

   gen6_wa_pipe_control_post_sync(p, false);

   /*
    * According to upload_vs_state() of classic i965, we need to emit
    * PIPE_CONTROL after 3DSTATE_CONSTANT_VS so that the command is kept being
    * buffered by VS FF, to the point that the FF dies.
    */
   p->gen6_PIPE_CONTROL(p->dev,
         PIPE_CONTROL_DEPTH_STALL |
         PIPE_CONTROL_INSTRUCTION_FLUSH |
         PIPE_CONTROL_STATE_CACHE_INVALIDATE,
         NULL, 0, false, p->cp);
}
/* test whether a state atom was marked dirty for this session */
#define DIRTY(state) (session->pipe_dirty & ILO_DIRTY_ ## state)

/* Emit PIPELINE_SELECT (3D) when a new hardware context is in use. */
void
gen6_pipeline_common_select(struct ilo_3d_pipeline *p,
                            const struct ilo_context *ilo,
                            struct gen6_pipeline_session *session)
{
   /* PIPELINE_SELECT */
   if (session->hw_ctx_changed) {
      if (p->dev->gen == ILO_GEN(6))
         gen6_wa_pipe_control_post_sync(p, false);

      /* 0x0 selects the 3D pipeline */
      p->gen6_PIPELINE_SELECT(p->dev, 0x0, p->cp);
   }
}
/* Emit STATE_SIP (system instruction pointer 0) for a new HW context. */
void
gen6_pipeline_common_sip(struct ilo_3d_pipeline *p,
                         const struct ilo_context *ilo,
                         struct gen6_pipeline_session *session)
{
   /* STATE_SIP */
   if (session->hw_ctx_changed) {
      if (p->dev->gen == ILO_GEN(6))
         gen6_wa_pipe_control_post_sync(p, false);

      p->gen6_STATE_SIP(p->dev, 0, p->cp);
   }
}
/*
 * Emit STATE_BASE_ADDRESS when any base bo changed, and mark every
 * pointer-style state as needing re-emission afterwards.
 */
void
gen6_pipeline_common_base_address(struct ilo_3d_pipeline *p,
                                  const struct ilo_context *ilo,
                                  struct gen6_pipeline_session *session)
{
   /* STATE_BASE_ADDRESS */
   if (session->state_bo_changed || session->kernel_bo_changed ||
       session->batch_bo_changed) {
      if (p->dev->gen == ILO_GEN(6))
         gen6_wa_pipe_control_post_sync(p, false);

      p->gen6_STATE_BASE_ADDRESS(p->dev,
            NULL, p->cp->bo, p->cp->bo, NULL, ilo->hw3d->kernel.bo,
            0, 0, 0, 0, p->cp);

      /*
       * From the Sandy Bridge PRM, volume 1 part 1, page 28:
       *
       *     "The following commands must be reissued following any change to
       *      the base addresses:
       *
       *       * 3DSTATE_BINDING_TABLE_POINTERS
       *       * 3DSTATE_SAMPLER_STATE_POINTERS
       *       * 3DSTATE_VIEWPORT_STATE_POINTERS
       *       * 3DSTATE_CC_POINTERS
       *       * MEDIA_STATE_POINTERS"
       *
       * 3DSTATE_SCISSOR_STATE_POINTERS is not on the list, but it is
       * reasonable to also reissue the command.  Same to PCB.
       */
      session->viewport_state_changed = true;

      session->cc_state_blend_changed = true;
      session->cc_state_dsa_changed = true;
      session->cc_state_cc_changed = true;

      session->scissor_state_changed = true;

      session->binding_table_vs_changed = true;
      session->binding_table_gs_changed = true;
      session->binding_table_fs_changed = true;

      session->sampler_state_vs_changed = true;
      session->sampler_state_gs_changed = true;
      session->sampler_state_fs_changed = true;

      session->pcb_state_vs_changed = true;
      session->pcb_state_gs_changed = true;
      session->pcb_state_fs_changed = true;
   }
}
/*
 * Emit 3DSTATE_URB: compute VS/GS entry sizes from the bound shaders and
 * split the URB between the two stages (half each when GS is active).
 */
static void
gen6_pipeline_common_urb(struct ilo_3d_pipeline *p,
                         const struct ilo_context *ilo,
                         struct gen6_pipeline_session *session)
{
   /* 3DSTATE_URB */
   if (DIRTY(VE) || DIRTY(VS) || DIRTY(GS)) {
      /* GS is "active" also when the VS performs GEN6 stream output */
      const bool gs_active = (ilo->gs || (ilo->vs &&
               ilo_shader_get_kernel_param(ilo->vs, ILO_KERNEL_VS_GEN6_SO)));
      int vs_entry_size, gs_entry_size;
      int vs_total_size, gs_total_size;

      /* entry sizes start in units of vec4 outputs */
      vs_entry_size = (ilo->vs) ?
         ilo_shader_get_kernel_param(ilo->vs, ILO_KERNEL_OUTPUT_COUNT) : 0;

      /*
       * As indicated by 2e712e41db0c0676e9f30fc73172c0e8de8d84d4, VF and VS
       * share VUE handles.  The VUE allocation size must be large enough to
       * store either VF outputs (number of VERTEX_ELEMENTs) and VS outputs.
       *
       * I am not sure if the PRM explicitly states that VF and VS share VUE
       * handles.  But here is a citation that implies so:
       *
       * From the Sandy Bridge PRM, volume 2 part 1, page 44:
       *
       *     "Once a FF stage that spawn threads has sufficient input to
       *      initiate a thread, it must guarantee that it is safe to request
       *      the thread initiation. For all these FF stages, this check is
       *      based on :
       *
       *      - The availability of output URB entries:
       *        - VS: As the input URB entries are overwritten with the
       *          VS-generated output data, output URB availability isn't a
       *          factor."
       */
      if (vs_entry_size < ilo->ve->count)
         vs_entry_size = ilo->ve->count;

      gs_entry_size = (ilo->gs) ?
         ilo_shader_get_kernel_param(ilo->gs, ILO_KERNEL_OUTPUT_COUNT) :
         (gs_active) ? vs_entry_size : 0;

      /* in bytes */
      vs_entry_size *= sizeof(float) * 4;
      gs_entry_size *= sizeof(float) * 4;
      vs_total_size = ilo->dev->urb_size;

      /* an active GS gets half of the URB, otherwise VS takes it all */
      if (gs_active) {
         vs_total_size /= 2;
         gs_total_size = vs_total_size;
      }
      else {
         gs_total_size = 0;
      }

      p->gen6_3DSTATE_URB(p->dev, vs_total_size, gs_total_size,
            vs_entry_size, gs_entry_size, p->cp);

      /*
       * From the Sandy Bridge PRM, volume 2 part 1, page 27:
       *
       *     "Because of a urb corruption caused by allocating a previous
       *      gsunit's urb entry to vsunit software is required to send a
       *      "GS NULL Fence" (Send URB fence with VS URB size == 1 and GS URB
       *      size == 0) plus a dummy DRAW call before any case where VS will
       *      be taking over GS URB space."
       */
      if (p->state.gs.active && !gs_active)
         ilo_3d_pipeline_emit_flush_gen6(p);

      p->state.gs.active = gs_active;
   }
}
/* Emit 3DSTATE_VIEWPORT_STATE_POINTERS when any viewport state changed. */
static void
gen6_pipeline_common_pointers_1(struct ilo_3d_pipeline *p,
                                const struct ilo_context *ilo,
                                struct gen6_pipeline_session *session)
{
   /* 3DSTATE_VIEWPORT_STATE_POINTERS */
   if (session->viewport_state_changed) {
      p->gen6_3DSTATE_VIEWPORT_STATE_POINTERS(p->dev,
            p->state.CLIP_VIEWPORT,
            p->state.SF_VIEWPORT,
            p->state.CC_VIEWPORT, p->cp);
   }
}
/*
 * Emit 3DSTATE_CC_STATE_POINTERS and 3DSTATE_SAMPLER_STATE_POINTERS when
 * the corresponding states changed.
 */
static void
gen6_pipeline_common_pointers_2(struct ilo_3d_pipeline *p,
                                const struct ilo_context *ilo,
                                struct gen6_pipeline_session *session)
{
   /* 3DSTATE_CC_STATE_POINTERS */
   if (session->cc_state_blend_changed ||
       session->cc_state_dsa_changed ||
       session->cc_state_cc_changed) {
      p->gen6_3DSTATE_CC_STATE_POINTERS(p->dev,
            p->state.BLEND_STATE,
            p->state.DEPTH_STENCIL_STATE,
            p->state.COLOR_CALC_STATE, p->cp);
   }

   /* 3DSTATE_SAMPLER_STATE_POINTERS */
   if (session->sampler_state_vs_changed ||
       session->sampler_state_gs_changed ||
       session->sampler_state_fs_changed) {
      /* no GS sampler state is kept; 0 is passed for that slot */
      p->gen6_3DSTATE_SAMPLER_STATE_POINTERS(p->dev,
            p->state.vs.SAMPLER_STATE,
            0,
            p->state.wm.SAMPLER_STATE, p->cp);
   }
}
/*
 * Emit 3DSTATE_SCISSOR_STATE_POINTERS and 3DSTATE_BINDING_TABLE_POINTERS
 * when the corresponding states changed.
 */
static void
gen6_pipeline_common_pointers_3(struct ilo_3d_pipeline *p,
                                const struct ilo_context *ilo,
                                struct gen6_pipeline_session *session)
{
   /* 3DSTATE_SCISSOR_STATE_POINTERS */
   if (session->scissor_state_changed) {
      p->gen6_3DSTATE_SCISSOR_STATE_POINTERS(p->dev,
            p->state.SCISSOR_RECT, p->cp);
   }

   /* 3DSTATE_BINDING_TABLE_POINTERS */
   if (session->binding_table_vs_changed ||
       session->binding_table_gs_changed ||
       session->binding_table_fs_changed) {
      p->gen6_3DSTATE_BINDING_TABLE_POINTERS(p->dev,
            p->state.vs.BINDING_TABLE_STATE,
            p->state.gs.BINDING_TABLE_STATE,
            p->state.wm.BINDING_TABLE_STATE, p->cp);
   }
}
void |
gen6_pipeline_vf(struct ilo_3d_pipeline *p, |
const struct ilo_context *ilo, |
struct gen6_pipeline_session *session) |
{ |
/* 3DSTATE_INDEX_BUFFER */ |
if (DIRTY(IB) || session->primitive_restart_changed || |
session->batch_bo_changed) { |
p->gen6_3DSTATE_INDEX_BUFFER(p->dev, |
&ilo->ib, ilo->draw->primitive_restart, p->cp); |
} |
/* 3DSTATE_VERTEX_BUFFERS */ |
if (DIRTY(VB) || DIRTY(VE) || session->batch_bo_changed) { |
p->gen6_3DSTATE_VERTEX_BUFFERS(p->dev, |
ilo->vb.states, ilo->vb.enabled_mask, ilo->ve, p->cp); |
} |
/* 3DSTATE_VERTEX_ELEMENTS */ |
if (DIRTY(VE) || DIRTY(VS)) { |
const struct ilo_ve_state *ve = ilo->ve; |
bool last_velement_edgeflag = false; |
bool prepend_generate_ids = false; |
if (ilo->vs) { |
if (ilo_shader_get_kernel_param(ilo->vs, |
ILO_KERNEL_VS_INPUT_EDGEFLAG)) { |
/* we rely on the state tracker here */ |
assert(ilo_shader_get_kernel_param(ilo->vs, |
ILO_KERNEL_INPUT_COUNT) == ve->count); |
last_velement_edgeflag = true; |
} |
if (ilo_shader_get_kernel_param(ilo->vs, |
ILO_KERNEL_VS_INPUT_INSTANCEID) || |
ilo_shader_get_kernel_param(ilo->vs, |
ILO_KERNEL_VS_INPUT_VERTEXID)) |
prepend_generate_ids = true; |
} |
p->gen6_3DSTATE_VERTEX_ELEMENTS(p->dev, ve, |
last_velement_edgeflag, prepend_generate_ids, p->cp); |
} |
} |
void |
gen6_pipeline_vf_statistics(struct ilo_3d_pipeline *p, |
const struct ilo_context *ilo, |
struct gen6_pipeline_session *session) |
{ |
/* 3DSTATE_VF_STATISTICS */ |
if (session->hw_ctx_changed) |
p->gen6_3DSTATE_VF_STATISTICS(p->dev, false, p->cp); |
} |
/* Emit 3DPRIMITIVE for the current draw and re-arm the GEN6 post-sync WA. */
void
gen6_pipeline_vf_draw(struct ilo_3d_pipeline *p,
                      const struct ilo_context *ilo,
                      struct gen6_pipeline_session *session)
{
   /* 3DPRIMITIVE */
   p->gen6_3DPRIMITIVE(p->dev, ilo->draw, &ilo->ib, false, p->cp);

   /* the workaround must be re-emitted for the next group of commands */
   p->state.has_gen6_wa_pipe_control = false;
}
/*
 * Emit the VS commands: 3DSTATE_CONSTANT_VS and 3DSTATE_VS, with the GEN6
 * workaround flushes around them.
 */
void
gen6_pipeline_vs(struct ilo_3d_pipeline *p,
                 const struct ilo_context *ilo,
                 struct gen6_pipeline_session *session)
{
   const bool emit_3dstate_vs = (DIRTY(VS) || DIRTY(SAMPLER_VS) ||
                                 session->kernel_bo_changed);
   const bool emit_3dstate_constant_vs = session->pcb_state_vs_changed;

   /*
    * the classic i965 does this in upload_vs_state(), citing a spec that I
    * cannot find
    */
   if (emit_3dstate_vs && p->dev->gen == ILO_GEN(6))
      gen6_wa_pipe_control_post_sync(p, false);

   /* 3DSTATE_CONSTANT_VS */
   if (emit_3dstate_constant_vs) {
      p->gen6_3DSTATE_CONSTANT_VS(p->dev,
            &p->state.vs.PUSH_CONSTANT_BUFFER,
            &p->state.vs.PUSH_CONSTANT_BUFFER_size,
            1, p->cp);
   }

   /* 3DSTATE_VS */
   if (emit_3dstate_vs) {
      const int num_samplers = ilo->sampler[PIPE_SHADER_VERTEX].count;

      p->gen6_3DSTATE_VS(p->dev, ilo->vs, num_samplers, p->cp);
   }

   /* the constants must be flushed out of the VS FF on GEN6 */
   if (emit_3dstate_constant_vs && p->dev->gen == ILO_GEN(6))
      gen6_wa_pipe_control_vs_const_flush(p);
}
/*
 * Emit the GS commands: an empty 3DSTATE_CONSTANT_GS and, when needed,
 * 3DSTATE_GS.
 */
static void
gen6_pipeline_gs(struct ilo_3d_pipeline *p,
                 const struct ilo_context *ilo,
                 struct gen6_pipeline_session *session)
{
   /* 3DSTATE_CONSTANT_GS: no GS push constants are used */
   if (session->pcb_state_gs_changed)
      p->gen6_3DSTATE_CONSTANT_GS(p->dev, NULL, NULL, 0, p->cp);

   /* 3DSTATE_GS */
   if (DIRTY(GS) || DIRTY(VS) ||
       session->prim_changed || session->kernel_bo_changed) {
      const int verts_per_prim = u_vertices_per_prim(session->reduced_prim);

      /* the VS is passed too, for GEN6 stream output via the GS unit */
      p->gen6_3DSTATE_GS(p->dev, ilo->gs, ilo->vs, verts_per_prim, p->cp);
   }
}
bool |
gen6_pipeline_update_max_svbi(struct ilo_3d_pipeline *p, |
const struct ilo_context *ilo, |
struct gen6_pipeline_session *session) |
{ |
if (DIRTY(VS) || DIRTY(GS) || DIRTY(SO)) { |
const struct pipe_stream_output_info *so_info = |
(ilo->gs) ? ilo_shader_get_kernel_so_info(ilo->gs) : |
(ilo->vs) ? ilo_shader_get_kernel_so_info(ilo->vs) : NULL; |
unsigned max_svbi = 0xffffffff; |
int i; |
for (i = 0; i < so_info->num_outputs; i++) { |
const int output_buffer = so_info->output[i].output_buffer; |
const struct pipe_stream_output_target *so = |
ilo->so.states[output_buffer]; |
const int struct_size = so_info->stride[output_buffer] * 4; |
const int elem_size = so_info->output[i].num_components * 4; |
int buf_size, count; |
if (!so) { |
max_svbi = 0; |
break; |
} |
buf_size = so->buffer_size - so_info->output[i].dst_offset * 4; |
count = buf_size / struct_size; |
if (buf_size % struct_size >= elem_size) |
count++; |
if (count < max_svbi) |
max_svbi = count; |
} |
if (p->state.so_max_vertices != max_svbi) { |
p->state.so_max_vertices = max_svbi; |
return true; |
} |
} |
return false; |
} |
/*
 * Emit 3DSTATE_GS_SVB_INDEX when the maximum SVBI changed, and initialize
 * the unused index slots for a new hardware context.
 */
static void
gen6_pipeline_gs_svbi(struct ilo_3d_pipeline *p,
                      const struct ilo_context *ilo,
                      struct gen6_pipeline_session *session)
{
   const bool emit = gen6_pipeline_update_max_svbi(p, ilo, session);

   /* 3DSTATE_GS_SVB_INDEX */
   if (emit) {
      if (p->dev->gen == ILO_GEN(6))
         gen6_wa_pipe_control_post_sync(p, false);

      p->gen6_3DSTATE_GS_SVB_INDEX(p->dev,
            0, p->state.so_num_vertices, p->state.so_max_vertices,
            false, p->cp);

      if (session->hw_ctx_changed) {
         int i;

         /*
          * From the Sandy Bridge PRM, volume 2 part 1, page 148:
          *
          *     "If a buffer is not enabled then the SVBI must be set to 0x0
          *      in order to not cause overflow in that SVBI."
          *
          *     "If a buffer is not enabled then the MaxSVBI must be set to
          *      0xFFFFFFFF in order to not cause overflow in that SVBI."
          */
         for (i = 1; i < 4; i++) {
            p->gen6_3DSTATE_GS_SVB_INDEX(p->dev,
                  i, 0, 0xffffffff, false, p->cp);
         }
      }
   }
}
void |
gen6_pipeline_clip(struct ilo_3d_pipeline *p, |
const struct ilo_context *ilo, |
struct gen6_pipeline_session *session) |
{ |
/* 3DSTATE_CLIP */ |
if (DIRTY(RASTERIZER) || DIRTY(FS) || DIRTY(VIEWPORT) || DIRTY(FB)) { |
bool enable_guardband = true; |
unsigned i; |
/* |
* We do not do 2D clipping yet. Guard band test should only be enabled |
* when the viewport is larger than the framebuffer. |
*/ |
for (i = 0; i < ilo->viewport.count; i++) { |
const struct ilo_viewport_cso *vp = &ilo->viewport.cso[i]; |
if (vp->min_x > 0.0f || vp->max_x < ilo->fb.state.width || |
vp->min_y > 0.0f || vp->max_y < ilo->fb.state.height) { |
enable_guardband = false; |
break; |
} |
} |
p->gen6_3DSTATE_CLIP(p->dev, ilo->rasterizer, |
ilo->fs, enable_guardband, 1, p->cp); |
} |
} |
/* Emit 3DSTATE_SF when the rasterizer or any relevant shader changed. */
static void
gen6_pipeline_sf(struct ilo_3d_pipeline *p,
                 const struct ilo_context *ilo,
                 struct gen6_pipeline_session *session)
{
   /* 3DSTATE_SF */
   if (DIRTY(RASTERIZER) || DIRTY(VS) || DIRTY(GS) || DIRTY(FS)) {
      /* the last vertex stage (GS if bound, else VS) feeds the SF */
      p->gen6_3DSTATE_SF(p->dev, ilo->rasterizer, ilo->fs,
            (ilo->gs) ? ilo->gs : ilo->vs, p->cp);
   }
}
/* Emit 3DSTATE_DRAWING_RECTANGLE covering the whole framebuffer. */
void
gen6_pipeline_sf_rect(struct ilo_3d_pipeline *p,
                      const struct ilo_context *ilo,
                      struct gen6_pipeline_session *session)
{
   /* 3DSTATE_DRAWING_RECTANGLE */
   if (DIRTY(FB)) {
      if (p->dev->gen == ILO_GEN(6))
         gen6_wa_pipe_control_post_sync(p, false);

      p->gen6_3DSTATE_DRAWING_RECTANGLE(p->dev, 0, 0,
            ilo->fb.state.width, ilo->fb.state.height, p->cp);
   }
}
/*
 * Emit the WM commands: an empty 3DSTATE_CONSTANT_PS and, when needed,
 * 3DSTATE_WM with the max-threads stall workaround.
 */
static void
gen6_pipeline_wm(struct ilo_3d_pipeline *p,
                 const struct ilo_context *ilo,
                 struct gen6_pipeline_session *session)
{
   /* 3DSTATE_CONSTANT_PS: no FS push constants are used */
   if (session->pcb_state_fs_changed)
      p->gen6_3DSTATE_CONSTANT_PS(p->dev, NULL, NULL, 0, p->cp);

   /* 3DSTATE_WM */
   if (DIRTY(FS) || DIRTY(SAMPLER_FS) || DIRTY(BLEND) || DIRTY(DSA) ||
       DIRTY(RASTERIZER) || session->kernel_bo_changed) {
      const int num_samplers = ilo->sampler[PIPE_SHADER_FRAGMENT].count;
      const bool dual_blend = ilo->blend->dual_blend;
      /* alpha test or alpha-to-coverage may discard fragments */
      const bool cc_may_kill = (ilo->dsa->alpha.enabled ||
                                ilo->blend->alpha_to_coverage);

      if (p->dev->gen == ILO_GEN(6) && session->hw_ctx_changed)
         gen6_wa_pipe_control_wm_max_threads_stall(p);

      p->gen6_3DSTATE_WM(p->dev, ilo->fs, num_samplers,
            ilo->rasterizer, dual_blend, cc_may_kill, p->cp);
   }
}
/*
 * Emit 3DSTATE_MULTISAMPLE and 3DSTATE_SAMPLE_MASK, with the GEN6 depth
 * flush workaround beforehand.
 */
static void
gen6_pipeline_wm_multisample(struct ilo_3d_pipeline *p,
                             const struct ilo_context *ilo,
                             struct gen6_pipeline_session *session)
{
   /* 3DSTATE_MULTISAMPLE and 3DSTATE_SAMPLE_MASK */
   if (DIRTY(SAMPLE_MASK) || DIRTY(FB)) {
      const uint32_t *packed_sample_pos;

      /* only 1x and 4x sample positions are kept */
      packed_sample_pos = (ilo->fb.num_samples > 1) ?
         &p->packed_sample_position_4x : &p->packed_sample_position_1x;

      if (p->dev->gen == ILO_GEN(6)) {
         gen6_wa_pipe_control_post_sync(p, false);
         gen6_wa_pipe_control_wm_multisample_flush(p);
      }

      p->gen6_3DSTATE_MULTISAMPLE(p->dev,
            ilo->fb.num_samples, packed_sample_pos,
            ilo->rasterizer->state.half_pixel_center, p->cp);

      /* single-sampled rendering always uses mask 0x1 */
      p->gen6_3DSTATE_SAMPLE_MASK(p->dev,
            (ilo->fb.num_samples > 1) ? ilo->sample_mask : 0x1, p->cp);
   }
}
/*
 * Emit 3DSTATE_DEPTH_BUFFER (real or null) and 3DSTATE_CLEAR_PARAMS, with
 * the GEN6 depth flush workaround beforehand.
 */
static void
gen6_pipeline_wm_depth(struct ilo_3d_pipeline *p,
                       const struct ilo_context *ilo,
                       struct gen6_pipeline_session *session)
{
   /* 3DSTATE_DEPTH_BUFFER and 3DSTATE_CLEAR_PARAMS */
   if (DIRTY(FB) || session->batch_bo_changed) {
      const struct ilo_zs_surface *zs;

      if (ilo->fb.state.zsbuf) {
         const struct ilo_surface_cso *surface =
            (const struct ilo_surface_cso *) ilo->fb.state.zsbuf;

         assert(!surface->is_rt);
         zs = &surface->u.zs;
      }
      else {
         /* a null depth/stencil surface when none is bound */
         zs = &ilo->fb.null_zs;
      }

      if (p->dev->gen == ILO_GEN(6)) {
         gen6_wa_pipe_control_post_sync(p, false);
         gen6_wa_pipe_control_wm_depth_flush(p);
      }

      p->gen6_3DSTATE_DEPTH_BUFFER(p->dev, zs, p->cp);

      /* TODO */
      p->gen6_3DSTATE_CLEAR_PARAMS(p->dev, 0, p->cp);
   }
}
/*
 * Emit the raster-related state: polygon stipple, line stipple, and AA
 * line parameters, each only when enabled by the rasterizer state.
 */
void
gen6_pipeline_wm_raster(struct ilo_3d_pipeline *p,
                        const struct ilo_context *ilo,
                        struct gen6_pipeline_session *session)
{
   /* 3DSTATE_POLY_STIPPLE_PATTERN and 3DSTATE_POLY_STIPPLE_OFFSET */
   if ((DIRTY(RASTERIZER) || DIRTY(POLY_STIPPLE)) &&
       ilo->rasterizer->state.poly_stipple_enable) {
      if (p->dev->gen == ILO_GEN(6))
         gen6_wa_pipe_control_post_sync(p, false);

      p->gen6_3DSTATE_POLY_STIPPLE_PATTERN(p->dev,
            &ilo->poly_stipple, p->cp);

      p->gen6_3DSTATE_POLY_STIPPLE_OFFSET(p->dev, 0, 0, p->cp);
   }

   /* 3DSTATE_LINE_STIPPLE */
   if (DIRTY(RASTERIZER) && ilo->rasterizer->state.line_stipple_enable) {
      if (p->dev->gen == ILO_GEN(6))
         gen6_wa_pipe_control_post_sync(p, false);

      /* the hardware factor is the gallium factor plus one */
      p->gen6_3DSTATE_LINE_STIPPLE(p->dev,
            ilo->rasterizer->state.line_stipple_pattern,
            ilo->rasterizer->state.line_stipple_factor + 1, p->cp);
   }

   /* 3DSTATE_AA_LINE_PARAMETERS */
   if (DIRTY(RASTERIZER) && ilo->rasterizer->state.line_smooth) {
      if (p->dev->gen == ILO_GEN(6))
         gen6_wa_pipe_control_post_sync(p, false);

      p->gen6_3DSTATE_AA_LINE_PARAMETERS(p->dev, p->cp);
   }
}
/*
 * Upload the viewport states to the state buffer: SF_CLIP + CC on GEN7+,
 * or CLIP + SF + CC on GEN6.
 */
static void
gen6_pipeline_state_viewports(struct ilo_3d_pipeline *p,
                              const struct ilo_context *ilo,
                              struct gen6_pipeline_session *session)
{
   /* SF_CLIP_VIEWPORT and CC_VIEWPORT */
   if (p->dev->gen >= ILO_GEN(7) && DIRTY(VIEWPORT)) {
      p->state.SF_CLIP_VIEWPORT = p->gen7_SF_CLIP_VIEWPORT(p->dev,
            ilo->viewport.cso, ilo->viewport.count, p->cp);

      p->state.CC_VIEWPORT = p->gen6_CC_VIEWPORT(p->dev,
            ilo->viewport.cso, ilo->viewport.count, p->cp);

      session->viewport_state_changed = true;
   }
   /* SF_VIEWPORT, CLIP_VIEWPORT, and CC_VIEWPORT */
   else if (DIRTY(VIEWPORT)) {
      p->state.CLIP_VIEWPORT = p->gen6_CLIP_VIEWPORT(p->dev,
            ilo->viewport.cso, ilo->viewport.count, p->cp);

      p->state.SF_VIEWPORT = p->gen6_SF_VIEWPORT(p->dev,
            ilo->viewport.cso, ilo->viewport.count, p->cp);

      p->state.CC_VIEWPORT = p->gen6_CC_VIEWPORT(p->dev,
            ilo->viewport.cso, ilo->viewport.count, p->cp);

      session->viewport_state_changed = true;
   }
}
/*
 * Upload the color-calculator-related states (BLEND_STATE,
 * COLOR_CALC_STATE, DEPTH_STENCIL_STATE) and flag the matching pointer
 * commands for re-emission.
 */
static void
gen6_pipeline_state_cc(struct ilo_3d_pipeline *p,
                       const struct ilo_context *ilo,
                       struct gen6_pipeline_session *session)
{
   /* BLEND_STATE */
   if (DIRTY(BLEND) || DIRTY(FB) || DIRTY(DSA)) {
      p->state.BLEND_STATE = p->gen6_BLEND_STATE(p->dev,
            ilo->blend, &ilo->fb, &ilo->dsa->alpha, p->cp);

      session->cc_state_blend_changed = true;
   }

   /* COLOR_CALC_STATE */
   if (DIRTY(DSA) || DIRTY(STENCIL_REF) || DIRTY(BLEND_COLOR)) {
      p->state.COLOR_CALC_STATE =
         p->gen6_COLOR_CALC_STATE(p->dev, &ilo->stencil_ref,
               ilo->dsa->alpha.ref_value, &ilo->blend_color, p->cp);

      session->cc_state_cc_changed = true;
   }

   /* DEPTH_STENCIL_STATE */
   if (DIRTY(DSA)) {
      p->state.DEPTH_STENCIL_STATE =
         p->gen6_DEPTH_STENCIL_STATE(p->dev, ilo->dsa, p->cp);

      session->cc_state_dsa_changed = true;
   }
}
static void |
gen6_pipeline_state_scissors(struct ilo_3d_pipeline *p, |
const struct ilo_context *ilo, |
struct gen6_pipeline_session *session) |
{ |
/* SCISSOR_RECT */ |
if (DIRTY(SCISSOR) || DIRTY(VIEWPORT)) { |
/* there should be as many scissors as there are viewports */ |
p->state.SCISSOR_RECT = p->gen6_SCISSOR_RECT(p->dev, |
&ilo->scissor, ilo->viewport.count, p->cp); |
session->scissor_state_changed = true; |
} |
} |
static void |
gen6_pipeline_state_surfaces_rt(struct ilo_3d_pipeline *p, |
const struct ilo_context *ilo, |
struct gen6_pipeline_session *session) |
{ |
/* SURFACE_STATEs for render targets */ |
if (DIRTY(FB)) { |
const struct ilo_fb_state *fb = &ilo->fb; |
const int offset = ILO_WM_DRAW_SURFACE(0); |
uint32_t *surface_state = &p->state.wm.SURFACE_STATE[offset]; |
int i; |
for (i = 0; i < fb->state.nr_cbufs; i++) { |
const struct ilo_surface_cso *surface = |
(const struct ilo_surface_cso *) fb->state.cbufs[i]; |
assert(surface && surface->is_rt); |
surface_state[i] = |
p->gen6_SURFACE_STATE(p->dev, &surface->u.rt, true, p->cp); |
} |
/* |
* Upload at least one render target, as |
* brw_update_renderbuffer_surfaces() does. I don't know why. |
*/ |
if (i == 0) { |
struct ilo_view_surface null_surface; |
ilo_gpe_init_view_surface_null(p->dev, |
fb->state.width, fb->state.height, |
1, 0, &null_surface); |
surface_state[i] = |
p->gen6_SURFACE_STATE(p->dev, &null_surface, true, p->cp); |
i++; |
} |
memset(&surface_state[i], 0, (ILO_MAX_DRAW_BUFFERS - i) * 4); |
if (i && session->num_surfaces[PIPE_SHADER_FRAGMENT] < offset + i) |
session->num_surfaces[PIPE_SHADER_FRAGMENT] = offset + i; |
session->binding_table_fs_changed = true; |
} |
} |
static void |
gen6_pipeline_state_surfaces_so(struct ilo_3d_pipeline *p, |
const struct ilo_context *ilo, |
struct gen6_pipeline_session *session) |
{ |
const struct ilo_so_state *so = &ilo->so; |
if (p->dev->gen != ILO_GEN(6)) |
return; |
/* SURFACE_STATEs for stream output targets */ |
if (DIRTY(VS) || DIRTY(GS) || DIRTY(SO)) { |
const struct pipe_stream_output_info *so_info = |
(ilo->gs) ? ilo_shader_get_kernel_so_info(ilo->gs) : |
(ilo->vs) ? ilo_shader_get_kernel_so_info(ilo->vs) : NULL; |
const int offset = ILO_GS_SO_SURFACE(0); |
uint32_t *surface_state = &p->state.gs.SURFACE_STATE[offset]; |
int i; |
for (i = 0; so_info && i < so_info->num_outputs; i++) { |
const int target = so_info->output[i].output_buffer; |
const struct pipe_stream_output_target *so_target = |
(target < so->count) ? so->states[target] : NULL; |
if (so_target) { |
surface_state[i] = p->gen6_so_SURFACE_STATE(p->dev, |
so_target, so_info, i, p->cp); |
} |
else { |
surface_state[i] = 0; |
} |
} |
memset(&surface_state[i], 0, (ILO_MAX_SO_BINDINGS - i) * 4); |
if (i && session->num_surfaces[PIPE_SHADER_GEOMETRY] < offset + i) |
session->num_surfaces[PIPE_SHADER_GEOMETRY] = offset + i; |
session->binding_table_gs_changed = true; |
} |
} |
static void |
gen6_pipeline_state_surfaces_view(struct ilo_3d_pipeline *p, |
const struct ilo_context *ilo, |
int shader_type, |
struct gen6_pipeline_session *session) |
{ |
const struct ilo_view_state *view = &ilo->view[shader_type]; |
uint32_t *surface_state; |
int offset, i; |
bool skip = false; |
/* SURFACE_STATEs for sampler views */ |
switch (shader_type) { |
case PIPE_SHADER_VERTEX: |
if (DIRTY(VIEW_VS)) { |
offset = ILO_VS_TEXTURE_SURFACE(0); |
surface_state = &p->state.vs.SURFACE_STATE[offset]; |
session->binding_table_vs_changed = true; |
} |
else { |
skip = true; |
} |
break; |
case PIPE_SHADER_FRAGMENT: |
if (DIRTY(VIEW_FS)) { |
offset = ILO_WM_TEXTURE_SURFACE(0); |
surface_state = &p->state.wm.SURFACE_STATE[offset]; |
session->binding_table_fs_changed = true; |
} |
else { |
skip = true; |
} |
break; |
default: |
skip = true; |
break; |
} |
if (skip) |
return; |
for (i = 0; i < view->count; i++) { |
if (view->states[i]) { |
const struct ilo_view_cso *cso = |
(const struct ilo_view_cso *) view->states[i]; |
surface_state[i] = |
p->gen6_SURFACE_STATE(p->dev, &cso->surface, false, p->cp); |
} |
else { |
surface_state[i] = 0; |
} |
} |
memset(&surface_state[i], 0, (ILO_MAX_SAMPLER_VIEWS - i) * 4); |
if (i && session->num_surfaces[shader_type] < offset + i) |
session->num_surfaces[shader_type] = offset + i; |
} |
static void |
gen6_pipeline_state_surfaces_const(struct ilo_3d_pipeline *p, |
const struct ilo_context *ilo, |
int shader_type, |
struct gen6_pipeline_session *session) |
{ |
const struct ilo_cbuf_state *cbuf = &ilo->cbuf[shader_type]; |
uint32_t *surface_state; |
int offset, count, i; |
bool skip = false; |
/* SURFACE_STATEs for constant buffers */ |
switch (shader_type) { |
case PIPE_SHADER_VERTEX: |
if (DIRTY(CBUF)) { |
offset = ILO_VS_CONST_SURFACE(0); |
surface_state = &p->state.vs.SURFACE_STATE[offset]; |
session->binding_table_vs_changed = true; |
} |
else { |
skip = true; |
} |
break; |
case PIPE_SHADER_FRAGMENT: |
if (DIRTY(CBUF)) { |
offset = ILO_WM_CONST_SURFACE(0); |
surface_state = &p->state.wm.SURFACE_STATE[offset]; |
session->binding_table_fs_changed = true; |
} |
else { |
skip = true; |
} |
break; |
default: |
skip = true; |
break; |
} |
if (skip) |
return; |
count = util_last_bit(cbuf->enabled_mask); |
for (i = 0; i < count; i++) { |
if (cbuf->cso[i].resource) { |
surface_state[i] = p->gen6_SURFACE_STATE(p->dev, |
&cbuf->cso[i].surface, false, p->cp); |
} |
else { |
surface_state[i] = 0; |
} |
} |
memset(&surface_state[count], 0, (ILO_MAX_CONST_BUFFERS - count) * 4); |
if (count && session->num_surfaces[shader_type] < offset + count) |
session->num_surfaces[shader_type] = offset + count; |
} |
/**
 * Upload BINDING_TABLE_STATE for the given shader stage, if any of the
 * stage's SURFACE_STATEs was (re-)uploaded during this session.
 */
static void
gen6_pipeline_state_binding_tables(struct ilo_3d_pipeline *p,
                                   const struct ilo_context *ilo,
                                   int shader_type,
                                   struct gen6_pipeline_session *session)
{
   uint32_t *binding_table_state, *surface_state;
   int *binding_table_state_size, size;
   bool skip = false;

   /* BINDING_TABLE_STATE: pick the per-stage storage and change flag */
   switch (shader_type) {
   case PIPE_SHADER_VERTEX:
      surface_state = p->state.vs.SURFACE_STATE;
      binding_table_state = &p->state.vs.BINDING_TABLE_STATE;
      binding_table_state_size = &p->state.vs.BINDING_TABLE_STATE_size;

      skip = !session->binding_table_vs_changed;
      break;
   case PIPE_SHADER_GEOMETRY:
      surface_state = p->state.gs.SURFACE_STATE;
      binding_table_state = &p->state.gs.BINDING_TABLE_STATE;
      binding_table_state_size = &p->state.gs.BINDING_TABLE_STATE_size;

      skip = !session->binding_table_gs_changed;
      break;
   case PIPE_SHADER_FRAGMENT:
      surface_state = p->state.wm.SURFACE_STATE;
      binding_table_state = &p->state.wm.BINDING_TABLE_STATE;
      binding_table_state_size = &p->state.wm.BINDING_TABLE_STATE_size;

      skip = !session->binding_table_fs_changed;
      break;
   default:
      skip = true;
      break;
   }

   if (skip)
      return;

   /*
    * If we have seemingly less SURFACE_STATEs than before, it could be that
    * we did not touch those that reside at the tail in this upload.  Loop
    * over them to figure out the real number of SURFACE_STATEs.
    */
   for (size = *binding_table_state_size;
         size > session->num_surfaces[shader_type]; size--) {
      if (surface_state[size - 1])
         break;
   }

   if (size < session->num_surfaces[shader_type])
      size = session->num_surfaces[shader_type];

   *binding_table_state = p->gen6_BINDING_TABLE_STATE(p->dev,
         surface_state, size, p->cp);
   *binding_table_state_size = size;
}
/**
 * Upload SAMPLER_STATE (and, when the samplers themselves changed,
 * SAMPLER_BORDER_COLOR_STATE) for the given shader stage.  Only VS and FS
 * are handled; other stages are skipped.
 */
static void
gen6_pipeline_state_samplers(struct ilo_3d_pipeline *p,
                             const struct ilo_context *ilo,
                             int shader_type,
                             struct gen6_pipeline_session *session)
{
   const struct ilo_sampler_cso * const *samplers =
      ilo->sampler[shader_type].cso;
   const struct pipe_sampler_view * const *views =
      (const struct pipe_sampler_view **) ilo->view[shader_type].states;
   const int num_samplers = ilo->sampler[shader_type].count;
   const int num_views = ilo->view[shader_type].count;
   uint32_t *sampler_state, *border_color_state;
   bool emit_border_color = false;
   bool skip = false;

   /* SAMPLER_BORDER_COLOR_STATE and SAMPLER_STATE */
   switch (shader_type) {
   case PIPE_SHADER_VERTEX:
      if (DIRTY(SAMPLER_VS) || DIRTY(VIEW_VS)) {
         sampler_state = &p->state.vs.SAMPLER_STATE;
         border_color_state = p->state.vs.SAMPLER_BORDER_COLOR_STATE;

         /* border colors only depend on the samplers, not the views */
         if (DIRTY(SAMPLER_VS))
            emit_border_color = true;

         session->sampler_state_vs_changed = true;
      }
      else {
         skip = true;
      }
      break;
   case PIPE_SHADER_FRAGMENT:
      if (DIRTY(SAMPLER_FS) || DIRTY(VIEW_FS)) {
         sampler_state = &p->state.wm.SAMPLER_STATE;
         border_color_state = p->state.wm.SAMPLER_BORDER_COLOR_STATE;

         if (DIRTY(SAMPLER_FS))
            emit_border_color = true;

         session->sampler_state_fs_changed = true;
      }
      else {
         skip = true;
      }
      break;
   default:
      skip = true;
      break;
   }

   if (skip)
      return;

   if (emit_border_color) {
      int i;

      /* a zero offset marks a sampler slot without a border color */
      for (i = 0; i < num_samplers; i++) {
         border_color_state[i] = (samplers[i]) ?
            p->gen6_SAMPLER_BORDER_COLOR_STATE(p->dev,
                  samplers[i], p->cp) : 0;
      }
   }

   /* should we take the minimum of num_samplers and num_views? */
   *sampler_state = p->gen6_SAMPLER_STATE(p->dev,
         samplers, views,
         border_color_state,
         MIN2(num_samplers, num_views), p->cp);
}
/**
 * Upload the push constant buffer for the VS.  Currently it only holds the
 * user clip planes, copied from ilo->clip when the VS kernel reserves PCB
 * space for them (ILO_KERNEL_VS_PCB_UCP_SIZE).
 */
static void
gen6_pipeline_state_pcb(struct ilo_3d_pipeline *p,
                        const struct ilo_context *ilo,
                        struct gen6_pipeline_session *session)
{
   /* push constant buffer for VS */
   if (DIRTY(VS) || DIRTY(CLIP)) {
      const int clip_state_size = (ilo->vs) ?
            ilo_shader_get_kernel_param(ilo->vs,
                  ILO_KERNEL_VS_PCB_UCP_SIZE) : 0;

      if (clip_state_size) {
         void *pcb;

         p->state.vs.PUSH_CONSTANT_BUFFER_size = clip_state_size;
         /* allocate the buffer and get a CPU pointer to fill it */
         p->state.vs.PUSH_CONSTANT_BUFFER =
            p->gen6_push_constant_buffer(p->dev,
                  p->state.vs.PUSH_CONSTANT_BUFFER_size, &pcb, p->cp);

         memcpy(pcb, &ilo->clip, clip_state_size);
      }
      else {
         /* no UCP space reserved: drop any previous buffer */
         p->state.vs.PUSH_CONSTANT_BUFFER_size = 0;
         p->state.vs.PUSH_CONSTANT_BUFFER = 0;
      }

      session->pcb_state_vs_changed = true;
   }
}
#undef DIRTY |
/**
 * Emit all GEN6 draw-time commands.  The emission order below is part of
 * the contract: do not reorder these calls casually.
 */
static void
gen6_pipeline_commands(struct ilo_3d_pipeline *p,
                       const struct ilo_context *ilo,
                       struct gen6_pipeline_session *session)
{
   /*
    * We try to keep the order of the commands match, as closely as possible,
    * that of the classic i965 driver.  It allows us to compare the command
    * streams easily.
    */
   gen6_pipeline_common_select(p, ilo, session);
   gen6_pipeline_gs_svbi(p, ilo, session);
   gen6_pipeline_common_sip(p, ilo, session);
   gen6_pipeline_vf_statistics(p, ilo, session);
   gen6_pipeline_common_base_address(p, ilo, session);
   gen6_pipeline_common_pointers_1(p, ilo, session);
   gen6_pipeline_common_urb(p, ilo, session);
   gen6_pipeline_common_pointers_2(p, ilo, session);
   gen6_pipeline_wm_multisample(p, ilo, session);
   gen6_pipeline_vs(p, ilo, session);
   gen6_pipeline_gs(p, ilo, session);
   gen6_pipeline_clip(p, ilo, session);
   gen6_pipeline_sf(p, ilo, session);
   gen6_pipeline_wm(p, ilo, session);
   gen6_pipeline_common_pointers_3(p, ilo, session);
   gen6_pipeline_wm_depth(p, ilo, session);
   gen6_pipeline_wm_raster(p, ilo, session);
   gen6_pipeline_sf_rect(p, ilo, session);
   gen6_pipeline_vf(p, ilo, session);
   gen6_pipeline_vf_draw(p, ilo, session);
}
/**
 * Upload all dirty indirect states for a draw.  Installed as
 * session->emit_draw_states by the GEN6 pipeline.
 */
void
gen6_pipeline_states(struct ilo_3d_pipeline *p,
                     const struct ilo_context *ilo,
                     struct gen6_pipeline_session *session)
{
   int shader_type;

   gen6_pipeline_state_viewports(p, ilo, session);
   gen6_pipeline_state_cc(p, ilo, session);
   gen6_pipeline_state_scissors(p, ilo, session);
   gen6_pipeline_state_pcb(p, ilo, session);

   /*
    * upload all SURFACE_STATEs together so that we know there are minimal
    * paddings
    */
   gen6_pipeline_state_surfaces_rt(p, ilo, session);
   gen6_pipeline_state_surfaces_so(p, ilo, session);
   for (shader_type = 0; shader_type < PIPE_SHADER_TYPES; shader_type++) {
      gen6_pipeline_state_surfaces_view(p, ilo, shader_type, session);
      gen6_pipeline_state_surfaces_const(p, ilo, shader_type, session);
   }

   for (shader_type = 0; shader_type < PIPE_SHADER_TYPES; shader_type++) {
      gen6_pipeline_state_samplers(p, ilo, shader_type, session);
      /* this must be called after all SURFACE_STATEs are uploaded */
      gen6_pipeline_state_binding_tables(p, ilo, shader_type, session);
   }
}
void |
gen6_pipeline_prepare(const struct ilo_3d_pipeline *p, |
const struct ilo_context *ilo, |
struct gen6_pipeline_session *session) |
{ |
memset(session, 0, sizeof(*session)); |
session->pipe_dirty = ilo->dirty; |
session->reduced_prim = u_reduced_prim(ilo->draw->mode); |
/* available space before the session */ |
session->init_cp_space = ilo_cp_space(p->cp); |
session->hw_ctx_changed = |
(p->invalidate_flags & ILO_3D_PIPELINE_INVALIDATE_HW); |
if (session->hw_ctx_changed) { |
/* these should be enough to make everything uploaded */ |
session->batch_bo_changed = true; |
session->state_bo_changed = true; |
session->kernel_bo_changed = true; |
session->prim_changed = true; |
session->primitive_restart_changed = true; |
} |
else { |
/* |
* Any state that involves resources needs to be re-emitted when the |
* batch bo changed. This is because we do not pin the resources and |
* their offsets (or existence) may change between batch buffers. |
* |
* Since we messed around with ILO_3D_PIPELINE_INVALIDATE_BATCH_BO in |
* handle_invalid_batch_bo(), use ILO_3D_PIPELINE_INVALIDATE_STATE_BO as |
* a temporary workaround. |
*/ |
session->batch_bo_changed = |
(p->invalidate_flags & ILO_3D_PIPELINE_INVALIDATE_STATE_BO); |
session->state_bo_changed = |
(p->invalidate_flags & ILO_3D_PIPELINE_INVALIDATE_STATE_BO); |
session->kernel_bo_changed = |
(p->invalidate_flags & ILO_3D_PIPELINE_INVALIDATE_KERNEL_BO); |
session->prim_changed = (p->state.reduced_prim != session->reduced_prim); |
session->primitive_restart_changed = |
(p->state.primitive_restart != ilo->draw->primitive_restart); |
} |
} |
void |
gen6_pipeline_draw(struct ilo_3d_pipeline *p, |
const struct ilo_context *ilo, |
struct gen6_pipeline_session *session) |
{ |
/* force all states to be uploaded if the state bo changed */ |
if (session->state_bo_changed) |
session->pipe_dirty = ILO_DIRTY_ALL; |
else |
session->pipe_dirty = ilo->dirty; |
session->emit_draw_states(p, ilo, session); |
/* force all commands to be uploaded if the HW context changed */ |
if (session->hw_ctx_changed) |
session->pipe_dirty = ILO_DIRTY_ALL; |
else |
session->pipe_dirty = ilo->dirty; |
session->emit_draw_commands(p, ilo, session); |
} |
/**
 * Finish the draw: verify the size estimate covered what was actually
 * emitted, and remember the primitive state for the next draw's
 * change detection in gen6_pipeline_prepare().
 */
void
gen6_pipeline_end(struct ilo_3d_pipeline *p,
                  const struct ilo_context *ilo,
                  struct gen6_pipeline_session *session)
{
   /* sanity check size estimation */
   assert(session->init_cp_space - ilo_cp_space(p->cp) <=
         ilo_3d_pipeline_estimate_size(p, ILO_3D_PIPELINE_DRAW, ilo));

   p->state.reduced_prim = session->reduced_prim;
   p->state.primitive_restart = ilo->draw->primitive_restart;
}
static void |
ilo_3d_pipeline_emit_draw_gen6(struct ilo_3d_pipeline *p, |
const struct ilo_context *ilo) |
{ |
struct gen6_pipeline_session session; |
gen6_pipeline_prepare(p, ilo, &session); |
session.emit_draw_states = gen6_pipeline_states; |
session.emit_draw_commands = gen6_pipeline_commands; |
gen6_pipeline_draw(p, ilo, &session); |
gen6_pipeline_end(p, ilo, &session); |
} |
void |
ilo_3d_pipeline_emit_flush_gen6(struct ilo_3d_pipeline *p) |
{ |
if (p->dev->gen == ILO_GEN(6)) |
gen6_wa_pipe_control_post_sync(p, false); |
p->gen6_PIPE_CONTROL(p->dev, |
PIPE_CONTROL_INSTRUCTION_FLUSH | |
PIPE_CONTROL_WRITE_FLUSH | |
PIPE_CONTROL_DEPTH_CACHE_FLUSH | |
PIPE_CONTROL_VF_CACHE_INVALIDATE | |
PIPE_CONTROL_TC_FLUSH | |
PIPE_CONTROL_NO_WRITE | |
PIPE_CONTROL_CS_STALL, |
0, 0, false, p->cp); |
} |
/**
 * Emit a PIPE_CONTROL that writes the 64-bit timestamp into slot \p index
 * of \p bo.  On GEN6 the post-sync workaround is emitted first.
 */
void
ilo_3d_pipeline_emit_write_timestamp_gen6(struct ilo_3d_pipeline *p,
                                          struct intel_bo *bo, int index)
{
   if (p->dev->gen == ILO_GEN(6))
      gen6_wa_pipe_control_post_sync(p, true);

   /* the offset is byte-based; the GTT-write flag rides in the low bits */
   p->gen6_PIPE_CONTROL(p->dev,
         PIPE_CONTROL_WRITE_TIMESTAMP,
         bo, index * sizeof(uint64_t) | PIPE_CONTROL_GLOBAL_GTT_WRITE,
         true, p->cp);
}
/**
 * Emit a PIPE_CONTROL that writes the 64-bit depth (PS invocation) count
 * into slot \p index of \p bo, with a depth stall.  On GEN6 the post-sync
 * workaround is emitted first.
 */
void
ilo_3d_pipeline_emit_write_depth_count_gen6(struct ilo_3d_pipeline *p,
                                            struct intel_bo *bo, int index)
{
   if (p->dev->gen == ILO_GEN(6))
      gen6_wa_pipe_control_post_sync(p, false);

   p->gen6_PIPE_CONTROL(p->dev,
         PIPE_CONTROL_DEPTH_STALL |
         PIPE_CONTROL_WRITE_DEPTH_COUNT,
         bo, index * sizeof(uint64_t) | PIPE_CONTROL_GLOBAL_GTT_WRITE,
         true, p->cp);
}
/**
 * Estimate the worst-case command size of a draw.
 *
 * The result is cached in a function-local static: the estimate does not
 * depend on the current context state, only on per-command worst cases.
 * NOTE(review): the cache is process-wide, so it assumes the estimate is
 * identical for every device the process drives — confirm if multiple
 * devices can coexist.
 */
static int
gen6_pipeline_estimate_commands(const struct ilo_3d_pipeline *p,
                                const struct ilo_gpe_gen6 *gen6,
                                const struct ilo_context *ilo)
{
   static int size;
   enum ilo_gpe_gen6_command cmd;

   if (size)
      return size;

   for (cmd = 0; cmd < ILO_GPE_GEN6_COMMAND_COUNT; cmd++) {
      int count;

      switch (cmd) {
      case ILO_GPE_GEN6_PIPE_CONTROL:
         /* for the workaround */
         count = 2;
         /* another one after 3DSTATE_URB */
         count += 1;
         /* and another one after 3DSTATE_CONSTANT_VS */
         count += 1;
         break;
      case ILO_GPE_GEN6_3DSTATE_GS_SVB_INDEX:
         /* there are 4 SVBIs */
         count = 4;
         break;
      case ILO_GPE_GEN6_3DSTATE_VERTEX_BUFFERS:
         count = 33;
         break;
      case ILO_GPE_GEN6_3DSTATE_VERTEX_ELEMENTS:
         count = 34;
         break;
      case ILO_GPE_GEN6_MEDIA_VFE_STATE:
      case ILO_GPE_GEN6_MEDIA_CURBE_LOAD:
      case ILO_GPE_GEN6_MEDIA_INTERFACE_DESCRIPTOR_LOAD:
      case ILO_GPE_GEN6_MEDIA_GATEWAY_STATE:
      case ILO_GPE_GEN6_MEDIA_STATE_FLUSH:
      case ILO_GPE_GEN6_MEDIA_OBJECT_WALKER:
         /* media commands: never emitted by the 3D pipeline */
         count = 0;
         break;
      default:
         count = 1;
         break;
      }

      if (count)
         size += gen6->estimate_command_size(p->dev, cmd, count);
   }

   return size;
}
/**
 * Estimate the worst-case indirect-state size of a draw.
 *
 * The context-independent part (viewports, cc, scissors, binding tables)
 * is computed once and cached in a function-local static; the part that
 * depends on the bound context (surfaces, samplers, VS pcb) is recomputed
 * on every call.
 */
static int
gen6_pipeline_estimate_states(const struct ilo_3d_pipeline *p,
                              const struct ilo_gpe_gen6 *gen6,
                              const struct ilo_context *ilo)
{
   static int static_size;
   int shader_type, count, size;

   if (!static_size) {
      struct {
         enum ilo_gpe_gen6_state state;
         int count;
      } static_states[] = {
         /* viewports */
         { ILO_GPE_GEN6_SF_VIEWPORT,                 1 },
         { ILO_GPE_GEN6_CLIP_VIEWPORT,               1 },
         { ILO_GPE_GEN6_CC_VIEWPORT,                 1 },
         /* cc */
         { ILO_GPE_GEN6_COLOR_CALC_STATE,            1 },
         { ILO_GPE_GEN6_BLEND_STATE,                 ILO_MAX_DRAW_BUFFERS },
         { ILO_GPE_GEN6_DEPTH_STENCIL_STATE,         1 },
         /* scissors */
         { ILO_GPE_GEN6_SCISSOR_RECT,                1 },
         /* binding table (vs, gs, fs) */
         { ILO_GPE_GEN6_BINDING_TABLE_STATE,         ILO_MAX_VS_SURFACES },
         { ILO_GPE_GEN6_BINDING_TABLE_STATE,         ILO_MAX_GS_SURFACES },
         { ILO_GPE_GEN6_BINDING_TABLE_STATE,         ILO_MAX_WM_SURFACES },
      };
      int i;

      for (i = 0; i < Elements(static_states); i++) {
         static_size += gen6->estimate_state_size(p->dev,
               static_states[i].state,
               static_states[i].count);
      }
   }

   size = static_size;

   /*
    * render targets (fs)
    * stream outputs (gs)
    * sampler views (vs, fs)
    * constant buffers (vs, fs)
    */
   count = ilo->fb.state.nr_cbufs;

   if (ilo->gs) {
      const struct pipe_stream_output_info *so_info =
         ilo_shader_get_kernel_so_info(ilo->gs);

      count += so_info->num_outputs;
   }
   else if (ilo->vs) {
      const struct pipe_stream_output_info *so_info =
         ilo_shader_get_kernel_so_info(ilo->vs);

      count += so_info->num_outputs;
   }

   for (shader_type = 0; shader_type < PIPE_SHADER_TYPES; shader_type++) {
      count += ilo->view[shader_type].count;
      count += util_bitcount(ilo->cbuf[shader_type].enabled_mask);
   }

   if (count) {
      size += gen6->estimate_state_size(p->dev,
            ILO_GPE_GEN6_SURFACE_STATE, count);
   }

   /* samplers (vs, fs) */
   for (shader_type = 0; shader_type < PIPE_SHADER_TYPES; shader_type++) {
      count = ilo->sampler[shader_type].count;
      if (count) {
         size += gen6->estimate_state_size(p->dev,
               ILO_GPE_GEN6_SAMPLER_BORDER_COLOR_STATE, count);
         size += gen6->estimate_state_size(p->dev,
               ILO_GPE_GEN6_SAMPLER_STATE, count);
      }
   }

   /* pcb (vs): only when the VS kernel reserves space for user clip planes */
   if (ilo->vs &&
       ilo_shader_get_kernel_param(ilo->vs, ILO_KERNEL_VS_PCB_UCP_SIZE)) {
      const int pcb_size =
         ilo_shader_get_kernel_param(ilo->vs, ILO_KERNEL_VS_PCB_UCP_SIZE);

      size += gen6->estimate_state_size(p->dev,
            ILO_GPE_GEN6_PUSH_CONSTANT_BUFFER, pcb_size);
   }

   return size;
}
static int |
ilo_3d_pipeline_estimate_size_gen6(struct ilo_3d_pipeline *p, |
enum ilo_3d_pipeline_action action, |
const void *arg) |
{ |
const struct ilo_gpe_gen6 *gen6 = ilo_gpe_gen6_get(); |
int size; |
switch (action) { |
case ILO_3D_PIPELINE_DRAW: |
{ |
const struct ilo_context *ilo = arg; |
size = gen6_pipeline_estimate_commands(p, gen6, ilo) + |
gen6_pipeline_estimate_states(p, gen6, ilo); |
} |
break; |
case ILO_3D_PIPELINE_FLUSH: |
size = gen6->estimate_command_size(p->dev, |
ILO_GPE_GEN6_PIPE_CONTROL, 1) * 3; |
break; |
case ILO_3D_PIPELINE_WRITE_TIMESTAMP: |
size = gen6->estimate_command_size(p->dev, |
ILO_GPE_GEN6_PIPE_CONTROL, 1) * 2; |
break; |
case ILO_3D_PIPELINE_WRITE_DEPTH_COUNT: |
size = gen6->estimate_command_size(p->dev, |
ILO_GPE_GEN6_PIPE_CONTROL, 1) * 3; |
break; |
default: |
assert(!"unknown 3D pipeline action"); |
size = 0; |
break; |
} |
return size; |
} |
/**
 * Initialize the pipeline for GEN6: install the GEN6 top-level callbacks
 * and fill the per-command/per-state emitter table from the GEN6 GPE.
 */
void
ilo_3d_pipeline_init_gen6(struct ilo_3d_pipeline *p)
{
   const struct ilo_gpe_gen6 *gen6 = ilo_gpe_gen6_get();

   p->estimate_size = ilo_3d_pipeline_estimate_size_gen6;
   p->emit_draw = ilo_3d_pipeline_emit_draw_gen6;
   p->emit_flush = ilo_3d_pipeline_emit_flush_gen6;
   p->emit_write_timestamp = ilo_3d_pipeline_emit_write_timestamp_gen6;
   p->emit_write_depth_count = ilo_3d_pipeline_emit_write_depth_count_gen6;

/* bind p->gen6_<name> to the GPE's emit_<name> */
#define GEN6_USE(p, name, from) \
   p->gen6_ ## name = from->emit_ ## name

   /* command emitters */
   GEN6_USE(p, STATE_BASE_ADDRESS, gen6);
   GEN6_USE(p, STATE_SIP, gen6);
   GEN6_USE(p, PIPELINE_SELECT, gen6);
   GEN6_USE(p, 3DSTATE_BINDING_TABLE_POINTERS, gen6);
   GEN6_USE(p, 3DSTATE_SAMPLER_STATE_POINTERS, gen6);
   GEN6_USE(p, 3DSTATE_URB, gen6);
   GEN6_USE(p, 3DSTATE_VERTEX_BUFFERS, gen6);
   GEN6_USE(p, 3DSTATE_VERTEX_ELEMENTS, gen6);
   GEN6_USE(p, 3DSTATE_INDEX_BUFFER, gen6);
   GEN6_USE(p, 3DSTATE_VF_STATISTICS, gen6);
   GEN6_USE(p, 3DSTATE_VIEWPORT_STATE_POINTERS, gen6);
   GEN6_USE(p, 3DSTATE_CC_STATE_POINTERS, gen6);
   GEN6_USE(p, 3DSTATE_SCISSOR_STATE_POINTERS, gen6);
   GEN6_USE(p, 3DSTATE_VS, gen6);
   GEN6_USE(p, 3DSTATE_GS, gen6);
   GEN6_USE(p, 3DSTATE_CLIP, gen6);
   GEN6_USE(p, 3DSTATE_SF, gen6);
   GEN6_USE(p, 3DSTATE_WM, gen6);
   GEN6_USE(p, 3DSTATE_CONSTANT_VS, gen6);
   GEN6_USE(p, 3DSTATE_CONSTANT_GS, gen6);
   GEN6_USE(p, 3DSTATE_CONSTANT_PS, gen6);
   GEN6_USE(p, 3DSTATE_SAMPLE_MASK, gen6);
   GEN6_USE(p, 3DSTATE_DRAWING_RECTANGLE, gen6);
   GEN6_USE(p, 3DSTATE_DEPTH_BUFFER, gen6);
   GEN6_USE(p, 3DSTATE_POLY_STIPPLE_OFFSET, gen6);
   GEN6_USE(p, 3DSTATE_POLY_STIPPLE_PATTERN, gen6);
   GEN6_USE(p, 3DSTATE_LINE_STIPPLE, gen6);
   GEN6_USE(p, 3DSTATE_AA_LINE_PARAMETERS, gen6);
   GEN6_USE(p, 3DSTATE_GS_SVB_INDEX, gen6);
   GEN6_USE(p, 3DSTATE_MULTISAMPLE, gen6);
   GEN6_USE(p, 3DSTATE_STENCIL_BUFFER, gen6);
   GEN6_USE(p, 3DSTATE_HIER_DEPTH_BUFFER, gen6);
   GEN6_USE(p, 3DSTATE_CLEAR_PARAMS, gen6);
   GEN6_USE(p, PIPE_CONTROL, gen6);
   GEN6_USE(p, 3DPRIMITIVE, gen6);
   GEN6_USE(p, INTERFACE_DESCRIPTOR_DATA, gen6);

   /* indirect-state emitters */
   GEN6_USE(p, SF_VIEWPORT, gen6);
   GEN6_USE(p, CLIP_VIEWPORT, gen6);
   GEN6_USE(p, CC_VIEWPORT, gen6);
   GEN6_USE(p, COLOR_CALC_STATE, gen6);
   GEN6_USE(p, BLEND_STATE, gen6);
   GEN6_USE(p, DEPTH_STENCIL_STATE, gen6);
   GEN6_USE(p, SCISSOR_RECT, gen6);
   GEN6_USE(p, BINDING_TABLE_STATE, gen6);
   GEN6_USE(p, SURFACE_STATE, gen6);
   GEN6_USE(p, so_SURFACE_STATE, gen6);
   GEN6_USE(p, SAMPLER_STATE, gen6);
   GEN6_USE(p, SAMPLER_BORDER_COLOR_STATE, gen6);
   GEN6_USE(p, push_constant_buffer, gen6);

#undef GEN6_USE
}
/contrib/sdk/sources/Mesa/src/gallium/drivers/ilo/ilo_3d_pipeline_gen6.h |
---|
0,0 → 1,165 |
/* |
* Mesa 3-D graphics library |
* |
* Copyright (C) 2012-2013 LunarG, Inc. |
* |
* Permission is hereby granted, free of charge, to any person obtaining a |
* copy of this software and associated documentation files (the "Software"), |
* to deal in the Software without restriction, including without limitation |
* the rights to use, copy, modify, merge, publish, distribute, sublicense, |
* and/or sell copies of the Software, and to permit persons to whom the |
* Software is furnished to do so, subject to the following conditions: |
* |
* The above copyright notice and this permission notice shall be included |
* in all copies or substantial portions of the Software. |
* |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING |
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER |
* DEALINGS IN THE SOFTWARE. |
* |
* Authors: |
* Chia-I Wu <olv@lunarg.com> |
*/ |
#ifndef ILO_3D_PIPELINE_GEN6_H |
#define ILO_3D_PIPELINE_GEN6_H |
#include "ilo_common.h" |
struct ilo_3d_pipeline; |
struct ilo_context; |
/*
 * Per-draw bookkeeping shared by the GEN6 and GEN7 pipelines.  Prepared
 * by gen6_pipeline_prepare() and consumed/updated by the state and
 * command emitters during gen6_pipeline_draw().
 */
struct gen6_pipeline_session {
   uint32_t pipe_dirty;   /* effective ILO_DIRTY_* flags for the phase */
   int reduced_prim;      /* reduced primitive type of the current draw */

   int init_cp_space;     /* CP space available before any emission */

   /* what invalidated since the previous draw */
   bool hw_ctx_changed;
   bool batch_bo_changed;
   bool state_bo_changed;
   bool kernel_bo_changed;
   bool prim_changed;
   bool primitive_restart_changed;

   /* per-generation emitters installed by the pipeline entry point */
   void (*emit_draw_states)(struct ilo_3d_pipeline *p,
                            const struct ilo_context *ilo,
                            struct gen6_pipeline_session *session);

   void (*emit_draw_commands)(struct ilo_3d_pipeline *p,
                              const struct ilo_context *ilo,
                              struct gen6_pipeline_session *session);

   /* indirect states: which ones were re-uploaded this session */
   bool viewport_state_changed;
   bool cc_state_blend_changed;
   bool cc_state_dsa_changed;
   bool cc_state_cc_changed;
   bool scissor_state_changed;
   bool binding_table_vs_changed;
   bool binding_table_gs_changed;
   bool binding_table_fs_changed;
   bool sampler_state_vs_changed;
   bool sampler_state_gs_changed;
   bool sampler_state_fs_changed;
   bool pcb_state_vs_changed;
   bool pcb_state_gs_changed;
   bool pcb_state_fs_changed;

   /* highest surface slot in use, per shader stage */
   int num_surfaces[PIPE_SHADER_TYPES];
};
void |
gen6_pipeline_prepare(const struct ilo_3d_pipeline *p, |
const struct ilo_context *ilo, |
struct gen6_pipeline_session *session); |
void |
gen6_pipeline_draw(struct ilo_3d_pipeline *p, |
const struct ilo_context *ilo, |
struct gen6_pipeline_session *session); |
void |
gen6_pipeline_end(struct ilo_3d_pipeline *p, |
const struct ilo_context *ilo, |
struct gen6_pipeline_session *session); |
void |
gen6_pipeline_common_select(struct ilo_3d_pipeline *p, |
const struct ilo_context *ilo, |
struct gen6_pipeline_session *session); |
void |
gen6_pipeline_common_sip(struct ilo_3d_pipeline *p, |
const struct ilo_context *ilo, |
struct gen6_pipeline_session *session); |
void |
gen6_pipeline_common_base_address(struct ilo_3d_pipeline *p, |
const struct ilo_context *ilo, |
struct gen6_pipeline_session *session); |
void |
gen6_pipeline_vf(struct ilo_3d_pipeline *p, |
const struct ilo_context *ilo, |
struct gen6_pipeline_session *session); |
void |
gen6_pipeline_vf_statistics(struct ilo_3d_pipeline *p, |
const struct ilo_context *ilo, |
struct gen6_pipeline_session *session); |
void |
gen6_pipeline_vf_draw(struct ilo_3d_pipeline *p, |
const struct ilo_context *ilo, |
struct gen6_pipeline_session *session); |
void |
gen6_pipeline_vs(struct ilo_3d_pipeline *p, |
const struct ilo_context *ilo, |
struct gen6_pipeline_session *session); |
void |
gen6_pipeline_clip(struct ilo_3d_pipeline *p, |
const struct ilo_context *ilo, |
struct gen6_pipeline_session *session); |
void |
gen6_pipeline_sf_rect(struct ilo_3d_pipeline *p, |
const struct ilo_context *ilo, |
struct gen6_pipeline_session *session); |
void |
gen6_pipeline_wm_raster(struct ilo_3d_pipeline *p, |
const struct ilo_context *ilo, |
struct gen6_pipeline_session *session); |
void |
gen6_pipeline_states(struct ilo_3d_pipeline *p, |
const struct ilo_context *ilo, |
struct gen6_pipeline_session *session); |
bool |
gen6_pipeline_update_max_svbi(struct ilo_3d_pipeline *p, |
const struct ilo_context *ilo, |
struct gen6_pipeline_session *session); |
void |
ilo_3d_pipeline_emit_flush_gen6(struct ilo_3d_pipeline *p); |
void |
ilo_3d_pipeline_emit_write_timestamp_gen6(struct ilo_3d_pipeline *p, |
struct intel_bo *bo, int index); |
void |
ilo_3d_pipeline_emit_write_depth_count_gen6(struct ilo_3d_pipeline *p, |
struct intel_bo *bo, int index); |
void |
ilo_3d_pipeline_init_gen6(struct ilo_3d_pipeline *p); |
#endif /* ILO_3D_PIPELINE_GEN6_H */ |
/contrib/sdk/sources/Mesa/src/gallium/drivers/ilo/ilo_3d_pipeline_gen7.c |
---|
0,0 → 1,872 |
/* |
* Mesa 3-D graphics library |
* |
* Copyright (C) 2013 LunarG, Inc. |
* |
* Permission is hereby granted, free of charge, to any person obtaining a |
* copy of this software and associated documentation files (the "Software"), |
* to deal in the Software without restriction, including without limitation |
* the rights to use, copy, modify, merge, publish, distribute, sublicense, |
* and/or sell copies of the Software, and to permit persons to whom the |
* Software is furnished to do so, subject to the following conditions: |
* |
* The above copyright notice and this permission notice shall be included |
* in all copies or substantial portions of the Software. |
* |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING |
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER |
* DEALINGS IN THE SOFTWARE. |
* |
* Authors: |
* Chia-I Wu <olv@lunarg.com> |
*/ |
#include "util/u_dual_blend.h" |
#include "intel_reg.h" |
#include "ilo_common.h" |
#include "ilo_context.h" |
#include "ilo_cp.h" |
#include "ilo_gpe_gen7.h" |
#include "ilo_shader.h" |
#include "ilo_state.h" |
#include "ilo_3d_pipeline.h" |
#include "ilo_3d_pipeline_gen6.h" |
#include "ilo_3d_pipeline_gen7.h" |
/**
 * Emit the GEN7 CS-stall workaround PIPE_CONTROL, at most once per batch
 * (gated by has_gen6_wa_pipe_control, which is shared with the GEN6 path).
 */
static void
gen7_wa_pipe_control_cs_stall(struct ilo_3d_pipeline *p,
                              bool change_multisample_state,
                              bool change_depth_state)
{
   struct intel_bo *bo = NULL;
   uint32_t dw1 = PIPE_CONTROL_CS_STALL;

   assert(p->dev->gen == ILO_GEN(7));

   /* emit once */
   if (p->state.has_gen6_wa_pipe_control)
      return;

   p->state.has_gen6_wa_pipe_control = true;

   /*
    * From the Ivy Bridge PRM, volume 2 part 1, page 258:
    *
    *     "Due to an HW issue driver needs to send a pipe control with stall
    *      when ever there is state change in depth bias related state"
    *
    * From the Ivy Bridge PRM, volume 2 part 1, page 292:
    *
    *     "A PIPE_CONTOL command with the CS Stall bit set must be programmed
    *      in the ring after this instruction
    *      (3DSTATE_PUSH_CONSTANT_ALLOC_PS)."
    *
    * From the Ivy Bridge PRM, volume 2 part 1, page 304:
    *
    *     "Driver must ensure that all the caches in the depth pipe are
    *      flushed before this command (3DSTATE_MULTISAMPLE) is parsed.  This
    *      requires driver to send a PIPE_CONTROL with a CS stall along with a
    *      Depth Flush prior to this command.
    *
    * From the Ivy Bridge PRM, volume 2 part 1, page 315:
    *
    *     "Driver must send a least one PIPE_CONTROL command with CS Stall and
    *      a post sync operation prior to the group of depth
    *      commands(3DSTATE_DEPTH_BUFFER, 3DSTATE_CLEAR_PARAMS,
    *      3DSTATE_STENCIL_BUFFER, and 3DSTATE_HIER_DEPTH_BUFFER)."
    */
   if (change_multisample_state)
      dw1 |= PIPE_CONTROL_DEPTH_CACHE_FLUSH;

   if (change_depth_state) {
      /* the post-sync write satisfies the "post sync operation" requirement */
      dw1 |= PIPE_CONTROL_WRITE_IMMEDIATE;
      bo = p->workaround_bo;
   }

   p->gen6_PIPE_CONTROL(p->dev, dw1, bo, 0, false, p->cp);
}
static void |
gen7_wa_pipe_control_vs_depth_stall(struct ilo_3d_pipeline *p) |
{ |
assert(p->dev->gen == ILO_GEN(7)); |
/* |
* From the Ivy Bridge PRM, volume 2 part 1, page 106: |
* |
* "A PIPE_CONTROL with Post-Sync Operation set to 1h and a depth stall |
* needs to be sent just prior to any 3DSTATE_VS, 3DSTATE_URB_VS, |
* 3DSTATE_CONSTANT_VS, 3DSTATE_BINDING_TABLE_POINTER_VS, |
* 3DSTATE_SAMPLER_STATE_POINTER_VS command. Only one PIPE_CONTROL |
* needs to be sent before any combination of VS associated 3DSTATE." |
*/ |
p->gen6_PIPE_CONTROL(p->dev, |
PIPE_CONTROL_DEPTH_STALL | |
PIPE_CONTROL_WRITE_IMMEDIATE, |
p->workaround_bo, 0, false, p->cp); |
} |
static void |
gen7_wa_pipe_control_wm_depth_stall(struct ilo_3d_pipeline *p, |
bool change_depth_buffer) |
{ |
assert(p->dev->gen == ILO_GEN(7)); |
/* |
* From the Ivy Bridge PRM, volume 2 part 1, page 276: |
* |
* "The driver must make sure a PIPE_CONTROL with the Depth Stall |
* Enable bit set after all the following states are programmed: |
* |
* * 3DSTATE_PS |
* * 3DSTATE_VIEWPORT_STATE_POINTERS_CC |
* * 3DSTATE_CONSTANT_PS |
* * 3DSTATE_BINDING_TABLE_POINTERS_PS |
* * 3DSTATE_SAMPLER_STATE_POINTERS_PS |
* * 3DSTATE_CC_STATE_POINTERS |
* * 3DSTATE_BLEND_STATE_POINTERS |
* * 3DSTATE_DEPTH_STENCIL_STATE_POINTERS" |
* |
* From the Ivy Bridge PRM, volume 2 part 1, page 315: |
* |
* "Restriction: Prior to changing Depth/Stencil Buffer state (i.e., |
* any combination of 3DSTATE_DEPTH_BUFFER, 3DSTATE_CLEAR_PARAMS, |
* 3DSTATE_STENCIL_BUFFER, 3DSTATE_HIER_DEPTH_BUFFER) SW must first |
* issue a pipelined depth stall (PIPE_CONTROL with Depth Stall bit |
* set), followed by a pipelined depth cache flush (PIPE_CONTROL with |
* Depth Flush Bit set, followed by another pipelined depth stall |
* (PIPE_CONTROL with Depth Stall Bit set), unless SW can otherwise |
* guarantee that the pipeline from WM onwards is already flushed |
* (e.g., via a preceding MI_FLUSH)." |
*/ |
p->gen6_PIPE_CONTROL(p->dev, |
PIPE_CONTROL_DEPTH_STALL, |
NULL, 0, false, p->cp); |
if (!change_depth_buffer) |
return; |
p->gen6_PIPE_CONTROL(p->dev, |
PIPE_CONTROL_DEPTH_CACHE_FLUSH, |
NULL, 0, false, p->cp); |
p->gen6_PIPE_CONTROL(p->dev, |
PIPE_CONTROL_DEPTH_STALL, |
NULL, 0, false, p->cp); |
} |
static void |
gen7_wa_pipe_control_wm_max_threads_stall(struct ilo_3d_pipeline *p) |
{ |
assert(p->dev->gen == ILO_GEN(7)); |
/* |
* From the Ivy Bridge PRM, volume 2 part 1, page 286: |
* |
* "If this field (Maximum Number of Threads in 3DSTATE_WM) is changed |
* between 3DPRIMITIVE commands, a PIPE_CONTROL command with Stall at |
* Pixel Scoreboard set is required to be issued." |
*/ |
p->gen6_PIPE_CONTROL(p->dev, |
PIPE_CONTROL_STALL_AT_SCOREBOARD, |
NULL, 0, false, p->cp); |
} |
#define DIRTY(state) (session->pipe_dirty & ILO_DIRTY_ ## state) |
static void |
gen7_pipeline_common_urb(struct ilo_3d_pipeline *p, |
const struct ilo_context *ilo, |
struct gen6_pipeline_session *session) |
{ |
/* 3DSTATE_URB_{VS,GS,HS,DS} */ |
if (DIRTY(VE) || DIRTY(VS)) { |
/* the first 16KB are reserved for VS and PS PCBs */ |
const int offset = 16 * 1024; |
int vs_entry_size, vs_total_size; |
vs_entry_size = (ilo->vs) ? |
ilo_shader_get_kernel_param(ilo->vs, ILO_KERNEL_OUTPUT_COUNT) : 0; |
/* |
* From the Ivy Bridge PRM, volume 2 part 1, page 35: |
* |
* "Programming Restriction: As the VS URB entry serves as both the |
* per-vertex input and output of the VS shader, the VS URB |
* Allocation Size must be sized to the maximum of the vertex input |
* and output structures." |
*/ |
if (vs_entry_size < ilo->ve->count) |
vs_entry_size = ilo->ve->count; |
vs_entry_size *= sizeof(float) * 4; |
vs_total_size = ilo->dev->urb_size - offset; |
gen7_wa_pipe_control_vs_depth_stall(p); |
p->gen7_3DSTATE_URB_VS(p->dev, |
offset, vs_total_size, vs_entry_size, p->cp); |
p->gen7_3DSTATE_URB_GS(p->dev, offset, 0, 0, p->cp); |
p->gen7_3DSTATE_URB_HS(p->dev, offset, 0, 0, p->cp); |
p->gen7_3DSTATE_URB_DS(p->dev, offset, 0, 0, p->cp); |
} |
} |
static void |
gen7_pipeline_common_pcb_alloc(struct ilo_3d_pipeline *p, |
const struct ilo_context *ilo, |
struct gen6_pipeline_session *session) |
{ |
/* 3DSTATE_PUSH_CONSTANT_ALLOC_{VS,PS} */ |
if (session->hw_ctx_changed) { |
/* |
* push constant buffers are only allowed to take up at most the first |
* 16KB of the URB |
*/ |
p->gen7_3DSTATE_PUSH_CONSTANT_ALLOC_VS(p->dev, |
0, 8192, p->cp); |
p->gen7_3DSTATE_PUSH_CONSTANT_ALLOC_PS(p->dev, |
8192, 8192, p->cp); |
gen7_wa_pipe_control_cs_stall(p, true, true); |
} |
} |
static void |
gen7_pipeline_common_pointers_1(struct ilo_3d_pipeline *p, |
const struct ilo_context *ilo, |
struct gen6_pipeline_session *session) |
{ |
/* 3DSTATE_VIEWPORT_STATE_POINTERS_{CC,SF_CLIP} */ |
if (session->viewport_state_changed) { |
p->gen7_3DSTATE_VIEWPORT_STATE_POINTERS_CC(p->dev, |
p->state.CC_VIEWPORT, p->cp); |
p->gen7_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP(p->dev, |
p->state.SF_CLIP_VIEWPORT, p->cp); |
} |
} |
static void |
gen7_pipeline_common_pointers_2(struct ilo_3d_pipeline *p, |
const struct ilo_context *ilo, |
struct gen6_pipeline_session *session) |
{ |
/* 3DSTATE_BLEND_STATE_POINTERS */ |
if (session->cc_state_blend_changed) { |
p->gen7_3DSTATE_BLEND_STATE_POINTERS(p->dev, |
p->state.BLEND_STATE, p->cp); |
} |
/* 3DSTATE_CC_STATE_POINTERS */ |
if (session->cc_state_cc_changed) { |
p->gen7_3DSTATE_CC_STATE_POINTERS(p->dev, |
p->state.COLOR_CALC_STATE, p->cp); |
} |
/* 3DSTATE_DEPTH_STENCIL_STATE_POINTERS */ |
if (session->cc_state_dsa_changed) { |
p->gen7_3DSTATE_DEPTH_STENCIL_STATE_POINTERS(p->dev, |
p->state.DEPTH_STENCIL_STATE, p->cp); |
} |
} |
static void |
gen7_pipeline_vs(struct ilo_3d_pipeline *p, |
const struct ilo_context *ilo, |
struct gen6_pipeline_session *session) |
{ |
const bool emit_3dstate_binding_table = session->binding_table_vs_changed; |
const bool emit_3dstate_sampler_state = session->sampler_state_vs_changed; |
/* see gen6_pipeline_vs() */ |
const bool emit_3dstate_constant_vs = session->pcb_state_vs_changed; |
const bool emit_3dstate_vs = (DIRTY(VS) || DIRTY(SAMPLER_VS)); |
/* emit depth stall before any of the VS commands */ |
if (emit_3dstate_binding_table || emit_3dstate_sampler_state || |
emit_3dstate_constant_vs || emit_3dstate_vs) |
gen7_wa_pipe_control_vs_depth_stall(p); |
/* 3DSTATE_BINDING_TABLE_POINTERS_VS */ |
if (emit_3dstate_binding_table) { |
p->gen7_3DSTATE_BINDING_TABLE_POINTERS_VS(p->dev, |
p->state.vs.BINDING_TABLE_STATE, p->cp); |
} |
/* 3DSTATE_SAMPLER_STATE_POINTERS_VS */ |
if (emit_3dstate_sampler_state) { |
p->gen7_3DSTATE_SAMPLER_STATE_POINTERS_VS(p->dev, |
p->state.vs.SAMPLER_STATE, p->cp); |
} |
gen6_pipeline_vs(p, ilo, session); |
} |
static void |
gen7_pipeline_hs(struct ilo_3d_pipeline *p, |
const struct ilo_context *ilo, |
struct gen6_pipeline_session *session) |
{ |
/* 3DSTATE_CONSTANT_HS and 3DSTATE_HS */ |
if (session->hw_ctx_changed) { |
p->gen7_3DSTATE_CONSTANT_HS(p->dev, 0, 0, 0, p->cp); |
p->gen7_3DSTATE_HS(p->dev, NULL, 0, p->cp); |
} |
/* 3DSTATE_BINDING_TABLE_POINTERS_HS */ |
if (session->hw_ctx_changed) |
p->gen7_3DSTATE_BINDING_TABLE_POINTERS_HS(p->dev, 0, p->cp); |
} |
static void |
gen7_pipeline_te(struct ilo_3d_pipeline *p, |
const struct ilo_context *ilo, |
struct gen6_pipeline_session *session) |
{ |
/* 3DSTATE_TE */ |
if (session->hw_ctx_changed) |
p->gen7_3DSTATE_TE(p->dev, p->cp); |
} |
static void |
gen7_pipeline_ds(struct ilo_3d_pipeline *p, |
const struct ilo_context *ilo, |
struct gen6_pipeline_session *session) |
{ |
/* 3DSTATE_CONSTANT_DS and 3DSTATE_DS */ |
if (session->hw_ctx_changed) { |
p->gen7_3DSTATE_CONSTANT_DS(p->dev, 0, 0, 0, p->cp); |
p->gen7_3DSTATE_DS(p->dev, NULL, 0, p->cp); |
} |
/* 3DSTATE_BINDING_TABLE_POINTERS_DS */ |
if (session->hw_ctx_changed) |
p->gen7_3DSTATE_BINDING_TABLE_POINTERS_DS(p->dev, 0, p->cp); |
} |
static void |
gen7_pipeline_gs(struct ilo_3d_pipeline *p, |
const struct ilo_context *ilo, |
struct gen6_pipeline_session *session) |
{ |
/* 3DSTATE_CONSTANT_GS and 3DSTATE_GS */ |
if (session->hw_ctx_changed) { |
p->gen6_3DSTATE_CONSTANT_GS(p->dev, 0, 0, 0, p->cp); |
p->gen7_3DSTATE_GS(p->dev, NULL, 0, p->cp); |
} |
/* 3DSTATE_BINDING_TABLE_POINTERS_GS */ |
if (session->binding_table_gs_changed) { |
p->gen7_3DSTATE_BINDING_TABLE_POINTERS_GS(p->dev, |
p->state.gs.BINDING_TABLE_STATE, p->cp); |
} |
} |
/**
 * Emit the stream output (transform feedback) commands.  The stream output
 * source is the GS when one is bound, otherwise the VS.
 */
static void
gen7_pipeline_sol(struct ilo_3d_pipeline *p,
                  const struct ilo_context *ilo,
                  struct gen6_pipeline_session *session)
{
   const struct pipe_stream_output_info *so_info;
   const struct ilo_shader_state *shader;
   bool dirty_sh = false;
   /* stream output comes from the last enabled geometry stage */
   if (ilo->gs) {
      shader = ilo->gs;
      dirty_sh = DIRTY(GS);
   }
   else {
      shader = ilo->vs;
      dirty_sh = DIRTY(VS);
   }
   so_info = ilo_shader_get_kernel_so_info(shader);
   gen6_pipeline_update_max_svbi(p, ilo, session);
   /* 3DSTATE_SO_BUFFER */
   if ((DIRTY(SO) || dirty_sh || session->batch_bo_changed) &&
       ilo->so.enabled) {
      int i;
      for (i = 0; i < ilo->so.count; i++) {
         const int stride = so_info->stride[i] * 4; /* in bytes */
         int base = 0;
         /*
          * reset HW write offsets and offset buffer base
          *
          * NOTE(review): with no render context yet, ask the kernel to reset
          * the SO write offsets (GEN7_SOL_RESET) and advance the buffer base
          * past the vertices already written -- presumably so writes continue
          * appending; confirm against ilo_cp/winsys behavior.
          */
         if (!p->cp->render_ctx) {
            ilo_cp_set_one_off_flags(p->cp, INTEL_EXEC_GEN7_SOL_RESET);
            base += p->state.so_num_vertices * stride;
         }
         p->gen7_3DSTATE_SO_BUFFER(p->dev, i, base, stride,
               ilo->so.states[i], p->cp);
      }
      /* unbind the remaining SO buffer slots */
      for (; i < 4; i++)
         p->gen7_3DSTATE_SO_BUFFER(p->dev, i, 0, 0, NULL, p->cp);
   }
   /* 3DSTATE_SO_DECL_LIST */
   if (dirty_sh && ilo->so.enabled)
      p->gen7_3DSTATE_SO_DECL_LIST(p->dev, so_info, p->cp);
   /* 3DSTATE_STREAMOUT */
   if (DIRTY(SO) || DIRTY(RASTERIZER) || dirty_sh) {
      /* one bit per bound SO buffer */
      const unsigned buffer_mask = (1 << ilo->so.count) - 1;
      const int output_count = ilo_shader_get_kernel_param(shader,
            ILO_KERNEL_OUTPUT_COUNT);
      p->gen7_3DSTATE_STREAMOUT(p->dev, buffer_mask, output_count,
            ilo->rasterizer->state.rasterizer_discard, p->cp);
   }
}
static void |
gen7_pipeline_sf(struct ilo_3d_pipeline *p, |
const struct ilo_context *ilo, |
struct gen6_pipeline_session *session) |
{ |
/* 3DSTATE_SBE */ |
if (DIRTY(RASTERIZER) || DIRTY(VS) || DIRTY(GS) || DIRTY(FS)) { |
p->gen7_3DSTATE_SBE(p->dev, ilo->rasterizer, ilo->fs, |
(ilo->gs) ? ilo->gs : ilo->vs, ilo->cp); |
} |
/* 3DSTATE_SF */ |
if (DIRTY(RASTERIZER) || DIRTY(FB)) { |
gen7_wa_pipe_control_cs_stall(p, true, true); |
p->gen7_3DSTATE_SF(p->dev, ilo->rasterizer, ilo->fb.state.zsbuf, p->cp); |
} |
} |
/**
 * Emit the fragment (WM) stage commands and the depth/stencil buffer state,
 * including the depth-stall workarounds the PRM requires between them.
 */
static void
gen7_pipeline_wm(struct ilo_3d_pipeline *p,
                 const struct ilo_context *ilo,
                 struct gen6_pipeline_session *session)
{
   /* 3DSTATE_WM */
   if (DIRTY(FS) || DIRTY(BLEND) || DIRTY(DSA) || DIRTY(RASTERIZER)) {
      /* alpha test or alpha-to-coverage may kill pixels in the CC stage */
      const bool cc_may_kill = (ilo->dsa->alpha.enabled ||
                                ilo->blend->alpha_to_coverage);
      /* a scoreboard stall is needed when WM max threads may change */
      if (p->dev->gen == ILO_GEN(7) && session->hw_ctx_changed)
         gen7_wa_pipe_control_wm_max_threads_stall(p);
      p->gen7_3DSTATE_WM(p->dev, ilo->fs,
            ilo->rasterizer, cc_may_kill, p->cp);
   }
   /* 3DSTATE_BINDING_TABLE_POINTERS_PS */
   if (session->binding_table_fs_changed) {
      p->gen7_3DSTATE_BINDING_TABLE_POINTERS_PS(p->dev,
            p->state.wm.BINDING_TABLE_STATE, p->cp);
   }
   /* 3DSTATE_SAMPLER_STATE_POINTERS_PS */
   if (session->sampler_state_fs_changed) {
      p->gen7_3DSTATE_SAMPLER_STATE_POINTERS_PS(p->dev,
            p->state.wm.SAMPLER_STATE, p->cp);
   }
   /* 3DSTATE_CONSTANT_PS: emitted with no constant buffers */
   if (session->pcb_state_fs_changed)
      p->gen6_3DSTATE_CONSTANT_PS(p->dev, NULL, NULL, 0, p->cp);
   /* 3DSTATE_PS */
   if (DIRTY(FS) || DIRTY(SAMPLER_FS) || DIRTY(BLEND) ||
       session->kernel_bo_changed) {
      const int num_samplers = ilo->sampler[PIPE_SHADER_FRAGMENT].count;
      const bool dual_blend = ilo->blend->dual_blend;
      p->gen7_3DSTATE_PS(p->dev, ilo->fs, num_samplers, dual_blend, p->cp);
   }
   /* 3DSTATE_SCISSOR_STATE_POINTERS */
   if (session->scissor_state_changed) {
      p->gen6_3DSTATE_SCISSOR_STATE_POINTERS(p->dev,
            p->state.SCISSOR_RECT, p->cp);
   }
   /* XXX what is the best way to know if this workaround is needed? */
   {
      const bool emit_3dstate_ps =
         (DIRTY(FS) || DIRTY(SAMPLER_FS) || DIRTY(BLEND));
      const bool emit_3dstate_depth_buffer =
         (DIRTY(FB) || DIRTY(DSA) || session->state_bo_changed);
      /*
       * emit the depth stall if any state listed in
       * gen7_wa_pipe_control_wm_depth_stall() may have been emitted above
       */
      if (emit_3dstate_ps ||
          emit_3dstate_depth_buffer ||
          session->pcb_state_fs_changed ||
          session->viewport_state_changed ||
          session->binding_table_fs_changed ||
          session->sampler_state_fs_changed ||
          session->cc_state_cc_changed ||
          session->cc_state_blend_changed ||
          session->cc_state_dsa_changed)
         gen7_wa_pipe_control_wm_depth_stall(p, emit_3dstate_depth_buffer);
   }
   /* 3DSTATE_DEPTH_BUFFER and 3DSTATE_CLEAR_PARAMS */
   if (DIRTY(FB) || session->batch_bo_changed) {
      const struct ilo_zs_surface *zs;
      if (ilo->fb.state.zsbuf) {
         const struct ilo_surface_cso *surface =
            (const struct ilo_surface_cso *) ilo->fb.state.zsbuf;
         /* the bound zsbuf must not be a render-target surface */
         assert(!surface->is_rt);
         zs = &surface->u.zs;
      }
      else {
         /* no depth/stencil attachment; use the null surface */
         zs = &ilo->fb.null_zs;
      }
      p->gen7_3DSTATE_DEPTH_BUFFER(p->dev, zs, p->cp);
      p->gen6_3DSTATE_HIER_DEPTH_BUFFER(p->dev, zs, p->cp);
      p->gen6_3DSTATE_STENCIL_BUFFER(p->dev, zs, p->cp);
      /* TODO: emit real clear parameters instead of 0 */
      p->gen6_3DSTATE_CLEAR_PARAMS(p->dev, 0, p->cp);
   }
}
static void |
gen7_pipeline_wm_multisample(struct ilo_3d_pipeline *p, |
const struct ilo_context *ilo, |
struct gen6_pipeline_session *session) |
{ |
/* 3DSTATE_MULTISAMPLE and 3DSTATE_SAMPLE_MASK */ |
if (DIRTY(SAMPLE_MASK) || DIRTY(FB)) { |
const uint32_t *packed_sample_pos; |
gen7_wa_pipe_control_cs_stall(p, true, true); |
packed_sample_pos = |
(ilo->fb.num_samples > 4) ? p->packed_sample_position_8x : |
(ilo->fb.num_samples > 1) ? &p->packed_sample_position_4x : |
&p->packed_sample_position_1x; |
p->gen6_3DSTATE_MULTISAMPLE(p->dev, |
ilo->fb.num_samples, packed_sample_pos, |
ilo->rasterizer->state.half_pixel_center, p->cp); |
p->gen7_3DSTATE_SAMPLE_MASK(p->dev, |
(ilo->fb.num_samples > 1) ? ilo->sample_mask : 0x1, |
ilo->fb.num_samples, p->cp); |
} |
} |
/**
 * Emit all state commands and the 3DPRIMITIVE for a draw.  The emission
 * order is significant: the helpers contain ordering workarounds.
 */
static void
gen7_pipeline_commands(struct ilo_3d_pipeline *p,
                       const struct ilo_context *ilo,
                       struct gen6_pipeline_session *session)
{
   /*
    * We try to keep the order of the commands match, as closely as possible,
    * that of the classic i965 driver.  It allows us to compare the command
    * streams easily.
    */
   gen6_pipeline_common_select(p, ilo, session);
   gen6_pipeline_common_sip(p, ilo, session);
   gen6_pipeline_vf_statistics(p, ilo, session);
   gen7_pipeline_common_pcb_alloc(p, ilo, session);
   gen6_pipeline_common_base_address(p, ilo, session);
   gen7_pipeline_common_pointers_1(p, ilo, session);
   gen7_pipeline_common_urb(p, ilo, session);
   gen7_pipeline_common_pointers_2(p, ilo, session);
   gen7_pipeline_wm_multisample(p, ilo, session);
   gen7_pipeline_gs(p, ilo, session);
   gen7_pipeline_hs(p, ilo, session);
   gen7_pipeline_te(p, ilo, session);
   gen7_pipeline_ds(p, ilo, session);
   gen7_pipeline_vs(p, ilo, session);
   gen7_pipeline_sol(p, ilo, session);
   gen6_pipeline_clip(p, ilo, session);
   gen7_pipeline_sf(p, ilo, session);
   gen7_pipeline_wm(p, ilo, session);
   gen6_pipeline_wm_raster(p, ilo, session);
   gen6_pipeline_sf_rect(p, ilo, session);
   gen6_pipeline_vf(p, ilo, session);
   gen6_pipeline_vf_draw(p, ilo, session);
}
static void |
ilo_3d_pipeline_emit_draw_gen7(struct ilo_3d_pipeline *p, |
const struct ilo_context *ilo) |
{ |
struct gen6_pipeline_session session; |
gen6_pipeline_prepare(p, ilo, &session); |
session.emit_draw_states = gen6_pipeline_states; |
session.emit_draw_commands = gen7_pipeline_commands; |
gen6_pipeline_draw(p, ilo, &session); |
gen6_pipeline_end(p, ilo, &session); |
} |
/**
 * Estimate the worst-case size, in bytes, of the commands emitted for a
 * draw.  The total is computed once and cached in a function-local static.
 * NOTE(review): the static cache is not thread-safe if two contexts
 * estimate concurrently -- benign only if sizes are device-independent and
 * writes race to the same value; confirm.
 */
static int
gen7_pipeline_estimate_commands(const struct ilo_3d_pipeline *p,
                                const struct ilo_gpe_gen7 *gen7,
                                const struct ilo_context *ilo)
{
   static int size;
   enum ilo_gpe_gen7_command cmd;
   /* return the cached total */
   if (size)
      return size;
   for (cmd = 0; cmd < ILO_GPE_GEN7_COMMAND_COUNT; cmd++) {
      int count;
      switch (cmd) {
      case ILO_GPE_GEN7_PIPE_CONTROL:
         /* for the workaround */
         count = 2;
         /* another one after 3DSTATE_URB */
         count += 1;
         /* and another one after 3DSTATE_CONSTANT_VS */
         count += 1;
         break;
      case ILO_GPE_GEN7_3DSTATE_VERTEX_BUFFERS:
         count = 33;
         break;
      case ILO_GPE_GEN7_3DSTATE_VERTEX_ELEMENTS:
         count = 34;
         break;
      case ILO_GPE_GEN7_MEDIA_VFE_STATE:
      case ILO_GPE_GEN7_MEDIA_CURBE_LOAD:
      case ILO_GPE_GEN7_MEDIA_INTERFACE_DESCRIPTOR_LOAD:
      case ILO_GPE_GEN7_MEDIA_STATE_FLUSH:
      case ILO_GPE_GEN7_GPGPU_WALKER:
         /* media commands: not counted for 3D draws */
         count = 0;
         break;
      default:
         /* every other command is emitted at most once */
         count = 1;
         break;
      }
      if (count) {
         size += gen7->estimate_command_size(p->dev,
               cmd, count);
      }
   }
   return size;
}
/**
 * Estimate the worst-case size, in bytes, of the dynamic and surface states
 * emitted for a draw.  The context-independent portion is computed once and
 * cached.  NOTE(review): function-local static cache -- same thread-safety
 * caveat as gen7_pipeline_estimate_commands(); confirm callers.
 */
static int
gen7_pipeline_estimate_states(const struct ilo_3d_pipeline *p,
                              const struct ilo_gpe_gen7 *gen7,
                              const struct ilo_context *ilo)
{
   static int static_size;
   int shader_type, count, size;
   if (!static_size) {
      /* states whose counts do not depend on the bound context state */
      struct {
         enum ilo_gpe_gen7_state state;
         int count;
      } static_states[] = {
         /* viewports */
         { ILO_GPE_GEN7_SF_CLIP_VIEWPORT, 1 },
         { ILO_GPE_GEN7_CC_VIEWPORT, 1 },
         /* cc */
         { ILO_GPE_GEN7_COLOR_CALC_STATE, 1 },
         { ILO_GPE_GEN7_BLEND_STATE, ILO_MAX_DRAW_BUFFERS },
         { ILO_GPE_GEN7_DEPTH_STENCIL_STATE, 1 },
         /* scissors */
         { ILO_GPE_GEN7_SCISSOR_RECT, 1 },
         /* binding table (vs, gs, fs) */
         { ILO_GPE_GEN7_BINDING_TABLE_STATE, ILO_MAX_VS_SURFACES },
         { ILO_GPE_GEN7_BINDING_TABLE_STATE, ILO_MAX_GS_SURFACES },
         { ILO_GPE_GEN7_BINDING_TABLE_STATE, ILO_MAX_WM_SURFACES },
      };
      int i;
      for (i = 0; i < Elements(static_states); i++) {
         static_size += gen7->estimate_state_size(p->dev,
               static_states[i].state,
               static_states[i].count);
      }
   }
   size = static_size;
   /*
    * render targets (fs)
    * sampler views (vs, fs)
    * constant buffers (vs, fs)
    */
   count = ilo->fb.state.nr_cbufs;
   for (shader_type = 0; shader_type < PIPE_SHADER_TYPES; shader_type++) {
      count += ilo->view[shader_type].count;
      count += util_bitcount(ilo->cbuf[shader_type].enabled_mask);
   }
   if (count) {
      size += gen7->estimate_state_size(p->dev,
            ILO_GPE_GEN7_SURFACE_STATE, count);
   }
   /* samplers (vs, fs): each needs a sampler state and a border color */
   for (shader_type = 0; shader_type < PIPE_SHADER_TYPES; shader_type++) {
      count = ilo->sampler[shader_type].count;
      if (count) {
         size += gen7->estimate_state_size(p->dev,
               ILO_GPE_GEN7_SAMPLER_BORDER_COLOR_STATE, count);
         size += gen7->estimate_state_size(p->dev,
               ILO_GPE_GEN7_SAMPLER_STATE, count);
      }
   }
   /* pcb (vs): VS push constant buffer (UCP -- presumably user clip planes) */
   if (ilo->vs &&
       ilo_shader_get_kernel_param(ilo->vs, ILO_KERNEL_VS_PCB_UCP_SIZE)) {
      const int pcb_size =
         ilo_shader_get_kernel_param(ilo->vs, ILO_KERNEL_VS_PCB_UCP_SIZE);
      size += gen7->estimate_state_size(p->dev,
            ILO_GPE_GEN7_PUSH_CONSTANT_BUFFER, pcb_size);
   }
   return size;
}
static int |
ilo_3d_pipeline_estimate_size_gen7(struct ilo_3d_pipeline *p, |
enum ilo_3d_pipeline_action action, |
const void *arg) |
{ |
const struct ilo_gpe_gen7 *gen7 = ilo_gpe_gen7_get(); |
int size; |
switch (action) { |
case ILO_3D_PIPELINE_DRAW: |
{ |
const struct ilo_context *ilo = arg; |
size = gen7_pipeline_estimate_commands(p, gen7, ilo) + |
gen7_pipeline_estimate_states(p, gen7, ilo); |
} |
break; |
case ILO_3D_PIPELINE_FLUSH: |
case ILO_3D_PIPELINE_WRITE_TIMESTAMP: |
case ILO_3D_PIPELINE_WRITE_DEPTH_COUNT: |
size = gen7->estimate_command_size(p->dev, |
ILO_GPE_GEN7_PIPE_CONTROL, 1); |
break; |
default: |
assert(!"unknown 3D pipeline action"); |
size = 0; |
break; |
} |
return size; |
} |
/**
 * Initialize the function pointers of a Gen7 3D pipeline.  The Gen7 GPE
 * provides both the Gen6-compatible emitters (wired via GEN6_USE) and the
 * Gen7-only emitters (wired via GEN7_USE).
 */
void
ilo_3d_pipeline_init_gen7(struct ilo_3d_pipeline *p)
{
   const struct ilo_gpe_gen7 *gen7 = ilo_gpe_gen7_get();
   p->estimate_size = ilo_3d_pipeline_estimate_size_gen7;
   p->emit_draw = ilo_3d_pipeline_emit_draw_gen7;
   /* flush and query writes reuse the Gen6 implementations */
   p->emit_flush = ilo_3d_pipeline_emit_flush_gen6;
   p->emit_write_timestamp = ilo_3d_pipeline_emit_write_timestamp_gen6;
   p->emit_write_depth_count = ilo_3d_pipeline_emit_write_depth_count_gen6;
/* wire a gen6_* pipeline hook to the corresponding Gen7 GPE emitter */
#define GEN6_USE(p, name, from) \
   p->gen6_ ## name = from->emit_ ## name
   GEN6_USE(p, STATE_BASE_ADDRESS, gen7);
   GEN6_USE(p, STATE_SIP, gen7);
   GEN6_USE(p, PIPELINE_SELECT, gen7);
   GEN6_USE(p, 3DSTATE_VERTEX_BUFFERS, gen7);
   GEN6_USE(p, 3DSTATE_VERTEX_ELEMENTS, gen7);
   GEN6_USE(p, 3DSTATE_INDEX_BUFFER, gen7);
   GEN6_USE(p, 3DSTATE_VF_STATISTICS, gen7);
   GEN6_USE(p, 3DSTATE_SCISSOR_STATE_POINTERS, gen7);
   GEN6_USE(p, 3DSTATE_VS, gen7);
   GEN6_USE(p, 3DSTATE_CLIP, gen7);
   GEN6_USE(p, 3DSTATE_CONSTANT_VS, gen7);
   GEN6_USE(p, 3DSTATE_CONSTANT_GS, gen7);
   GEN6_USE(p, 3DSTATE_CONSTANT_PS, gen7);
   GEN6_USE(p, 3DSTATE_DRAWING_RECTANGLE, gen7);
   GEN6_USE(p, 3DSTATE_POLY_STIPPLE_OFFSET, gen7);
   GEN6_USE(p, 3DSTATE_POLY_STIPPLE_PATTERN, gen7);
   GEN6_USE(p, 3DSTATE_LINE_STIPPLE, gen7);
   GEN6_USE(p, 3DSTATE_AA_LINE_PARAMETERS, gen7);
   GEN6_USE(p, 3DSTATE_MULTISAMPLE, gen7);
   GEN6_USE(p, 3DSTATE_STENCIL_BUFFER, gen7);
   GEN6_USE(p, 3DSTATE_HIER_DEPTH_BUFFER, gen7);
   GEN6_USE(p, 3DSTATE_CLEAR_PARAMS, gen7);
   GEN6_USE(p, PIPE_CONTROL, gen7);
   GEN6_USE(p, 3DPRIMITIVE, gen7);
   GEN6_USE(p, INTERFACE_DESCRIPTOR_DATA, gen7);
   GEN6_USE(p, CC_VIEWPORT, gen7);
   GEN6_USE(p, COLOR_CALC_STATE, gen7);
   GEN6_USE(p, BLEND_STATE, gen7);
   GEN6_USE(p, DEPTH_STENCIL_STATE, gen7);
   GEN6_USE(p, SCISSOR_RECT, gen7);
   GEN6_USE(p, BINDING_TABLE_STATE, gen7);
   GEN6_USE(p, SURFACE_STATE, gen7);
   GEN6_USE(p, SAMPLER_STATE, gen7);
   GEN6_USE(p, SAMPLER_BORDER_COLOR_STATE, gen7);
   GEN6_USE(p, push_constant_buffer, gen7);
#undef GEN6_USE
/* wire a gen7_* pipeline hook to the corresponding Gen7 GPE emitter */
#define GEN7_USE(p, name, from) \
   p->gen7_ ## name = from->emit_ ## name
   GEN7_USE(p, 3DSTATE_DEPTH_BUFFER, gen7);
   GEN7_USE(p, 3DSTATE_CC_STATE_POINTERS, gen7);
   GEN7_USE(p, 3DSTATE_GS, gen7);
   GEN7_USE(p, 3DSTATE_SF, gen7);
   GEN7_USE(p, 3DSTATE_WM, gen7);
   GEN7_USE(p, 3DSTATE_SAMPLE_MASK, gen7);
   GEN7_USE(p, 3DSTATE_CONSTANT_HS, gen7);
   GEN7_USE(p, 3DSTATE_CONSTANT_DS, gen7);
   GEN7_USE(p, 3DSTATE_HS, gen7);
   GEN7_USE(p, 3DSTATE_TE, gen7);
   GEN7_USE(p, 3DSTATE_DS, gen7);
   GEN7_USE(p, 3DSTATE_STREAMOUT, gen7);
   GEN7_USE(p, 3DSTATE_SBE, gen7);
   GEN7_USE(p, 3DSTATE_PS, gen7);
   GEN7_USE(p, 3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP, gen7);
   GEN7_USE(p, 3DSTATE_VIEWPORT_STATE_POINTERS_CC, gen7);
   GEN7_USE(p, 3DSTATE_BLEND_STATE_POINTERS, gen7);
   GEN7_USE(p, 3DSTATE_DEPTH_STENCIL_STATE_POINTERS, gen7);
   GEN7_USE(p, 3DSTATE_BINDING_TABLE_POINTERS_VS, gen7);
   GEN7_USE(p, 3DSTATE_BINDING_TABLE_POINTERS_HS, gen7);
   GEN7_USE(p, 3DSTATE_BINDING_TABLE_POINTERS_DS, gen7);
   GEN7_USE(p, 3DSTATE_BINDING_TABLE_POINTERS_GS, gen7);
   GEN7_USE(p, 3DSTATE_BINDING_TABLE_POINTERS_PS, gen7);
   GEN7_USE(p, 3DSTATE_SAMPLER_STATE_POINTERS_VS, gen7);
   GEN7_USE(p, 3DSTATE_SAMPLER_STATE_POINTERS_HS, gen7);
   GEN7_USE(p, 3DSTATE_SAMPLER_STATE_POINTERS_DS, gen7);
   GEN7_USE(p, 3DSTATE_SAMPLER_STATE_POINTERS_GS, gen7);
   GEN7_USE(p, 3DSTATE_SAMPLER_STATE_POINTERS_PS, gen7);
   GEN7_USE(p, 3DSTATE_URB_VS, gen7);
   GEN7_USE(p, 3DSTATE_URB_HS, gen7);
   GEN7_USE(p, 3DSTATE_URB_DS, gen7);
   GEN7_USE(p, 3DSTATE_URB_GS, gen7);
   GEN7_USE(p, 3DSTATE_PUSH_CONSTANT_ALLOC_VS, gen7);
   GEN7_USE(p, 3DSTATE_PUSH_CONSTANT_ALLOC_HS, gen7);
   GEN7_USE(p, 3DSTATE_PUSH_CONSTANT_ALLOC_DS, gen7);
   GEN7_USE(p, 3DSTATE_PUSH_CONSTANT_ALLOC_GS, gen7);
   GEN7_USE(p, 3DSTATE_PUSH_CONSTANT_ALLOC_PS, gen7);
   GEN7_USE(p, 3DSTATE_SO_DECL_LIST, gen7);
   GEN7_USE(p, 3DSTATE_SO_BUFFER, gen7);
   GEN7_USE(p, SF_CLIP_VIEWPORT, gen7);
#undef GEN7_USE
}
/contrib/sdk/sources/Mesa/src/gallium/drivers/ilo/ilo_3d_pipeline_gen7.h |
---|
0,0 → 1,38 |
/* |
* Mesa 3-D graphics library |
* |
* Copyright (C) 2013 LunarG, Inc. |
* |
* Permission is hereby granted, free of charge, to any person obtaining a |
* copy of this software and associated documentation files (the "Software"), |
* to deal in the Software without restriction, including without limitation |
* the rights to use, copy, modify, merge, publish, distribute, sublicense, |
* and/or sell copies of the Software, and to permit persons to whom the |
* Software is furnished to do so, subject to the following conditions: |
* |
* The above copyright notice and this permission notice shall be included |
* in all copies or substantial portions of the Software. |
* |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING |
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER |
* DEALINGS IN THE SOFTWARE. |
* |
* Authors: |
* Chia-I Wu <olv@lunarg.com> |
*/ |
#ifndef ILO_3D_PIPELINE_GEN7_H |
#define ILO_3D_PIPELINE_GEN7_H |
#include "ilo_common.h" |
struct ilo_3d_pipeline; |
void |
ilo_3d_pipeline_init_gen7(struct ilo_3d_pipeline *p); |
#endif /* ILO_3D_PIPELINE_GEN7_H */ |
/contrib/sdk/sources/Mesa/src/gallium/drivers/ilo/ilo_blit.c |
---|
0,0 → 1,143 |
/* |
* Mesa 3-D graphics library |
* |
* Copyright (C) 2012-2013 LunarG, Inc. |
* |
* Permission is hereby granted, free of charge, to any person obtaining a |
* copy of this software and associated documentation files (the "Software"), |
* to deal in the Software without restriction, including without limitation |
* the rights to use, copy, modify, merge, publish, distribute, sublicense, |
* and/or sell copies of the Software, and to permit persons to whom the |
* Software is furnished to do so, subject to the following conditions: |
* |
* The above copyright notice and this permission notice shall be included |
* in all copies or substantial portions of the Software. |
* |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING |
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER |
* DEALINGS IN THE SOFTWARE. |
* |
* Authors: |
* Chia-I Wu <olv@lunarg.com> |
*/ |
#include "util/u_surface.h" |
#include "ilo_blitter.h" |
#include "ilo_context.h" |
#include "ilo_blit.h" |
static void |
ilo_resource_copy_region(struct pipe_context *pipe, |
struct pipe_resource *dst, |
unsigned dst_level, |
unsigned dstx, unsigned dsty, unsigned dstz, |
struct pipe_resource *src, |
unsigned src_level, |
const struct pipe_box *src_box) |
{ |
struct ilo_context *ilo = ilo_context(pipe); |
if (ilo_blitter_blt_copy_resource(ilo->blitter, |
dst, dst_level, dstx, dsty, dstz, |
src, src_level, src_box)) |
return; |
if (ilo_blitter_pipe_copy_resource(ilo->blitter, |
dst, dst_level, dstx, dsty, dstz, |
src, src_level, src_box)) |
return; |
util_resource_copy_region(&ilo->base, dst, dst_level, |
dstx, dsty, dstz, src, src_level, src_box); |
} |
static void |
ilo_clear(struct pipe_context *pipe, |
unsigned buffers, |
const union pipe_color_union *color, |
double depth, |
unsigned stencil) |
{ |
struct ilo_context *ilo = ilo_context(pipe); |
ilo_blitter_pipe_clear_fb(ilo->blitter, buffers, color, depth, stencil); |
} |
static void |
ilo_clear_render_target(struct pipe_context *pipe, |
struct pipe_surface *dst, |
const union pipe_color_union *color, |
unsigned dstx, unsigned dsty, |
unsigned width, unsigned height) |
{ |
struct ilo_context *ilo = ilo_context(pipe); |
if (!width || !height || dstx >= dst->width || dsty >= dst->height) |
return; |
if (dstx + width > dst->width) |
width = dst->width - dstx; |
if (dsty + height > dst->height) |
height = dst->height - dsty; |
if (ilo_blitter_blt_clear_rt(ilo->blitter, |
dst, color, dstx, dsty, width, height)) |
return; |
ilo_blitter_pipe_clear_rt(ilo->blitter, |
dst, color, dstx, dsty, width, height); |
} |
static void |
ilo_clear_depth_stencil(struct pipe_context *pipe, |
struct pipe_surface *dst, |
unsigned clear_flags, |
double depth, |
unsigned stencil, |
unsigned dstx, unsigned dsty, |
unsigned width, unsigned height) |
{ |
struct ilo_context *ilo = ilo_context(pipe); |
if (!width || !height || dstx >= dst->width || dsty >= dst->height) |
return; |
if (dstx + width > dst->width) |
width = dst->width - dstx; |
if (dsty + height > dst->height) |
height = dst->height - dsty; |
if (ilo_blitter_blt_clear_zs(ilo->blitter, |
dst, clear_flags, depth, stencil, dstx, dsty, width, height)) |
return; |
ilo_blitter_pipe_clear_zs(ilo->blitter, |
dst, clear_flags, depth, stencil, dstx, dsty, width, height); |
} |
static void |
ilo_blit(struct pipe_context *pipe, const struct pipe_blit_info *info) |
{ |
struct ilo_context *ilo = ilo_context(pipe); |
ilo_blitter_pipe_blit(ilo->blitter, info); |
} |
/** |
* Initialize blit-related functions. |
*/ |
void |
ilo_init_blit_functions(struct ilo_context *ilo) |
{ |
ilo->base.resource_copy_region = ilo_resource_copy_region; |
ilo->base.blit = ilo_blit; |
ilo->base.clear = ilo_clear; |
ilo->base.clear_render_target = ilo_clear_render_target; |
ilo->base.clear_depth_stencil = ilo_clear_depth_stencil; |
} |
/contrib/sdk/sources/Mesa/src/gallium/drivers/ilo/ilo_blit.h |
---|
0,0 → 1,38 |
/* |
* Mesa 3-D graphics library |
* |
* Copyright (C) 2012-2013 LunarG, Inc. |
* |
* Permission is hereby granted, free of charge, to any person obtaining a |
* copy of this software and associated documentation files (the "Software"), |
* to deal in the Software without restriction, including without limitation |
* the rights to use, copy, modify, merge, publish, distribute, sublicense, |
* and/or sell copies of the Software, and to permit persons to whom the |
* Software is furnished to do so, subject to the following conditions: |
* |
* The above copyright notice and this permission notice shall be included |
* in all copies or substantial portions of the Software. |
* |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING |
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER |
* DEALINGS IN THE SOFTWARE. |
* |
* Authors: |
* Chia-I Wu <olv@lunarg.com> |
*/ |
#ifndef ILO_BLIT_H |
#define ILO_BLIT_H |
#include "ilo_common.h" |
struct ilo_context; |
void |
ilo_init_blit_functions(struct ilo_context *ilo); |
#endif /* ILO_BLIT_H */ |
/contrib/sdk/sources/Mesa/src/gallium/drivers/ilo/ilo_blitter.c |
---|
0,0 → 1,74 |
/* |
* Mesa 3-D graphics library |
* |
* Copyright (C) 2013 LunarG, Inc. |
* |
* Permission is hereby granted, free of charge, to any person obtaining a |
* copy of this software and associated documentation files (the "Software"), |
* to deal in the Software without restriction, including without limitation |
* the rights to use, copy, modify, merge, publish, distribute, sublicense, |
* and/or sell copies of the Software, and to permit persons to whom the |
* Software is furnished to do so, subject to the following conditions: |
* |
* The above copyright notice and this permission notice shall be included |
* in all copies or substantial portions of the Software. |
* |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING |
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER |
* DEALINGS IN THE SOFTWARE. |
* |
* Authors: |
* Chia-I Wu <olv@lunarg.com> |
*/ |
#include "util/u_blitter.h" |
#include "ilo_context.h" |
#include "ilo_blitter.h" |
static bool |
ilo_blitter_pipe_create(struct ilo_blitter *blitter) |
{ |
if (blitter->pipe_blitter) |
return true; |
blitter->pipe_blitter = util_blitter_create(&blitter->ilo->base); |
return (blitter->pipe_blitter != NULL); |
} |
/** |
* Create a blitter. Because the use of util_blitter, this must be called |
* after the context is initialized. |
*/ |
struct ilo_blitter * |
ilo_blitter_create(struct ilo_context *ilo) |
{ |
struct ilo_blitter *blitter; |
blitter = CALLOC_STRUCT(ilo_blitter); |
if (!blitter) |
return NULL; |
blitter->ilo = ilo; |
if (!ilo_blitter_pipe_create(blitter)) { |
FREE(blitter); |
return NULL; |
} |
return blitter; |
} |
void |
ilo_blitter_destroy(struct ilo_blitter *blitter) |
{ |
if (blitter->pipe_blitter) |
util_blitter_destroy(blitter->pipe_blitter); |
FREE(blitter); |
} |
/contrib/sdk/sources/Mesa/src/gallium/drivers/ilo/ilo_blitter.h |
---|
0,0 → 1,102 |
/* |
* Mesa 3-D graphics library |
* |
* Copyright (C) 2013 LunarG, Inc. |
* |
* Permission is hereby granted, free of charge, to any person obtaining a |
* copy of this software and associated documentation files (the "Software"), |
* to deal in the Software without restriction, including without limitation |
* the rights to use, copy, modify, merge, publish, distribute, sublicense, |
* and/or sell copies of the Software, and to permit persons to whom the |
* Software is furnished to do so, subject to the following conditions: |
* |
* The above copyright notice and this permission notice shall be included |
* in all copies or substantial portions of the Software. |
* |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING |
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER |
* DEALINGS IN THE SOFTWARE. |
* |
* Authors: |
* Chia-I Wu <olv@lunarg.com> |
*/ |
#ifndef ILO_BLITTER_H
#define ILO_BLITTER_H

#include "ilo_common.h"

struct ilo_context;
struct blitter_context;

/*
 * Blitting helper.  Wraps a u_blitter context for shader-based paths and
 * also provides paths that use the hardware BLT engine directly.
 */
struct ilo_blitter {
   struct ilo_context *ilo;               /* owning context */
   struct blitter_context *pipe_blitter;  /* u_blitter, created lazily */
};

struct ilo_blitter *
ilo_blitter_create(struct ilo_context *ilo);

void
ilo_blitter_destroy(struct ilo_blitter *blitter);

/*
 * Shader-based (u_blitter) paths.  They save/restore the context states
 * themselves; return false when the operation is not supported.
 */
bool
ilo_blitter_pipe_blit(struct ilo_blitter *blitter,
                      const struct pipe_blit_info *info);

bool
ilo_blitter_pipe_copy_resource(struct ilo_blitter *blitter,
                               struct pipe_resource *dst, unsigned dst_level,
                               unsigned dst_x, unsigned dst_y, unsigned dst_z,
                               struct pipe_resource *src, unsigned src_level,
                               const struct pipe_box *src_box);

bool
ilo_blitter_pipe_clear_rt(struct ilo_blitter *blitter,
                          struct pipe_surface *rt,
                          const union pipe_color_union *color,
                          unsigned x, unsigned y,
                          unsigned width, unsigned height);

bool
ilo_blitter_pipe_clear_zs(struct ilo_blitter *blitter,
                          struct pipe_surface *zs,
                          unsigned clear_flags,
                          double depth, unsigned stencil,
                          unsigned x, unsigned y,
                          unsigned width, unsigned height);

bool
ilo_blitter_pipe_clear_fb(struct ilo_blitter *blitter,
                          unsigned buffers,
                          const union pipe_color_union *color,
                          double depth, unsigned stencil);

/*
 * Hardware BLT-engine paths.  Return false when the operation cannot be
 * expressed with BLT commands; callers are expected to fall back to a
 * pipe-based path.
 */
bool
ilo_blitter_blt_copy_resource(struct ilo_blitter *blitter,
                              struct pipe_resource *dst, unsigned dst_level,
                              unsigned dst_x, unsigned dst_y, unsigned dst_z,
                              struct pipe_resource *src, unsigned src_level,
                              const struct pipe_box *src_box);

bool
ilo_blitter_blt_clear_rt(struct ilo_blitter *blitter,
                         struct pipe_surface *rt,
                         const union pipe_color_union *color,
                         unsigned x, unsigned y,
                         unsigned width, unsigned height);

bool
ilo_blitter_blt_clear_zs(struct ilo_blitter *blitter,
                         struct pipe_surface *zs,
                         unsigned clear_flags,
                         double depth, unsigned stencil,
                         unsigned x, unsigned y,
                         unsigned width, unsigned height);

#endif /* ILO_BLITTER_H */
/contrib/sdk/sources/Mesa/src/gallium/drivers/ilo/ilo_blitter_blt.c |
---|
0,0 → 1,812 |
/* |
* Mesa 3-D graphics library |
* |
* Copyright (C) 2013 LunarG, Inc. |
* |
* Permission is hereby granted, free of charge, to any person obtaining a |
* copy of this software and associated documentation files (the "Software"), |
* to deal in the Software without restriction, including without limitation |
* the rights to use, copy, modify, merge, publish, distribute, sublicense, |
* and/or sell copies of the Software, and to permit persons to whom the |
* Software is furnished to do so, subject to the following conditions: |
* |
* The above copyright notice and this permission notice shall be included |
* in all copies or substantial portions of the Software. |
* |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING |
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER |
* DEALINGS IN THE SOFTWARE. |
* |
* Authors: |
* Chia-I Wu <olv@lunarg.com> |
*/ |
#include "util/u_pack_color.h" |
#include "intel_reg.h" |
#include "ilo_3d.h" |
#include "ilo_context.h" |
#include "ilo_cp.h" |
#include "ilo_resource.h" |
#include "ilo_blitter.h" |
#ifndef COLOR_BLT_CMD |
#define COLOR_BLT_CMD (CMD_2D | (0x40 << 22)) |
#endif |
#ifndef SRC_COPY_BLT_CMD |
#define SRC_COPY_BLT_CMD (CMD_2D | (0x43 << 22)) |
#endif |
/*
 * Pixel masks for BLT commands: whole 8-, 16-, or 32-bit pixels, or only
 * the RGB (low) or alpha (high) part of a 32-bit pixel (see
 * gen6_translate_blt_write_mask()).
 */
enum gen6_blt_mask {
   GEN6_BLT_MASK_8,
   GEN6_BLT_MASK_16,
   GEN6_BLT_MASK_32,
   GEN6_BLT_MASK_32_LO,    /* 32-bit pixels, RGB channels only */
   GEN6_BLT_MASK_32_HI,    /* 32-bit pixels, alpha channel only */
};
/*
 * From the Sandy Bridge PRM, volume 1 part 5, page 7:
 *
 *     "The BLT engine is capable of transferring very large quantities of
 *      graphics data.  Any graphics data read from and written to the
 *      destination is permitted to represent a number of pixels that
 *      occupies up to 65,536 scan lines and up to 32,768 bytes per scan
 *      line at the destination.  The maximum number of pixels that may be
 *      represented per scan line's worth of graphics data depends on the
 *      color depth."
 *
 * The gen6_emit_*() helpers below assert against these limits.
 */
static const int gen6_max_bytes_per_scanline = 32768;
static const int gen6_max_scanlines = 65536;
static void |
gen6_emit_MI_FLUSH_DW(struct ilo_dev_info *dev, struct ilo_cp *cp) |
{ |
const uint8_t cmd_len = 4; |
ilo_cp_begin(cp, cmd_len); |
ilo_cp_write(cp, MI_FLUSH_DW | (cmd_len - 2)); |
ilo_cp_write(cp, 0); |
ilo_cp_write(cp, 0); |
ilo_cp_write(cp, 0); |
ilo_cp_end(cp); |
} |
/* emit MI_LOAD_REGISTER_IMM: load immediate \p val into register \p reg */
static void
gen6_emit_MI_LOAD_REGISTER_IMM(struct ilo_dev_info *dev,
                               uint32_t reg, uint32_t val,
                               struct ilo_cp *cp)
{
   const uint8_t cmd_len = 3;

   ilo_cp_begin(cp, cmd_len);
   ilo_cp_write(cp, MI_LOAD_REGISTER_IMM | (cmd_len - 2));
   ilo_cp_write(cp, reg);   /* register offset */
   ilo_cp_write(cp, val);   /* immediate data */
   ilo_cp_end(cp);
}
static uint32_t |
gen6_translate_blt_value_mask(enum gen6_blt_mask value_mask) |
{ |
switch (value_mask) { |
case GEN6_BLT_MASK_8: return BR13_8; |
case GEN6_BLT_MASK_16: return BR13_565; |
default: return BR13_8888; |
} |
} |
static uint32_t |
gen6_translate_blt_write_mask(enum gen6_blt_mask write_mask) |
{ |
switch (write_mask) { |
case GEN6_BLT_MASK_32: return XY_BLT_WRITE_RGB | |
XY_BLT_WRITE_ALPHA; |
case GEN6_BLT_MASK_32_LO: return XY_BLT_WRITE_RGB; |
case GEN6_BLT_MASK_32_HI: return XY_BLT_WRITE_ALPHA; |
default: return 0; |
} |
} |
static uint32_t |
gen6_translate_blt_cpp(enum gen6_blt_mask mask) |
{ |
switch (mask) { |
case GEN6_BLT_MASK_8: return 1; |
case GEN6_BLT_MASK_16: return 2; |
default: return 4; |
} |
} |
/*
 * Emit COLOR_BLT: fill a linear (untiled) 2D region of dst_bo with a
 * solid pattern.  width is in bytes; height in scanlines.
 */
static void
gen6_emit_COLOR_BLT(struct ilo_dev_info *dev,
                    struct intel_bo *dst_bo,
                    int16_t dst_pitch, uint32_t dst_offset,
                    uint16_t width, uint16_t height,
                    uint32_t pattern, uint8_t rop,
                    enum gen6_blt_mask value_mask,
                    enum gen6_blt_mask write_mask,
                    struct ilo_cp *cp)
{
   const uint8_t cmd_len = 5;
   const int cpp = gen6_translate_blt_cpp(value_mask);
   uint32_t dw0, dw1;

   dw0 = COLOR_BLT_CMD |
         gen6_translate_blt_write_mask(write_mask) |
         (cmd_len - 2);

   assert(width < gen6_max_bytes_per_scanline);
   assert(height < gen6_max_scanlines);
   /* offsets are naturally aligned and pitches are dword-aligned */
   assert(dst_offset % cpp == 0 && dst_pitch % 4 == 0);

   /* raster op, color depth, and pitch share dword 1 */
   dw1 = rop << 16 |
         gen6_translate_blt_value_mask(value_mask) |
         dst_pitch;

   ilo_cp_begin(cp, cmd_len);
   ilo_cp_write(cp, dw0);
   ilo_cp_write(cp, dw1);
   ilo_cp_write(cp, height << 16 | width);
   ilo_cp_write_bo(cp, dst_offset, dst_bo, INTEL_DOMAIN_RENDER,
                                           INTEL_DOMAIN_RENDER);
   ilo_cp_write(cp, pattern);
   ilo_cp_end(cp);
}
/*
 * Emit XY_COLOR_BLT: fill the rectangle (x1, y1)-(x2, y2) of a possibly
 * tiled destination with a solid pattern.
 */
static void
gen6_emit_XY_COLOR_BLT(struct ilo_dev_info *dev,
                       struct intel_bo *dst_bo,
                       enum intel_tiling_mode dst_tiling,
                       int16_t dst_pitch, uint32_t dst_offset,
                       int16_t x1, int16_t y1, int16_t x2, int16_t y2,
                       uint32_t pattern, uint8_t rop,
                       enum gen6_blt_mask value_mask,
                       enum gen6_blt_mask write_mask,
                       struct ilo_cp *cp)
{
   const uint8_t cmd_len = 6;
   const int cpp = gen6_translate_blt_cpp(value_mask);
   int dst_align, dst_pitch_shift;
   uint32_t dw0, dw1;

   dw0 = XY_COLOR_BLT_CMD |
         gen6_translate_blt_write_mask(write_mask) |
         (cmd_len - 2);

   if (dst_tiling == INTEL_TILING_NONE) {
      dst_align = 4;
      dst_pitch_shift = 0;
   }
   else {
      dw0 |= XY_DST_TILED;

      /* tiled surfaces need stricter (tile-width) alignment */
      dst_align = (dst_tiling == INTEL_TILING_Y) ? 128 : 512;
      /* in dwords when tiled */
      dst_pitch_shift = 2;
   }

   assert((x2 - x1) * cpp < gen6_max_bytes_per_scanline);
   assert(y2 - y1 < gen6_max_scanlines);
   assert(dst_offset % dst_align == 0 && dst_pitch % dst_align == 0);

   dw1 = rop << 16 |
         gen6_translate_blt_value_mask(value_mask) |
         dst_pitch >> dst_pitch_shift;

   ilo_cp_begin(cp, cmd_len);
   ilo_cp_write(cp, dw0);
   ilo_cp_write(cp, dw1);
   ilo_cp_write(cp, y1 << 16 | x1);
   ilo_cp_write(cp, y2 << 16 | x2);
   ilo_cp_write_bo(cp, dst_offset, dst_bo,
                   INTEL_DOMAIN_RENDER, INTEL_DOMAIN_RENDER);
   ilo_cp_write(cp, pattern);
   ilo_cp_end(cp);
}
/*
 * Emit SRC_COPY_BLT: copy a linear (untiled) 2D region from src_bo to
 * dst_bo.  width is in bytes; dir_rtl sets the direction bit (bit 30).
 */
static void
gen6_emit_SRC_COPY_BLT(struct ilo_dev_info *dev,
                       struct intel_bo *dst_bo,
                       int16_t dst_pitch, uint32_t dst_offset,
                       uint16_t width, uint16_t height,
                       struct intel_bo *src_bo,
                       int16_t src_pitch, uint32_t src_offset,
                       bool dir_rtl, uint8_t rop,
                       enum gen6_blt_mask value_mask,
                       enum gen6_blt_mask write_mask,
                       struct ilo_cp *cp)
{
   const uint8_t cmd_len = 6;
   const int cpp = gen6_translate_blt_cpp(value_mask);
   uint32_t dw0, dw1;

   dw0 = SRC_COPY_BLT_CMD |
         gen6_translate_blt_write_mask(write_mask) |
         (cmd_len - 2);

   assert(width < gen6_max_bytes_per_scanline);
   assert(height < gen6_max_scanlines);
   /* offsets are naturally aligned and pitches are dword-aligned */
   assert(dst_offset % cpp == 0 && dst_pitch % 4 == 0);
   assert(src_offset % cpp == 0 && src_pitch % 4 == 0);

   dw1 = rop << 16 |
         gen6_translate_blt_value_mask(value_mask) |
         dst_pitch;

   if (dir_rtl)
      dw1 |= 1 << 30;

   ilo_cp_begin(cp, cmd_len);
   ilo_cp_write(cp, dw0);
   ilo_cp_write(cp, dw1);
   ilo_cp_write(cp, height << 16 | width);
   ilo_cp_write_bo(cp, dst_offset, dst_bo, INTEL_DOMAIN_RENDER,
                                           INTEL_DOMAIN_RENDER);
   ilo_cp_write(cp, src_pitch);
   /* source is read-only: no write domain */
   ilo_cp_write_bo(cp, src_offset, src_bo, INTEL_DOMAIN_RENDER, 0);
   ilo_cp_end(cp);
}
/*
 * Emit XY_SRC_COPY_BLT: copy a rectangle from a possibly tiled source to
 * a possibly tiled destination.
 */
static void
gen6_emit_XY_SRC_COPY_BLT(struct ilo_dev_info *dev,
                          struct intel_bo *dst_bo,
                          enum intel_tiling_mode dst_tiling,
                          int16_t dst_pitch, uint32_t dst_offset,
                          int16_t x1, int16_t y1, int16_t x2, int16_t y2,
                          struct intel_bo *src_bo,
                          enum intel_tiling_mode src_tiling,
                          int16_t src_pitch, uint32_t src_offset,
                          int16_t src_x, int16_t src_y, uint8_t rop,
                          enum gen6_blt_mask value_mask,
                          enum gen6_blt_mask write_mask,
                          struct ilo_cp *cp)
{
   const uint8_t cmd_len = 8;
   const int cpp = gen6_translate_blt_cpp(value_mask);
   int dst_align, dst_pitch_shift;
   int src_align, src_pitch_shift;
   uint32_t dw0, dw1;

   dw0 = XY_SRC_COPY_BLT_CMD |
         gen6_translate_blt_write_mask(write_mask) |
         (cmd_len - 2);

   if (dst_tiling == INTEL_TILING_NONE) {
      dst_align = 4;
      dst_pitch_shift = 0;
   }
   else {
      dw0 |= XY_DST_TILED;

      /* tiled surfaces need stricter (tile-width) alignment */
      dst_align = (dst_tiling == INTEL_TILING_Y) ? 128 : 512;
      /* in dwords when tiled */
      dst_pitch_shift = 2;
   }

   if (src_tiling == INTEL_TILING_NONE) {
      src_align = 4;
      src_pitch_shift = 0;
   }
   else {
      dw0 |= XY_SRC_TILED;

      src_align = (src_tiling == INTEL_TILING_Y) ? 128 : 512;
      /* in dwords when tiled */
      src_pitch_shift = 2;
   }

   assert((x2 - x1) * cpp < gen6_max_bytes_per_scanline);
   assert(y2 - y1 < gen6_max_scanlines);
   assert(dst_offset % dst_align == 0 && dst_pitch % dst_align == 0);
   assert(src_offset % src_align == 0 && src_pitch % src_align == 0);

   dw1 = rop << 16 |
         gen6_translate_blt_value_mask(value_mask) |
         dst_pitch >> dst_pitch_shift;

   ilo_cp_begin(cp, cmd_len);
   ilo_cp_write(cp, dw0);
   ilo_cp_write(cp, dw1);
   ilo_cp_write(cp, y1 << 16 | x1);
   ilo_cp_write(cp, y2 << 16 | x2);
   ilo_cp_write_bo(cp, dst_offset, dst_bo, INTEL_DOMAIN_RENDER,
                                           INTEL_DOMAIN_RENDER);
   ilo_cp_write(cp, src_y << 16 | src_x);
   ilo_cp_write(cp, src_pitch >> src_pitch_shift);
   /* source is read-only: no write domain */
   ilo_cp_write_bo(cp, src_offset, src_bo, INTEL_DOMAIN_RENDER, 0);
   ilo_cp_end(cp);
}
/*
 * Switch to the BLT ring and prepare for emitting up to max_cmd_size
 * dwords of BLT commands touching dst (and optionally src).  Returns the
 * value to pass to ilo_blitter_blt_end().
 */
static uint32_t
ilo_blitter_blt_begin(struct ilo_blitter *blitter, int max_cmd_size,
                      struct intel_bo *dst, enum intel_tiling_mode dst_tiling,
                      struct intel_bo *src, enum intel_tiling_mode src_tiling)
{
   struct ilo_context *ilo = blitter->ilo;
   struct intel_bo *aper_check[3];
   int count;
   uint32_t swctrl;

   /* change ring */
   ilo_cp_set_ring(ilo->cp, ILO_CP_RING_BLT);
   ilo_cp_set_owner(ilo->cp, NULL, 0);

   /* check aperture space; flush first if the bo's may not all fit */
   aper_check[0] = ilo->cp->bo;
   aper_check[1] = dst;
   count = 2;

   if (src) {
      aper_check[2] = src;
      count++;
   }

   if (intel_winsys_check_aperture_space(ilo->winsys, aper_check, count))
      ilo_cp_flush(ilo->cp);

   /*
    * set BCS_SWCTRL; the upper 16 bits appear to be a write-enable mask
    * for the lower 16 (see the restore logic below) — TODO confirm against
    * the PRM register description
    */
   swctrl = 0x0;

   if (dst_tiling == INTEL_TILING_Y) {
      swctrl |= BCS_SWCTRL_DST_Y << 16 |
                BCS_SWCTRL_DST_Y;
   }

   if (src && src_tiling == INTEL_TILING_Y) {
      swctrl |= BCS_SWCTRL_SRC_Y << 16 |
                BCS_SWCTRL_SRC_Y;
   }

   if (swctrl) {
      /*
       * Most clients expect BLT engine to be stateless.  If we have to set
       * BCS_SWCTRL to a non-default value, we have to set it back in the
       * same batch buffer.
       */
      if (ilo_cp_space(ilo->cp) < (4 + 3) * 2 + max_cmd_size)
         ilo_cp_flush(ilo->cp);

      ilo_cp_assert_no_implicit_flush(ilo->cp, true);

      /*
       * From the Ivy Bridge PRM, volume 1 part 4, page 133:
       *
       *     "SW is required to flush the HW before changing the polarity of
       *      this bit (Tile Y Destination/Source)."
       */
      gen6_emit_MI_FLUSH_DW(ilo->dev, ilo->cp);
      gen6_emit_MI_LOAD_REGISTER_IMM(ilo->dev, BCS_SWCTRL, swctrl, ilo->cp);

      /*
       * clear the value bits but keep the write-enable bits: writing the
       * returned value back in ilo_blitter_blt_end() resets the Y-tiling
       * bits to their defaults
       */
      swctrl &= ~(BCS_SWCTRL_DST_Y | BCS_SWCTRL_SRC_Y);
   }

   return swctrl;
}
/*
 * Counterpart of ilo_blitter_blt_begin(): restore BCS_SWCTRL if it was
 * changed.  \p swctrl is the value returned by ilo_blitter_blt_begin().
 */
static void
ilo_blitter_blt_end(struct ilo_blitter *blitter, uint32_t swctrl)
{
   struct ilo_context *ilo = blitter->ilo;

   /* set BCS_SWCTRL back */
   if (swctrl) {
      gen6_emit_MI_FLUSH_DW(ilo->dev, ilo->cp);
      gen6_emit_MI_LOAD_REGISTER_IMM(ilo->dev, BCS_SWCTRL, swctrl, ilo->cp);
      ilo_cp_assert_no_implicit_flush(ilo->cp, false);
   }
}
static bool |
buf_clear_region(struct ilo_blitter *blitter, |
struct ilo_buffer *dst, |
unsigned dst_offset, unsigned dst_size, |
uint32_t val, |
enum gen6_blt_mask value_mask, |
enum gen6_blt_mask write_mask) |
{ |
const uint8_t rop = 0xf0; /* PATCOPY */ |
const int cpp = gen6_translate_blt_cpp(value_mask); |
struct ilo_context *ilo = blitter->ilo; |
unsigned offset = 0; |
if (dst_offset % cpp || dst_size % cpp) |
return false; |
ilo_blitter_blt_begin(blitter, 0, |
dst->bo, INTEL_TILING_NONE, NULL, INTEL_TILING_NONE); |
while (dst_size) { |
unsigned width, height; |
int16_t pitch; |
width = dst_size; |
height = 1; |
pitch = 0; |
if (width > gen6_max_bytes_per_scanline) { |
/* less than INT16_MAX and dword-aligned */ |
pitch = 32764; |
width = pitch; |
height = dst_size / width; |
if (height > gen6_max_scanlines) |
height = gen6_max_scanlines; |
} |
gen6_emit_COLOR_BLT(ilo->dev, dst->bo, pitch, dst_offset + offset, |
width, height, val, rop, value_mask, write_mask, ilo->cp); |
offset += pitch * height; |
dst_size -= width * height; |
} |
ilo_blitter_blt_end(blitter, 0); |
return true; |
} |
static bool |
buf_copy_region(struct ilo_blitter *blitter, |
struct ilo_buffer *dst, unsigned dst_offset, |
struct ilo_buffer *src, unsigned src_offset, |
unsigned size) |
{ |
const uint8_t rop = 0xcc; /* SRCCOPY */ |
struct ilo_context *ilo = blitter->ilo; |
unsigned offset = 0; |
ilo_blitter_blt_begin(blitter, 0, |
dst->bo, INTEL_TILING_NONE, src->bo, INTEL_TILING_NONE); |
while (size) { |
unsigned width, height; |
int16_t pitch; |
width = size; |
height = 1; |
pitch = 0; |
if (width > gen6_max_bytes_per_scanline) { |
/* less than INT16_MAX and dword-aligned */ |
pitch = 32764; |
width = pitch; |
height = size / width; |
if (height > gen6_max_scanlines) |
height = gen6_max_scanlines; |
} |
gen6_emit_SRC_COPY_BLT(ilo->dev, |
dst->bo, pitch, dst_offset + offset, |
width, height, |
src->bo, pitch, src_offset + offset, |
false, rop, GEN6_BLT_MASK_8, GEN6_BLT_MASK_8, |
ilo->cp); |
offset += pitch * height; |
size -= width * height; |
} |
ilo_blitter_blt_end(blitter, 0); |
return true; |
} |
/*
 * Clear a 3D region of a texture with XY_COLOR_BLT, one slice at a time.
 * Returns false when the texture cannot be handled by the BLT engine
 * (separate stencil, stride or extents too large).
 */
static bool
tex_clear_region(struct ilo_blitter *blitter,
                 struct ilo_texture *dst, unsigned dst_level,
                 const struct pipe_box *dst_box,
                 uint32_t val,
                 enum gen6_blt_mask value_mask,
                 enum gen6_blt_mask write_mask)
{
   const int cpp = gen6_translate_blt_cpp(value_mask);
   const unsigned max_extent = 32767; /* INT16_MAX */
   const uint8_t rop = 0xf0; /* PATCOPY */
   struct ilo_context *ilo = blitter->ilo;
   uint32_t swctrl;
   int slice;

   /* no W-tiling support */
   if (dst->separate_s8)
      return false;

   if (dst->bo_stride > max_extent)
      return false;

   /* reserve 6 dwords (one XY_COLOR_BLT) per slice */
   swctrl = ilo_blitter_blt_begin(blitter, dst_box->depth * 6,
         dst->bo, dst->tiling, NULL, INTEL_TILING_NONE);

   for (slice = 0; slice < dst_box->depth; slice++) {
      const struct ilo_texture_slice *dst_slice =
         &dst->slice_offsets[dst_level][dst_box->z + slice];
      unsigned x1, y1, x2, y2;

      /* translate box coordinates by the slice's offset within the bo */
      x1 = dst_slice->x + dst_box->x;
      y1 = dst_slice->y + dst_box->y;
      x2 = x1 + dst_box->width;
      y2 = y1 + dst_box->height;

      /* stop when the rectangle exceeds what the command can express */
      if (x2 > max_extent || y2 > max_extent ||
          (x2 - x1) * cpp > gen6_max_bytes_per_scanline)
         break;

      gen6_emit_XY_COLOR_BLT(ilo->dev,
            dst->bo, dst->tiling, dst->bo_stride, 0,
            x1, y1, x2, y2, val, rop, value_mask, write_mask,
            ilo->cp);
   }

   ilo_blitter_blt_end(blitter, swctrl);

   /* false when we bailed out early on some slice */
   return (slice == dst_box->depth);
}
static bool |
tex_copy_region(struct ilo_blitter *blitter, |
struct ilo_texture *dst, |
unsigned dst_level, |
unsigned dst_x, unsigned dst_y, unsigned dst_z, |
struct ilo_texture *src, |
unsigned src_level, |
const struct pipe_box *src_box) |
{ |
const struct util_format_description *desc = |
util_format_description(dst->bo_format); |
const unsigned max_extent = 32767; /* INT16_MAX */ |
const uint8_t rop = 0xcc; /* SRCCOPY */ |
struct ilo_context *ilo = blitter->ilo; |
enum gen6_blt_mask mask; |
uint32_t swctrl; |
int cpp, xscale, slice; |
/* no W-tiling support */ |
if (dst->separate_s8 || src->separate_s8) |
return false; |
if (dst->bo_stride > max_extent || src->bo_stride > max_extent) |
return false; |
cpp = desc->block.bits / 8; |
xscale = 1; |
/* accommodate for larger cpp */ |
if (cpp > 4) { |
if (cpp % 2 == 1) |
return false; |
cpp = (cpp % 4 == 0) ? 4 : 2; |
xscale = (desc->block.bits / 8) / cpp; |
} |
switch (cpp) { |
case 1: |
mask = GEN6_BLT_MASK_8; |
break; |
case 2: |
mask = GEN6_BLT_MASK_16; |
break; |
case 4: |
mask = GEN6_BLT_MASK_32; |
break; |
default: |
return false; |
break; |
} |
swctrl = ilo_blitter_blt_begin(blitter, src_box->depth * 8, |
dst->bo, dst->tiling, src->bo, src->tiling); |
for (slice = 0; slice < src_box->depth; slice++) { |
const struct ilo_texture_slice *dst_slice = |
&dst->slice_offsets[dst_level][dst_z + slice]; |
const struct ilo_texture_slice *src_slice = |
&src->slice_offsets[src_level][src_box->z + slice]; |
unsigned x1, y1, x2, y2, src_x, src_y; |
x1 = (dst_slice->x + dst_x) * xscale; |
y1 = dst_slice->y + dst_y; |
x2 = (x1 + src_box->width) * xscale; |
y2 = y1 + src_box->height; |
src_x = (src_slice->x + src_box->x) * xscale; |
src_y = src_slice->y + src_box->y; |
/* in blocks */ |
x1 /= desc->block.width; |
y1 /= desc->block.height; |
x2 = (x2 + desc->block.width - 1) / desc->block.width; |
y2 = (y2 + desc->block.height - 1) / desc->block.height; |
src_x /= desc->block.width; |
src_y /= desc->block.height; |
if (x2 > max_extent || y2 > max_extent || |
src_x > max_extent || src_y > max_extent || |
(x2 - x1) * cpp > gen6_max_bytes_per_scanline) |
break; |
gen6_emit_XY_SRC_COPY_BLT(ilo->dev, |
dst->bo, dst->tiling, dst->bo_stride, 0, |
x1, y1, x2, y2, |
src->bo, src->tiling, src->bo_stride, 0, |
src_x, src_y, rop, mask, mask, |
ilo->cp); |
} |
ilo_blitter_blt_end(blitter, swctrl); |
return (slice == src_box->depth); |
} |
bool |
ilo_blitter_blt_copy_resource(struct ilo_blitter *blitter, |
struct pipe_resource *dst, unsigned dst_level, |
unsigned dst_x, unsigned dst_y, unsigned dst_z, |
struct pipe_resource *src, unsigned src_level, |
const struct pipe_box *src_box) |
{ |
bool success; |
if (dst->target == PIPE_BUFFER && src->target == PIPE_BUFFER) { |
const unsigned dst_offset = dst_x; |
const unsigned src_offset = src_box->x; |
const unsigned size = src_box->width; |
assert(dst_level == 0 && dst_y == 0 && dst_z == 0); |
assert(src_level == 0 && |
src_box->y == 0 && |
src_box->z == 0 && |
src_box->height == 1 && |
src_box->depth == 1); |
success = buf_copy_region(blitter, |
ilo_buffer(dst), dst_offset, ilo_buffer(src), src_offset, size); |
} |
else if (dst->target != PIPE_BUFFER && src->target != PIPE_BUFFER) { |
success = tex_copy_region(blitter, |
ilo_texture(dst), dst_level, dst_x, dst_y, dst_z, |
ilo_texture(src), src_level, src_box); |
} |
else { |
success = false; |
} |
return success; |
} |
/*
 * Clear a render target with the BLT engine.  Returns false when the
 * format or target cannot be handled; returns true without doing anything
 * when the render condition disables rendering.
 */
bool
ilo_blitter_blt_clear_rt(struct ilo_blitter *blitter,
                         struct pipe_surface *rt,
                         const union pipe_color_union *color,
                         unsigned x, unsigned y,
                         unsigned width, unsigned height)
{
   const int cpp = util_format_get_blocksize(rt->format);
   enum gen6_blt_mask mask;
   union util_color packed;
   bool success;

   if (!ilo_3d_pass_render_condition(blitter->ilo))
      return true;

   /* pick the blt mask matching the pixel size */
   switch (cpp) {
   case 1:
      mask = GEN6_BLT_MASK_8;
      break;
   case 2:
      mask = GEN6_BLT_MASK_16;
      break;
   case 4:
      mask = GEN6_BLT_MASK_32;
      break;
   default:
      return false;
      break;
   }

   /* pure-integer and compressed formats are not cleared here,
    * presumably because util_pack_color() cannot pack them — NOTE(review)
    */
   if (util_format_is_pure_integer(rt->format) ||
       util_format_is_compressed(rt->format))
      return false;

   util_pack_color(color->f, rt->format, &packed);

   if (rt->texture->target == PIPE_BUFFER) {
      unsigned offset, end, size;

      /* buffer surfaces are one-dimensional */
      assert(y == 0 && height == 1);

      /* convert elements to bytes and clamp to the surface's range */
      offset = (rt->u.buf.first_element + x) * cpp;
      end = (rt->u.buf.last_element + 1) * cpp;

      size = width * cpp;
      if (offset + size > end)
         size = end - offset;

      success = buf_clear_region(blitter, ilo_buffer(rt->texture),
            offset, size, packed.ui, mask, mask);
   }
   else {
      struct pipe_box box;

      u_box_3d(x, y, rt->u.tex.first_layer, width, height,
            rt->u.tex.last_layer - rt->u.tex.first_layer + 1, &box);

      success = tex_clear_region(blitter, ilo_texture(rt->texture),
            rt->u.tex.level, &box, packed.ui, mask, mask);
   }

   return success;
}
/*
 * Clear a depth/stencil surface with the BLT engine.  Returns false for
 * unsupported formats; returns true without doing anything when the
 * render condition disables rendering or no requested aspect exists in
 * the format.
 */
bool
ilo_blitter_blt_clear_zs(struct ilo_blitter *blitter,
                         struct pipe_surface *zs,
                         unsigned clear_flags,
                         double depth, unsigned stencil,
                         unsigned x, unsigned y,
                         unsigned width, unsigned height)
{
   enum gen6_blt_mask value_mask, write_mask;
   struct pipe_box box;
   uint32_t val;

   if (!ilo_3d_pass_render_condition(blitter->ilo))
      return true;

   switch (zs->format) {
   case PIPE_FORMAT_Z16_UNORM:
      if (!(clear_flags & PIPE_CLEAR_DEPTH))
         return true;

      value_mask = GEN6_BLT_MASK_16;
      write_mask = GEN6_BLT_MASK_16;
      break;
   case PIPE_FORMAT_Z32_FLOAT:
      if (!(clear_flags & PIPE_CLEAR_DEPTH))
         return true;

      value_mask = GEN6_BLT_MASK_32;
      write_mask = GEN6_BLT_MASK_32;
      break;
   case PIPE_FORMAT_Z24X8_UNORM:
      if (!(clear_flags & PIPE_CLEAR_DEPTH))
         return true;

      /* only write the depth (low 24) bytes of each pixel */
      value_mask = GEN6_BLT_MASK_32;
      write_mask = GEN6_BLT_MASK_32_LO;
      break;
   case PIPE_FORMAT_Z24_UNORM_S8_UINT:
      if (!(clear_flags & PIPE_CLEAR_DEPTHSTENCIL))
         return true;

      value_mask = GEN6_BLT_MASK_32;

      /* choose the write mask matching the requested aspects */
      if ((clear_flags & PIPE_CLEAR_DEPTHSTENCIL) == PIPE_CLEAR_DEPTHSTENCIL)
         write_mask = GEN6_BLT_MASK_32;
      else if (clear_flags & PIPE_CLEAR_DEPTH)
         write_mask = GEN6_BLT_MASK_32_LO;
      else
         write_mask = GEN6_BLT_MASK_32_HI;
      break;
   default:
      return false;
      break;
   }

   val = util_pack_z_stencil(zs->format, depth, stencil);

   u_box_3d(x, y, zs->u.tex.first_layer, width, height,
         zs->u.tex.last_layer - zs->u.tex.first_layer + 1, &box);

   /* depth/stencil surfaces are never buffers */
   assert(zs->texture->target != PIPE_BUFFER);

   return tex_clear_region(blitter, ilo_texture(zs->texture),
         zs->u.tex.level, &box, val, value_mask, write_mask);
}
/contrib/sdk/sources/Mesa/src/gallium/drivers/ilo/ilo_blitter_pipe.c |
---|
0,0 → 1,229 |
/* |
* Mesa 3-D graphics library |
* |
* Copyright (C) 2013 LunarG, Inc. |
* |
* Permission is hereby granted, free of charge, to any person obtaining a |
* copy of this software and associated documentation files (the "Software"), |
* to deal in the Software without restriction, including without limitation |
* the rights to use, copy, modify, merge, publish, distribute, sublicense, |
* and/or sell copies of the Software, and to permit persons to whom the |
* Software is furnished to do so, subject to the following conditions: |
* |
* The above copyright notice and this permission notice shall be included |
* in all copies or substantial portions of the Software. |
* |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING |
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER |
* DEALINGS IN THE SOFTWARE. |
* |
* Authors: |
* Chia-I Wu <olv@lunarg.com> |
*/ |
#include "util/u_blitter.h" |
#include "util/u_surface.h" |
#include "ilo_3d.h" |
#include "ilo_context.h" |
#include "ilo_blitter.h" |
/*
 * Which u_blitter entry point is about to be called; determines which
 * states ilo_blitter_pipe_begin() must save.
 */
enum ilo_blitter_pipe_op {
   ILO_BLITTER_PIPE_BLIT,
   ILO_BLITTER_PIPE_COPY,
   ILO_BLITTER_PIPE_CLEAR,
   ILO_BLITTER_PIPE_CLEAR_FB,
};
/*
 * Save the context states that the upcoming u_blitter call will clobber,
 * so that u_blitter can restore them afterwards.  The set of states saved
 * depends on \p op.
 */
static void
ilo_blitter_pipe_begin(struct ilo_blitter *blitter,
                       enum ilo_blitter_pipe_op op,
                       bool scissor_enable)
{
   struct blitter_context *b = blitter->pipe_blitter;
   struct ilo_context *ilo = blitter->ilo;

   /* vertex states */
   util_blitter_save_vertex_buffer_slot(b, ilo->vb.states);
   util_blitter_save_vertex_elements(b, (void *) ilo->ve);
   util_blitter_save_vertex_shader(b, ilo->vs);
   util_blitter_save_geometry_shader(b, ilo->gs);
   util_blitter_save_so_targets(b, ilo->so.count, ilo->so.states);
   util_blitter_save_rasterizer(b, (void *) ilo->rasterizer);

   /* fragment states */
   util_blitter_save_fragment_shader(b, ilo->fs);
   util_blitter_save_depth_stencil_alpha(b, (void *) ilo->dsa);
   util_blitter_save_blend(b, (void *) ilo->blend);
   util_blitter_save_sample_mask(b, ilo->sample_mask);
   util_blitter_save_stencil_ref(b, &ilo->stencil_ref);
   util_blitter_save_viewport(b, &ilo->viewport.viewport0);

   /* the scissor only needs saving when the operation honors it */
   if (scissor_enable)
      util_blitter_save_scissor(b, &ilo->scissor.scissor0);

   switch (op) {
   case ILO_BLITTER_PIPE_BLIT:
   case ILO_BLITTER_PIPE_COPY:
      /*
       * we are about to call util_blitter_blit() or
       * util_blitter_copy_texture()
       */
      util_blitter_save_fragment_sampler_states(b,
            ilo->sampler[PIPE_SHADER_FRAGMENT].count,
            (void **) ilo->sampler[PIPE_SHADER_FRAGMENT].cso);
      util_blitter_save_fragment_sampler_views(b,
            ilo->view[PIPE_SHADER_FRAGMENT].count,
            ilo->view[PIPE_SHADER_FRAGMENT].states);
      util_blitter_save_framebuffer(b, &ilo->fb.state);

      /* resource_copy_region() or blit() does not honor render condition */
      util_blitter_save_render_condition(b,
            ilo->hw3d->render_condition.query,
            ilo->hw3d->render_condition.cond,
            ilo->hw3d->render_condition.mode);
      break;
   case ILO_BLITTER_PIPE_CLEAR:
      /*
       * we are about to call util_blitter_clear_render_target() or
       * util_blitter_clear_depth_stencil()
       */
      util_blitter_save_framebuffer(b, &ilo->fb.state);
      break;
   case ILO_BLITTER_PIPE_CLEAR_FB:
      /* we are about to call util_blitter_clear() */
      break;
   default:
      break;
   }
}
/*
 * Counterpart of ilo_blitter_pipe_begin().  Nothing to do currently:
 * u_blitter restores the saved states itself.
 */
static void
ilo_blitter_pipe_end(struct ilo_blitter *blitter)
{
}
/*
 * Blit with the u_blitter path.  Tries a straight copy_region first, then
 * a full blit; the stencil part of the blit may be dropped (with a
 * warning) when unsupported.  Returns false when no path works.
 */
bool
ilo_blitter_pipe_blit(struct ilo_blitter *blitter,
                      const struct pipe_blit_info *info)
{
   struct blitter_context *b = blitter->pipe_blitter;
   struct pipe_blit_info skip_stencil;

   if (util_try_blit_via_copy_region(&blitter->ilo->base, info))
      return true;

   if (!util_blitter_is_blit_supported(b, info)) {
      /* try without stencil */
      if (info->mask & PIPE_MASK_S) {
         skip_stencil = *info;
         skip_stencil.mask = info->mask & ~PIPE_MASK_S;

         if (util_blitter_is_blit_supported(blitter->pipe_blitter,
                                            &skip_stencil)) {
            ilo_warn("ignore stencil buffer blitting\n");
            /* redirect to the stencil-less variant */
            info = &skip_stencil;
         }
      }

      /* info was not redirected above: no supported variant, give up */
      if (info != &skip_stencil) {
         ilo_warn("failed to blit with pipe blitter\n");
         return false;
      }
   }

   ilo_blitter_pipe_begin(blitter, ILO_BLITTER_PIPE_BLIT,
                          info->scissor_enable);

   util_blitter_blit(b, info);

   ilo_blitter_pipe_end(blitter);

   return true;
}
bool |
ilo_blitter_pipe_copy_resource(struct ilo_blitter *blitter, |
struct pipe_resource *dst, unsigned dst_level, |
unsigned dst_x, unsigned dst_y, unsigned dst_z, |
struct pipe_resource *src, unsigned src_level, |
const struct pipe_box *src_box) |
{ |
const unsigned mask = PIPE_MASK_RGBAZS; |
const bool copy_all_samples = true; |
/* not until we allow rendertargets to be buffers */ |
if (dst->target == PIPE_BUFFER || src->target == PIPE_BUFFER) |
return false; |
if (!util_blitter_is_copy_supported(blitter->pipe_blitter, dst, src, mask)) |
return false; |
ilo_blitter_pipe_begin(blitter, ILO_BLITTER_PIPE_COPY, false); |
util_blitter_copy_texture(blitter->pipe_blitter, |
dst, dst_level, dst_x, dst_y, dst_z, |
src, src_level, src_box, |
mask, copy_all_samples); |
ilo_blitter_pipe_end(blitter); |
return true; |
} |
bool |
ilo_blitter_pipe_clear_rt(struct ilo_blitter *blitter, |
struct pipe_surface *rt, |
const union pipe_color_union *color, |
unsigned x, unsigned y, |
unsigned width, unsigned height) |
{ |
ilo_blitter_pipe_begin(blitter, ILO_BLITTER_PIPE_CLEAR, false); |
util_blitter_clear_render_target(blitter->pipe_blitter, |
rt, color, x, y, width, height); |
ilo_blitter_pipe_end(blitter); |
return true; |
} |
bool |
ilo_blitter_pipe_clear_zs(struct ilo_blitter *blitter, |
struct pipe_surface *zs, |
unsigned clear_flags, |
double depth, unsigned stencil, |
unsigned x, unsigned y, |
unsigned width, unsigned height) |
{ |
ilo_blitter_pipe_begin(blitter, ILO_BLITTER_PIPE_CLEAR, false); |
util_blitter_clear_depth_stencil(blitter->pipe_blitter, |
zs, clear_flags, depth, stencil, x, y, width, height); |
ilo_blitter_pipe_end(blitter); |
return true; |
} |
bool |
ilo_blitter_pipe_clear_fb(struct ilo_blitter *blitter, |
unsigned buffers, |
const union pipe_color_union *color, |
double depth, unsigned stencil) |
{ |
/* TODO we should pause/resume some queries */ |
ilo_blitter_pipe_begin(blitter, ILO_BLITTER_PIPE_CLEAR_FB, false); |
util_blitter_clear(blitter->pipe_blitter, |
blitter->ilo->fb.state.width, blitter->ilo->fb.state.height, |
buffers, color, depth, stencil); |
ilo_blitter_pipe_end(blitter); |
return true; |
} |
/contrib/sdk/sources/Mesa/src/gallium/drivers/ilo/ilo_common.h |
---|
0,0 → 1,113 |
/* |
* Mesa 3-D graphics library |
* |
* Copyright (C) 2012-2013 LunarG, Inc. |
* |
* Permission is hereby granted, free of charge, to any person obtaining a |
* copy of this software and associated documentation files (the "Software"), |
* to deal in the Software without restriction, including without limitation |
* the rights to use, copy, modify, merge, publish, distribute, sublicense, |
* and/or sell copies of the Software, and to permit persons to whom the |
* Software is furnished to do so, subject to the following conditions: |
* |
* The above copyright notice and this permission notice shall be included |
* in all copies or substantial portions of the Software. |
* |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING |
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER |
* DEALINGS IN THE SOFTWARE. |
* |
* Authors: |
* Chia-I Wu <olv@lunarg.com> |
*/ |
#ifndef ILO_COMMON_H |
#define ILO_COMMON_H |
#include "pipe/p_compiler.h" |
#include "pipe/p_defines.h" |
#include "pipe/p_format.h" |
#include "util/u_debug.h" |
#include "util/u_double_list.h" |
#include "util/u_format.h" |
#include "util/u_inlines.h" |
#include "util/u_math.h" |
#include "util/u_memory.h" |
#include "util/u_pointer.h" |
/* encode a major.minor GEN as an integer, e.g. ILO_GEN(7.5) -> 750;
 * arguments are parenthesized so expressions expand correctly */
#define ILO_GEN(gen) ((int) ((gen) * 100))
/* extract the major version from an encoded GEN, e.g. 750 -> 7 */
#define ILO_GEN_GET_MAJOR(gen) ((gen) / 100)
/* debug flags; selected bits are stored in the global \c ilo_debug */
enum ilo_debug {
   ILO_DEBUG_3D = 1 << 0,       /* 3D pipeline */
   ILO_DEBUG_VS = 1 << 1,       /* vertex shader */
   ILO_DEBUG_GS = 1 << 2,       /* geometry shader */
   ILO_DEBUG_FS = 1 << 3,       /* fragment shader */
   ILO_DEBUG_CS = 1 << 4,       /* compute shader */

   ILO_DEBUG_NOHW = 1 << 8,     /* do not submit batch buffers to the HW */
   ILO_DEBUG_NOCACHE = 1 << 9,  /* disable caching -- presumably the shader cache; verify at use sites */
};
/* static device information, filled from the winsys at screen creation */
struct ilo_dev_info {
   /* these mirror intel_winsys_info */
   int devid;
   bool has_llc;                 /* LLC (last-level cache) -- passed to ilo_cp_create() as direct_map */
   bool has_gen7_sol_reset;
   bool has_address_swizzling;

   int gen;                      /* encoded generation, see ILO_GEN() */
   int gt;
   int urb_size;
};
extern int ilo_debug; |
/** |
* Print a message, for dumping or debugging. |
*/ |
static inline void _util_printf_format(1, 2) |
ilo_printf(const char *format, ...) |
{ |
va_list ap; |
va_start(ap, format); |
_debug_vprintf(format, ap); |
va_end(ap); |
} |
/** |
* Print a critical error. |
*/ |
static inline void _util_printf_format(1, 2) |
ilo_err(const char *format, ...) |
{ |
va_list ap; |
va_start(ap, format); |
_debug_vprintf(format, ap); |
va_end(ap); |
} |
/** |
* Print a warning, silenced for release builds. |
*/ |
static inline void _util_printf_format(1, 2) |
ilo_warn(const char *format, ...) |
{ |
#ifdef DEBUG |
va_list ap; |
va_start(ap, format); |
_debug_vprintf(format, ap); |
va_end(ap); |
#else |
#endif |
} |
#endif /* ILO_COMMON_H */ |
/contrib/sdk/sources/Mesa/src/gallium/drivers/ilo/ilo_context.c |
---|
0,0 → 1,190 |
/* |
* Mesa 3-D graphics library |
* |
* Copyright (C) 2012-2013 LunarG, Inc. |
* |
* Permission is hereby granted, free of charge, to any person obtaining a |
* copy of this software and associated documentation files (the "Software"), |
* to deal in the Software without restriction, including without limitation |
* the rights to use, copy, modify, merge, publish, distribute, sublicense, |
* and/or sell copies of the Software, and to permit persons to whom the |
* Software is furnished to do so, subject to the following conditions: |
* |
* The above copyright notice and this permission notice shall be included |
* in all copies or substantial portions of the Software. |
* |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING |
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER |
* DEALINGS IN THE SOFTWARE. |
* |
* Authors: |
* Chia-I Wu <olv@lunarg.com> |
*/ |
#include "util/u_upload_mgr.h" |
#include "intel_chipset.h" |
#include "ilo_3d.h" |
#include "ilo_blit.h" |
#include "ilo_blitter.h" |
#include "ilo_cp.h" |
#include "ilo_gpgpu.h" |
#include "ilo_query.h" |
#include "ilo_resource.h" |
#include "ilo_screen.h" |
#include "ilo_shader.h" |
#include "ilo_state.h" |
#include "ilo_transfer.h" |
#include "ilo_video.h" |
#include "ilo_context.h" |
static void |
ilo_context_cp_flushed(struct ilo_cp *cp, void *data) |
{ |
struct ilo_context *ilo = ilo_context(data); |
if (ilo->last_cp_bo) |
intel_bo_unreference(ilo->last_cp_bo); |
/* remember the just flushed bo, on which fences could wait */ |
ilo->last_cp_bo = cp->bo; |
intel_bo_reference(ilo->last_cp_bo); |
ilo_3d_cp_flushed(ilo->hw3d); |
} |
static void |
ilo_flush(struct pipe_context *pipe, |
struct pipe_fence_handle **f, |
unsigned flags) |
{ |
struct ilo_context *ilo = ilo_context(pipe); |
if (f) { |
struct ilo_fence *fence; |
fence = CALLOC_STRUCT(ilo_fence); |
if (fence) { |
pipe_reference_init(&fence->reference, 1); |
/* reference the batch bo that we want to wait on */ |
if (ilo_cp_empty(ilo->cp)) |
fence->bo = ilo->last_cp_bo; |
else |
fence->bo = ilo->cp->bo; |
if (fence->bo) |
intel_bo_reference(fence->bo); |
} |
*f = (struct pipe_fence_handle *) fence; |
} |
ilo_cp_flush(ilo->cp); |
} |
/*
 * pipe_context::destroy.  Each member is NULL-checked so this is also safe
 * to call on a partially constructed context from ilo_context_create().
 */
static void
ilo_context_destroy(struct pipe_context *pipe)
{
   struct ilo_context *ilo = ilo_context(pipe);

   ilo_cleanup_states(ilo);

   if (ilo->last_cp_bo)
      intel_bo_unreference(ilo->last_cp_bo);

   if (ilo->uploader)
      u_upload_destroy(ilo->uploader);

   if (ilo->blitter)
      ilo_blitter_destroy(ilo->blitter);

   if (ilo->hw3d)
      ilo_3d_destroy(ilo->hw3d);

   if (ilo->shader_cache)
      ilo_shader_cache_destroy(ilo->shader_cache);

   if (ilo->cp)
      ilo_cp_destroy(ilo->cp);

   /* created first in ilo_context_create(), destroyed last here */
   util_slab_destroy(&ilo->transfer_mempool);

   FREE(ilo);
}
/*
 * pipe_screen::context_create.  Allocates and wires up an ilo_context; on
 * any failure the partially constructed context is torn down with
 * ilo_context_destroy() and NULL is returned.
 */
static struct pipe_context *
ilo_context_create(struct pipe_screen *screen, void *priv)
{
   struct ilo_screen *is = ilo_screen(screen);
   struct ilo_context *ilo;

   ilo = CALLOC_STRUCT(ilo_context);
   if (!ilo)
      return NULL;

   ilo->winsys = is->winsys;
   ilo->dev = &is->dev;

   /*
    * initialize first, otherwise it may not be safe to call
    * ilo_context_destroy() on errors
    */
   util_slab_create(&ilo->transfer_mempool,
         sizeof(struct ilo_transfer), 64, UTIL_SLAB_SINGLETHREADED);

   /* has_llc is used as the parser's direct_map flag */
   ilo->cp = ilo_cp_create(ilo->winsys, is->dev.has_llc);
   ilo->shader_cache = ilo_shader_cache_create();
   if (ilo->cp)
      ilo->hw3d = ilo_3d_create(ilo->cp, ilo->dev);

   if (!ilo->cp || !ilo->shader_cache || !ilo->hw3d) {
      ilo_context_destroy(&ilo->base);
      return NULL;
   }

   /* 1 MB upload manager for constant and index data */
   ilo->uploader = u_upload_create(&ilo->base, 1024 * 1024, 16,
         PIPE_BIND_CONSTANT_BUFFER | PIPE_BIND_INDEX_BUFFER);
   if (!ilo->uploader) {
      ilo_context_destroy(&ilo->base);
      return NULL;
   }

   /* track the last flushed bo for fences; see ilo_context_cp_flushed() */
   ilo_cp_set_flush_callback(ilo->cp,
         ilo_context_cp_flushed, (void *) ilo);

   ilo->base.screen = screen;
   ilo->base.priv = priv;

   ilo->base.destroy = ilo_context_destroy;
   ilo->base.flush = ilo_flush;

   ilo_init_3d_functions(ilo);
   ilo_init_query_functions(ilo);
   ilo_init_state_functions(ilo);
   ilo_init_blit_functions(ilo);
   ilo_init_transfer_functions(ilo);
   ilo_init_video_functions(ilo);
   ilo_init_gpgpu_functions(ilo);

   ilo_init_states(ilo);

   /* this must be called last as u_blitter is a client of the pipe context */
   ilo->blitter = ilo_blitter_create(ilo);
   if (!ilo->blitter) {
      ilo_context_destroy(&ilo->base);
      return NULL;
   }

   return &ilo->base;
}
/**
 * Initialize context-related functions.  Hooks ilo_context_create() into the
 * screen's vtable.
 */
void
ilo_init_context_functions(struct ilo_screen *is)
{
   is->base.context_create = ilo_context_create;
}
/contrib/sdk/sources/Mesa/src/gallium/drivers/ilo/ilo_context.h |
---|
0,0 → 1,113 |
/* |
* Mesa 3-D graphics library |
* |
* Copyright (C) 2012-2013 LunarG, Inc. |
* |
* Permission is hereby granted, free of charge, to any person obtaining a |
* copy of this software and associated documentation files (the "Software"), |
* to deal in the Software without restriction, including without limitation |
* the rights to use, copy, modify, merge, publish, distribute, sublicense, |
* and/or sell copies of the Software, and to permit persons to whom the |
* Software is furnished to do so, subject to the following conditions: |
* |
* The above copyright notice and this permission notice shall be included |
* in all copies or substantial portions of the Software. |
* |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING |
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER |
* DEALINGS IN THE SOFTWARE. |
* |
* Authors: |
* Chia-I Wu <olv@lunarg.com> |
*/ |
#ifndef ILO_CONTEXT_H |
#define ILO_CONTEXT_H |
#include "pipe/p_context.h" |
#include "util/u_slab.h" |
#include "ilo_gpe.h" |
#include "ilo_common.h" |
struct pipe_draw_info; |
struct u_upload_mgr; |
struct intel_winsys; |
struct intel_bo; |
struct ilo_3d; |
struct ilo_blitter; |
struct ilo_cp; |
struct ilo_screen; |
struct ilo_shader_state; |
/* the ilo pipe context; created by ilo_context_create() */
struct ilo_context {
   struct pipe_context base;

   struct intel_winsys *winsys;
   struct ilo_dev_info *dev;

   /* allocator for struct ilo_transfer */
   struct util_slab_mempool transfer_mempool;

   struct ilo_cp *cp;
   /* the most recently flushed batch bo; fences may wait on it */
   struct intel_bo *last_cp_bo;

   struct ilo_shader_cache *shader_cache;
   struct ilo_3d *hw3d;
   struct ilo_blitter *blitter;

   /* upload manager for constant/index data */
   struct u_upload_mgr *uploader;

   /* the draw info currently being processed -- lifetime managed elsewhere;
    * TODO confirm against ilo_3d */
   const struct pipe_draw_info *draw;
   /* dirty-state bits -- presumably ILO_DIRTY_x flags; verify */
   uint32_t dirty;

   /* vertex fetch state */
   struct ilo_vb_state vb;
   const struct ilo_ve_state *ve;
   struct ilo_ib_state ib;

   /* geometry stages */
   struct ilo_shader_state *vs;
   struct ilo_shader_state *gs;
   struct ilo_so_state so;

   struct pipe_clip_state clip;
   struct ilo_viewport_state viewport;
   struct ilo_scissor_state scissor;

   const struct ilo_rasterizer_state *rasterizer;
   struct pipe_poly_stipple poly_stipple;
   unsigned sample_mask;

   /* fragment stage and output merger */
   struct ilo_shader_state *fs;
   const struct ilo_dsa_state *dsa;
   struct pipe_stencil_ref stencil_ref;
   const struct ilo_blend_state *blend;
   struct pipe_blend_color blend_color;
   struct ilo_fb_state fb;

   /* shader resources */
   struct ilo_sampler_state sampler[PIPE_SHADER_TYPES];
   struct ilo_view_state view[PIPE_SHADER_TYPES];
   struct ilo_cbuf_state cbuf[PIPE_SHADER_TYPES];
   struct ilo_resource_state resource;

   /* GPGPU */
   struct ilo_shader_state *cs;
   struct ilo_resource_state cs_resource;
   struct ilo_global_binding global_binding;
};
/* downcast a pipe_context to the embedding ilo_context */
static inline struct ilo_context *
ilo_context(struct pipe_context *pipe)
{
   return (struct ilo_context *) pipe;
}
void |
ilo_init_context_functions(struct ilo_screen *is); |
#endif /* ILO_CONTEXT_H */ |
/contrib/sdk/sources/Mesa/src/gallium/drivers/ilo/ilo_cp.c |
---|
0,0 → 1,313 |
/* |
* Mesa 3-D graphics library |
* |
* Copyright (C) 2012-2013 LunarG, Inc. |
* |
* Permission is hereby granted, free of charge, to any person obtaining a |
* copy of this software and associated documentation files (the "Software"), |
* to deal in the Software without restriction, including without limitation |
* the rights to use, copy, modify, merge, publish, distribute, sublicense, |
* and/or sell copies of the Software, and to permit persons to whom the |
* Software is furnished to do so, subject to the following conditions: |
* |
* The above copyright notice and this permission notice shall be included |
* in all copies or substantial portions of the Software. |
* |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING |
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER |
* DEALINGS IN THE SOFTWARE. |
* |
* Authors: |
* Chia-I Wu <olv@lunarg.com> |
*/ |
#include "intel_reg.h" /* for MI_xxx */ |
#include "intel_winsys.h" |
#include "ilo_cp.h" |
/* the size of the private space */ |
static const int ilo_cp_private = 2; |
/** |
* Dump the contents of the parser bo. This can only be called in the flush |
* callback. |
*/ |
void |
ilo_cp_dump(struct ilo_cp *cp) |
{ |
ilo_printf("dumping %d bytes\n", cp->used * 4); |
if (cp->used) |
intel_winsys_decode_commands(cp->winsys, cp->bo, cp->used * 4); |
} |
/**
 * Save the command parser state for rewind.
 *
 * Note that this cannot rewind a flush, and the caller must make sure
 * that does not happen.
 */
void
ilo_cp_setjmp(struct ilo_cp *cp, struct ilo_cp_jmp_buf *jmp)
{
   /* the bo pointer doubles as an identity check in ilo_cp_longjmp() */
   jmp->id = pointer_to_intptr(cp->bo);

   jmp->size = cp->size;
   jmp->used = cp->used;
   jmp->stolen = cp->stolen;
   /* save reloc count to rewind ilo_cp_write_bo() */
   jmp->reloc_count = intel_bo_get_reloc_count(cp->bo);
}
/**
 * Rewind to the saved state.
 */
void
ilo_cp_longjmp(struct ilo_cp *cp, const struct ilo_cp_jmp_buf *jmp)
{
   /* a flush replaces cp->bo, making the saved state unrewindable */
   if (jmp->id != pointer_to_intptr(cp->bo)) {
      assert(!"invalid use of CP longjmp");
      return;
   }

   cp->size = jmp->size;
   cp->used = jmp->used;
   cp->stolen = jmp->stolen;
   /* drop the relocations emitted since ilo_cp_setjmp() */
   intel_bo_clear_relocs(cp->bo, jmp->reloc_count);
}
/**
 * Clear the parser buffer, returning any stolen space and resetting the
 * write positions.
 */
static void
ilo_cp_clear_buffer(struct ilo_cp *cp)
{
   cp->cmd_cur = 0;
   cp->cmd_end = 0;

   cp->used = 0;
   cp->stolen = 0;

   /*
    * Recalculate cp->size.  This is needed not only because cp->stolen is
    * reset above, but also that ilo_cp_private are added to cp->size in
    * ilo_cp_end_buffer().
    */
   cp->size = cp->bo_size - ilo_cp_private;
}
/**
 * Add MI_BATCH_BUFFER_END to the private space of the parser buffer.
 */
static void
ilo_cp_end_buffer(struct ilo_cp *cp)
{
   /* make the private space available */
   cp->size += ilo_cp_private;

   /* ilo_cp_private is 2: room for the END and an optional NOOP below */
   assert(cp->used + 2 <= cp->size);

   cp->ptr[cp->used++] = MI_BATCH_BUFFER_END;

   /*
    * From the Sandy Bridge PRM, volume 1 part 1, page 107:
    *
    *     "The batch buffer must be QWord aligned and a multiple of QWords in
    *      length."
    */
   if (cp->used & 1)
      cp->ptr[cp->used++] = MI_NOOP;
}
/** |
* Upload the parser buffer to the bo. |
*/ |
static int |
ilo_cp_upload_buffer(struct ilo_cp *cp) |
{ |
int err; |
if (!cp->sys) { |
intel_bo_unmap(cp->bo); |
return 0; |
} |
err = intel_bo_pwrite(cp->bo, 0, cp->used * 4, cp->ptr); |
if (likely(!err && cp->stolen)) { |
const int offset = cp->bo_size - cp->stolen; |
err = intel_bo_pwrite(cp->bo, offset * 4, |
cp->stolen * 4, &cp->ptr[offset]); |
} |
return err; |
} |
/**
 * Reallocate the parser bo.
 */
static void
ilo_cp_realloc_bo(struct ilo_cp *cp)
{
   struct intel_bo *bo;

   /*
    * allocate the new bo before unreferencing the old one so that they
    * won't point at the same address, which is needed for jmpbuf
    */
   bo = intel_winsys_alloc_buffer(cp->winsys,
         "batch buffer", cp->bo_size * 4, 0);
   if (unlikely(!bo)) {
      /* reuse the old one */
      bo = cp->bo;
      /* NOTE(review): on the very first call (from ilo_cp_create) cp->bo is
       * NULL here; verify intel_bo_reference() tolerates NULL */
      intel_bo_reference(bo);
   }

   if (cp->bo)
      intel_bo_unreference(cp->bo);
   cp->bo = bo;

   /* no system-memory shadow: write directly into the mapped bo */
   if (!cp->sys) {
      intel_bo_map(cp->bo, true);
      cp->ptr = intel_bo_get_virtual(cp->bo);
   }
}
/** |
* Execute the parser bo. |
*/ |
static int |
ilo_cp_exec_bo(struct ilo_cp *cp) |
{ |
const bool do_exec = !(ilo_debug & ILO_DEBUG_NOHW); |
struct intel_context *ctx; |
unsigned long flags; |
int err; |
switch (cp->ring) { |
case ILO_CP_RING_RENDER: |
ctx = cp->render_ctx; |
flags = INTEL_EXEC_RENDER; |
break; |
case ILO_CP_RING_BLT: |
ctx = NULL; |
flags = INTEL_EXEC_BLT; |
break; |
default: |
ctx = NULL; |
flags = 0; |
break; |
} |
flags |= cp->one_off_flags; |
if (likely(do_exec)) |
err = intel_bo_exec(cp->bo, cp->used * 4, ctx, flags); |
else |
err = 0; |
cp->one_off_flags = 0; |
return err; |
} |
/**
 * Flush the command parser and execute the commands.  When the parser buffer
 * is empty, the callback is not invoked.
 */
void
ilo_cp_flush(struct ilo_cp *cp)
{
   int err;

   /* release the current owner and reclaim its reserved space */
   ilo_cp_set_owner(cp, NULL, 0);

   /* sanity check */
   assert(cp->bo_size == cp->size + cp->stolen + ilo_cp_private);

   if (!cp->used) {
      /* return the space stolen and etc. */
      ilo_cp_clear_buffer(cp);
      return;
   }

   /* terminate with MI_BATCH_BUFFER_END (and pad to a QWord) */
   ilo_cp_end_buffer(cp);

   /* upload and execute */
   err = ilo_cp_upload_buffer(cp);
   if (likely(!err))
      err = ilo_cp_exec_bo(cp);

   /* the callback sees the executed cp->bo before it is replaced below */
   if (likely(!err && cp->flush_callback))
      cp->flush_callback(cp, cp->flush_callback_data);

   ilo_cp_clear_buffer(cp);
   ilo_cp_realloc_bo(cp);
}
/**
 * Destroy the command parser.
 */
void
ilo_cp_destroy(struct ilo_cp *cp)
{
   if (cp->bo) {
      /* the bo is kept mapped only when there is no system-memory shadow */
      if (!cp->sys)
         intel_bo_unmap(cp->bo);

      intel_bo_unreference(cp->bo);
   }

   if (cp->render_ctx)
      intel_winsys_destroy_context(cp->winsys, cp->render_ctx);

   /* FREE() handles NULL */
   FREE(cp->sys);
   FREE(cp);
}
/** |
* Create a command parser. |
*/ |
struct ilo_cp * |
ilo_cp_create(struct intel_winsys *winsys, bool direct_map) |
{ |
struct ilo_cp *cp; |
cp = CALLOC_STRUCT(ilo_cp); |
if (!cp) |
return NULL; |
cp->winsys = winsys; |
cp->render_ctx = intel_winsys_create_context(winsys); |
cp->ring = ILO_CP_RING_RENDER; |
cp->no_implicit_flush = false; |
cp->bo_size = 8192; |
if (!direct_map) { |
cp->sys = MALLOC(cp->bo_size * 4); |
if (!cp->sys) { |
FREE(cp); |
return NULL; |
} |
cp->ptr = cp->sys; |
} |
ilo_cp_realloc_bo(cp); |
if (!cp->bo) { |
FREE(cp->sys); |
FREE(cp); |
return NULL; |
} |
ilo_cp_clear_buffer(cp); |
return cp; |
} |
/contrib/sdk/sources/Mesa/src/gallium/drivers/ilo/ilo_cp.h |
---|
0,0 → 1,363 |
/* |
* Mesa 3-D graphics library |
* |
* Copyright (C) 2012-2013 LunarG, Inc. |
* |
* Permission is hereby granted, free of charge, to any person obtaining a |
* copy of this software and associated documentation files (the "Software"), |
* to deal in the Software without restriction, including without limitation |
* the rights to use, copy, modify, merge, publish, distribute, sublicense, |
* and/or sell copies of the Software, and to permit persons to whom the |
* Software is furnished to do so, subject to the following conditions: |
* |
* The above copyright notice and this permission notice shall be included |
* in all copies or substantial portions of the Software. |
* |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING |
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER |
* DEALINGS IN THE SOFTWARE. |
* |
* Authors: |
* Chia-I Wu <olv@lunarg.com> |
*/ |
#ifndef ILO_CP_H |
#define ILO_CP_H |
#include "intel_winsys.h" |
#include "ilo_common.h" |
struct ilo_cp; |
enum ilo_cp_ring { |
ILO_CP_RING_RENDER, |
ILO_CP_RING_BLT, |
ILO_CP_RING_COUNT, |
}; |
/* generic parser callback; \p data is caller-supplied context */
typedef void (*ilo_cp_callback)(struct ilo_cp *cp, void *data);

/* an owner is notified via release_callback before it loses the parser;
 * see ilo_cp_set_owner() */
struct ilo_cp_owner {
   ilo_cp_callback release_callback;
   void *release_data;
};
/**
 * Command parser.  Commands are accumulated at the bottom of the buffer and
 * "stolen" data at the top; a flush uploads both and executes the bo.
 */
struct ilo_cp {
   struct intel_winsys *winsys;
   struct intel_context *render_ctx;   /* hw context used for the render ring */

   ilo_cp_callback flush_callback;     /* invoked after a successful flush */
   void *flush_callback_data;

   const struct ilo_cp_owner *owner;   /* current owner, or NULL */
   int owner_reserve;                  /* dwords reserved for the owner */

   enum ilo_cp_ring ring;
   bool no_implicit_flush;             /* assert instead of flushing implicitly */
   unsigned one_off_flags;             /* extra exec flags, cleared after flush */

   int bo_size;                        /* total buffer size, in dwords */
   struct intel_bo *bo;
   uint32_t *sys;                      /* system-memory shadow, or NULL when mapped */
   uint32_t *ptr;                      /* write pointer: either sys or the bo map */

   int size, used, stolen;             /* all in dwords */
   int cmd_cur, cmd_end;               /* current command's write window */
};
/**
 * Jump buffer to save command parser state for rewind.
 */
struct ilo_cp_jmp_buf {
   intptr_t id;                /* identity of the bo the state was saved for */
   int size, used, stolen;     /* saved parser counters, in dwords */
   int reloc_count;            /* bo reloc count at save time */
};
struct ilo_cp * |
ilo_cp_create(struct intel_winsys *winsys, bool direct_map); |
void |
ilo_cp_destroy(struct ilo_cp *cp); |
void |
ilo_cp_flush(struct ilo_cp *cp); |
void |
ilo_cp_dump(struct ilo_cp *cp); |
void |
ilo_cp_setjmp(struct ilo_cp *cp, struct ilo_cp_jmp_buf *jmp); |
void |
ilo_cp_longjmp(struct ilo_cp *cp, const struct ilo_cp_jmp_buf *jmp); |
/**
 * Return true if the parser buffer is empty, i.e. nothing has been written
 * since the last flush.
 */
static inline bool
ilo_cp_empty(struct ilo_cp *cp)
{
   return !cp->used;
}
/**
 * Return the remaining space (in dwords) in the parser buffer.
 */
static inline int
ilo_cp_space(struct ilo_cp *cp)
{
   return cp->size - cp->used;
}
/**
 * Internal function called by functions that flush implicitly.
 */
static inline void
ilo_cp_implicit_flush(struct ilo_cp *cp)
{
   if (cp->no_implicit_flush) {
      assert(!"unexpected command parser flush");

      /* discard the commands; the flush below then submits nothing */
      cp->used = 0;
   }

   ilo_cp_flush(cp);
}
/** |
* Set the ring buffer. |
*/ |
static inline void |
ilo_cp_set_ring(struct ilo_cp *cp, enum ilo_cp_ring ring) |
{ |
if (cp->ring != ring) { |
ilo_cp_implicit_flush(cp); |
cp->ring = ring; |
} |
} |
/**
 * Assert that no function should flush implicitly.
 *
 * Despite the name, this only arms/disarms the check: while enabled,
 * ilo_cp_implicit_flush() asserts and discards the pending commands.
 */
static inline void
ilo_cp_assert_no_implicit_flush(struct ilo_cp *cp, bool enable)
{
   cp->no_implicit_flush = enable;
}
/**
 * Set one-off flags.  They will be cleared after flushing.  The flags are
 * OR'ed into the execution flags of the next bo submission; see
 * ilo_cp_exec_bo().
 */
static inline void
ilo_cp_set_one_off_flags(struct ilo_cp *cp, unsigned flags)
{
   cp->one_off_flags |= flags;
}
/**
 * Set flush callback.  The callback is invoked after the bo has been
 * successfully executed, and before the bo is reallocated.
 *
 * \param data opaque pointer passed back to \p callback
 */
static inline void
ilo_cp_set_flush_callback(struct ilo_cp *cp, ilo_cp_callback callback,
                          void *data)
{
   cp->flush_callback = callback;
   cp->flush_callback_data = data;
}
/**
 * Set the parser owner.  If this is a new owner, the previous owner is
 * notified and the space it reserved is reclaimed.
 *
 * \param reserve dwords to keep reserved for the owner
 * \return true if this is a new owner
 */
static inline bool
ilo_cp_set_owner(struct ilo_cp *cp, const struct ilo_cp_owner *owner,
                 int reserve)
{
   const bool new_owner = (cp->owner != owner);

   /* release current owner */
   if (new_owner && cp->owner) {
      const bool no_implicit_flush = cp->no_implicit_flush;

      /* reclaim the reserved space */
      cp->size += cp->owner_reserve;
      cp->owner_reserve = 0;

      /* invoke the release callback; implicit flushes are forbidden while
       * it runs, then the previous setting is restored */
      cp->no_implicit_flush = true;
      cp->owner->release_callback(cp, cp->owner->release_data);
      cp->no_implicit_flush = no_implicit_flush;

      cp->owner = NULL;
   }

   /* grow or shrink the reservation to the requested amount */
   if (cp->owner_reserve != reserve) {
      const int extra = reserve - cp->owner_reserve;

      if (cp->used > cp->size - extra) {
         /* flush to make room; the flush resets the reservation to zero */
         ilo_cp_implicit_flush(cp);

         assert(cp->used <= cp->size - reserve);
         cp->size -= reserve;
         cp->owner_reserve = reserve;
      }
      else {
         cp->size -= extra;
         cp->owner_reserve += extra;
      }
   }

   /* set owner last because of the possible flush above */
   cp->owner = owner;

   return new_owner;
}
/** |
* Begin writing a command. |
*/ |
static inline void |
ilo_cp_begin(struct ilo_cp *cp, int cmd_size) |
{ |
if (cp->used + cmd_size > cp->size) { |
ilo_cp_implicit_flush(cp); |
assert(cp->used + cmd_size <= cp->size); |
} |
assert(cp->cmd_cur == cp->cmd_end); |
cp->cmd_cur = cp->used; |
cp->cmd_end = cp->cmd_cur + cmd_size; |
cp->used = cp->cmd_end; |
} |
/** |
* Begin writing data to a space stolen from the top of the parser buffer. |
* |
* \param desc informative description of the data to be written |
* \param data_size in dwords |
* \param align in dwords |
* \param bo_offset in bytes to the stolen space |
*/ |
static inline void |
ilo_cp_steal(struct ilo_cp *cp, const char *desc, |
int data_size, int align, uint32_t *bo_offset) |
{ |
int pad, steal; |
if (!align) |
align = 1; |
pad = (cp->bo_size - cp->stolen - data_size) % align; |
steal = data_size + pad; |
/* flush if there is not enough space after stealing */ |
if (cp->used > cp->size - steal) { |
ilo_cp_implicit_flush(cp); |
pad = (cp->bo_size - cp->stolen - data_size) % align; |
steal = data_size + steal; |
assert(cp->used <= cp->size - steal); |
} |
cp->size -= steal; |
cp->stolen += steal; |
assert(cp->cmd_cur == cp->cmd_end); |
cp->cmd_cur = cp->bo_size - cp->stolen; |
cp->cmd_end = cp->cmd_cur + data_size; |
/* offset in cp->bo */ |
if (bo_offset) |
*bo_offset = cp->cmd_cur * 4; |
} |
/**
 * Write a dword to the parser buffer.  This function must be enclosed by
 * ilo_cp_begin()/ilo_cp_steal() and ilo_cp_end().
 */
static inline void
ilo_cp_write(struct ilo_cp *cp, uint32_t val)
{
   /* must stay within the window reserved by ilo_cp_begin()/ilo_cp_steal() */
   assert(cp->cmd_cur < cp->cmd_end);
   cp->ptr[cp->cmd_cur++] = val;
}
/** |
* Write multiple dwords to the parser buffer. |
*/ |
static inline void |
ilo_cp_write_multi(struct ilo_cp *cp, const void *vals, int num_vals) |
{ |
assert(cp->cmd_cur + num_vals <= cp->cmd_end); |
memcpy(cp->ptr + cp->cmd_cur, vals, num_vals * 4); |
cp->cmd_cur += num_vals; |
} |
/** |
* Write a bo to the parser buffer. In addition to writing the offset of the |
* bo to the buffer, it also emits a relocation. |
*/ |
static inline void |
ilo_cp_write_bo(struct ilo_cp *cp, uint32_t val, struct intel_bo *bo, |
uint32_t read_domains, uint32_t write_domain) |
{ |
if (bo) { |
intel_bo_emit_reloc(cp->bo, cp->cmd_cur * 4, |
bo, val, read_domains, write_domain); |
ilo_cp_write(cp, val + intel_bo_get_offset(bo)); |
} |
else { |
ilo_cp_write(cp, val); |
} |
} |
/**
 * End a command.  Every ilo_cp_begin() or ilo_cp_steal() must have a
 * matching ilo_cp_end().
 *
 * This only validates (in debug builds) that exactly the reserved number of
 * dwords were written; it has no effect otherwise.
 */
static inline void
ilo_cp_end(struct ilo_cp *cp)
{
   assert(cp->cmd_cur == cp->cmd_end);
}
/**
 * A variant of ilo_cp_steal() where the data are written via the returned
 * pointer.
 *
 * \return ptr pointer where the data are written to.  It is valid until any
 *         change is made to the parser.
 */
static inline void *
ilo_cp_steal_ptr(struct ilo_cp *cp, const char *desc,
                 int data_size, int align, uint32_t *bo_offset)
{
   void *ptr;

   ilo_cp_steal(cp, desc, data_size, align, bo_offset);

   /* hand the whole stolen window to the caller and mark it consumed */
   ptr = &cp->ptr[cp->cmd_cur];
   cp->cmd_cur = cp->cmd_end;

   ilo_cp_end(cp);

   return ptr;
}
#endif /* ILO_CP_H */ |
/contrib/sdk/sources/Mesa/src/gallium/drivers/ilo/ilo_format.c |
---|
0,0 → 1,687 |
/* |
* Mesa 3-D graphics library |
* |
* Copyright (C) 2012-2013 LunarG, Inc. |
* |
* Permission is hereby granted, free of charge, to any person obtaining a |
* copy of this software and associated documentation files (the "Software"), |
* to deal in the Software without restriction, including without limitation |
* the rights to use, copy, modify, merge, publish, distribute, sublicense, |
* and/or sell copies of the Software, and to permit persons to whom the |
* Software is furnished to do so, subject to the following conditions: |
* |
* The above copyright notice and this permission notice shall be included |
* in all copies or substantial portions of the Software. |
* |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING |
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER |
* DEALINGS IN THE SOFTWARE. |
* |
* Authors: |
* Chia-I Wu <olv@lunarg.com> |
*/ |
#include "vl/vl_video_buffer.h" |
#include "brw_defines.h" |
#include "ilo_screen.h" |
#include "ilo_format.h" |
/* stolen from classic i965 */
struct surface_format_info {
   bool exists;
   /* each capability below holds the first hardware generation that supports
    * it, encoded as major*10 (e.g. 70 = gen7); Y (0) means all generations
    * and x (999) means none -- see the table comment below */
   int sampling;
   int filtering;
   int shadow_compare;
   int chroma_key;
   int render_target;
   int alpha_blend;
   int input_vb;
   int streamed_output_vb;
   int color_processing;
};
/* This macro allows us to write the table almost as it appears in the PRM,
 * while restructuring it to turn it into the C code we want.
 * It expands to a designated array initializer indexed by the surface format.
 */
#define SF(sampl, filt, shad, ck, rt, ab, vb, so, color, sf) \
   [sf] = { true, sampl, filt, shad, ck, rt, ab, vb, so, color },

/* Y: supported on all generations; x: never supported */
#define Y 0
#define x 999
/**
 * This is the table of support for surface (texture, renderbuffer, and vertex
 * buffer, but not depthbuffer) formats across the various hardware generations.
 *
 * The table is formatted to match the documentation, except that the docs have
 * this ridiculous mapping of Y[*+~^#&] for "supported on DevWhatever". To put
 * it in our table, here's the mapping:
 *
 * Y*: 45
 * Y+: 45 (g45/gm45)
 * Y~: 50 (gen5)
 * Y^: 60 (gen6)
 * Y#: 70 (gen7)
 *
 * The abbreviations in the header below are:
 * smpl - Sampling Engine
 * filt - Sampling Engine Filtering
 * shad - Sampling Engine Shadow Map
 * CK - Sampling Engine Chroma Key
 * RT - Render Target
 * AB - Alpha Blend Render Target
 * VB - Input Vertex Buffer
 * SO - Streamed Output Vertex Buffers (transform feedback)
 * color - Color Processing
 *
 * See page 88 of the Sandybridge PRM VOL4_Part1 PDF.
 *
 * As of Ivybridge, the columns are no longer in that table and the
 * information can be found spread across:
 *
 * - VOL2_Part1 section 2.5.11 Format Conversion (vertex fetch).
 * - VOL4_Part1 section 2.12.2.1.2 Sampler Output Channel Mapping.
 * - VOL4_Part1 section 3.9.11 Render Target Write.
 */
const struct surface_format_info surface_formats[] = {
/* smpl filt shad CK RT AB VB SO color */
   SF( Y, 50, x, x, Y, Y, Y, Y, x, BRW_SURFACEFORMAT_R32G32B32A32_FLOAT)
   SF( Y, x, x, x, Y, x, Y, Y, x, BRW_SURFACEFORMAT_R32G32B32A32_SINT)
   SF( Y, x, x, x, Y, x, Y, Y, x, BRW_SURFACEFORMAT_R32G32B32A32_UINT)
   SF( x, x, x, x, x, x, Y, x, x, BRW_SURFACEFORMAT_R32G32B32A32_UNORM)
   SF( x, x, x, x, x, x, Y, x, x, BRW_SURFACEFORMAT_R32G32B32A32_SNORM)
   SF( x, x, x, x, x, x, Y, x, x, BRW_SURFACEFORMAT_R64G64_FLOAT)
   SF( Y, 50, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_R32G32B32X32_FLOAT)
   SF( x, x, x, x, x, x, Y, x, x, BRW_SURFACEFORMAT_R32G32B32A32_SSCALED)
   SF( x, x, x, x, x, x, Y, x, x, BRW_SURFACEFORMAT_R32G32B32A32_USCALED)
   SF( x, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_R32G32B32A32_SFIXED)
   SF( x, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_R64G64_PASSTHRU)
   SF( Y, 50, x, x, x, x, Y, Y, x, BRW_SURFACEFORMAT_R32G32B32_FLOAT)
   SF( Y, x, x, x, x, x, Y, Y, x, BRW_SURFACEFORMAT_R32G32B32_SINT)
   SF( Y, x, x, x, x, x, Y, Y, x, BRW_SURFACEFORMAT_R32G32B32_UINT)
   SF( x, x, x, x, x, x, Y, x, x, BRW_SURFACEFORMAT_R32G32B32_UNORM)
   SF( x, x, x, x, x, x, Y, x, x, BRW_SURFACEFORMAT_R32G32B32_SNORM)
   SF( x, x, x, x, x, x, Y, x, x, BRW_SURFACEFORMAT_R32G32B32_SSCALED)
   SF( x, x, x, x, x, x, Y, x, x, BRW_SURFACEFORMAT_R32G32B32_USCALED)
   SF( x, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_R32G32B32_SFIXED)
   SF( Y, Y, x, x, Y, 45, Y, x, 60, BRW_SURFACEFORMAT_R16G16B16A16_UNORM)
   SF( Y, Y, x, x, Y, 60, Y, x, x, BRW_SURFACEFORMAT_R16G16B16A16_SNORM)
   SF( Y, x, x, x, Y, x, Y, x, x, BRW_SURFACEFORMAT_R16G16B16A16_SINT)
   SF( Y, x, x, x, Y, x, Y, x, x, BRW_SURFACEFORMAT_R16G16B16A16_UINT)
   SF( Y, Y, x, x, Y, Y, Y, x, x, BRW_SURFACEFORMAT_R16G16B16A16_FLOAT)
   SF( Y, 50, x, x, Y, Y, Y, Y, x, BRW_SURFACEFORMAT_R32G32_FLOAT)
   SF( Y, x, x, x, Y, x, Y, Y, x, BRW_SURFACEFORMAT_R32G32_SINT)
   SF( Y, x, x, x, Y, x, Y, Y, x, BRW_SURFACEFORMAT_R32G32_UINT)
   SF( Y, 50, Y, x, x, x, x, x, x, BRW_SURFACEFORMAT_R32_FLOAT_X8X24_TYPELESS)
   SF( Y, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_X32_TYPELESS_G8X24_UINT)
   SF( Y, 50, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_L32A32_FLOAT)
   SF( x, x, x, x, x, x, Y, x, x, BRW_SURFACEFORMAT_R32G32_UNORM)
   SF( x, x, x, x, x, x, Y, x, x, BRW_SURFACEFORMAT_R32G32_SNORM)
   SF( x, x, x, x, x, x, Y, x, x, BRW_SURFACEFORMAT_R64_FLOAT)
   SF( Y, Y, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_R16G16B16X16_UNORM)
   SF( Y, Y, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_R16G16B16X16_FLOAT)
   SF( Y, 50, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_A32X32_FLOAT)
   SF( Y, 50, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_L32X32_FLOAT)
   SF( Y, 50, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_I32X32_FLOAT)
   SF( x, x, x, x, x, x, Y, x, x, BRW_SURFACEFORMAT_R16G16B16A16_SSCALED)
   SF( x, x, x, x, x, x, Y, x, x, BRW_SURFACEFORMAT_R16G16B16A16_USCALED)
   SF( x, x, x, x, x, x, Y, x, x, BRW_SURFACEFORMAT_R32G32_SSCALED)
   SF( x, x, x, x, x, x, Y, x, x, BRW_SURFACEFORMAT_R32G32_USCALED)
   SF( x, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_R32G32_SFIXED)
   SF( x, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_R64_PASSTHRU)
   SF( Y, Y, x, Y, Y, Y, Y, x, 60, BRW_SURFACEFORMAT_B8G8R8A8_UNORM)
   SF( Y, Y, x, x, Y, Y, x, x, x, BRW_SURFACEFORMAT_B8G8R8A8_UNORM_SRGB)
/* smpl filt shad CK RT AB VB SO color */
   SF( Y, Y, x, x, Y, Y, Y, x, 60, BRW_SURFACEFORMAT_R10G10B10A2_UNORM)
   SF( Y, Y, x, x, x, x, x, x, 60, BRW_SURFACEFORMAT_R10G10B10A2_UNORM_SRGB)
   SF( Y, x, x, x, Y, x, Y, x, x, BRW_SURFACEFORMAT_R10G10B10A2_UINT)
   SF( Y, Y, x, x, x, Y, Y, x, x, BRW_SURFACEFORMAT_R10G10B10_SNORM_A2_UNORM)
   SF( Y, Y, x, x, Y, Y, Y, x, 60, BRW_SURFACEFORMAT_R8G8B8A8_UNORM)
   SF( Y, Y, x, x, Y, Y, x, x, 60, BRW_SURFACEFORMAT_R8G8B8A8_UNORM_SRGB)
   SF( Y, Y, x, x, Y, 60, Y, x, x, BRW_SURFACEFORMAT_R8G8B8A8_SNORM)
   SF( Y, x, x, x, Y, x, Y, x, x, BRW_SURFACEFORMAT_R8G8B8A8_SINT)
   SF( Y, x, x, x, Y, x, Y, x, x, BRW_SURFACEFORMAT_R8G8B8A8_UINT)
   SF( Y, Y, x, x, Y, 45, Y, x, x, BRW_SURFACEFORMAT_R16G16_UNORM)
   SF( Y, Y, x, x, Y, 60, Y, x, x, BRW_SURFACEFORMAT_R16G16_SNORM)
   SF( Y, x, x, x, Y, x, Y, x, x, BRW_SURFACEFORMAT_R16G16_SINT)
   SF( Y, x, x, x, Y, x, Y, x, x, BRW_SURFACEFORMAT_R16G16_UINT)
   SF( Y, Y, x, x, Y, Y, Y, x, x, BRW_SURFACEFORMAT_R16G16_FLOAT)
   SF( Y, Y, x, x, Y, Y, x, x, 60, BRW_SURFACEFORMAT_B10G10R10A2_UNORM)
   SF( Y, Y, x, x, Y, Y, x, x, 60, BRW_SURFACEFORMAT_B10G10R10A2_UNORM_SRGB)
   SF( Y, Y, x, x, Y, Y, Y, x, x, BRW_SURFACEFORMAT_R11G11B10_FLOAT)
   SF( Y, x, x, x, Y, x, Y, Y, x, BRW_SURFACEFORMAT_R32_SINT)
   SF( Y, x, x, x, Y, x, Y, Y, x, BRW_SURFACEFORMAT_R32_UINT)
   SF( Y, 50, Y, x, Y, Y, Y, Y, x, BRW_SURFACEFORMAT_R32_FLOAT)
   SF( Y, 50, Y, x, x, x, x, x, x, BRW_SURFACEFORMAT_R24_UNORM_X8_TYPELESS)
   SF( Y, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_X24_TYPELESS_G8_UINT)
   SF( Y, Y, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_L16A16_UNORM)
   SF( Y, 50, Y, x, x, x, x, x, x, BRW_SURFACEFORMAT_I24X8_UNORM)
   SF( Y, 50, Y, x, x, x, x, x, x, BRW_SURFACEFORMAT_L24X8_UNORM)
   SF( Y, 50, Y, x, x, x, x, x, x, BRW_SURFACEFORMAT_A24X8_UNORM)
   SF( Y, 50, Y, x, x, x, x, x, x, BRW_SURFACEFORMAT_I32_FLOAT)
   SF( Y, 50, Y, x, x, x, x, x, x, BRW_SURFACEFORMAT_L32_FLOAT)
   SF( Y, 50, Y, x, x, x, x, x, x, BRW_SURFACEFORMAT_A32_FLOAT)
   SF( Y, Y, x, Y, x, x, x, x, 60, BRW_SURFACEFORMAT_B8G8R8X8_UNORM)
   SF( Y, Y, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_B8G8R8X8_UNORM_SRGB)
   SF( Y, Y, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_R8G8B8X8_UNORM)
   SF( Y, Y, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_R8G8B8X8_UNORM_SRGB)
   SF( Y, Y, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_R9G9B9E5_SHAREDEXP)
   SF( Y, Y, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_B10G10R10X2_UNORM)
   SF( Y, Y, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_L16A16_FLOAT)
   SF( x, x, x, x, x, x, Y, x, x, BRW_SURFACEFORMAT_R32_UNORM)
   SF( x, x, x, x, x, x, Y, x, x, BRW_SURFACEFORMAT_R32_SNORM)
/* smpl filt shad CK RT AB VB SO color */
   SF( x, x, x, x, x, x, Y, x, x, BRW_SURFACEFORMAT_R10G10B10X2_USCALED)
   SF( x, x, x, x, x, x, Y, x, x, BRW_SURFACEFORMAT_R8G8B8A8_SSCALED)
   SF( x, x, x, x, x, x, Y, x, x, BRW_SURFACEFORMAT_R8G8B8A8_USCALED)
   SF( x, x, x, x, x, x, Y, x, x, BRW_SURFACEFORMAT_R16G16_SSCALED)
   SF( x, x, x, x, x, x, Y, x, x, BRW_SURFACEFORMAT_R16G16_USCALED)
   SF( x, x, x, x, x, x, Y, x, x, BRW_SURFACEFORMAT_R32_SSCALED)
   SF( x, x, x, x, x, x, Y, x, x, BRW_SURFACEFORMAT_R32_USCALED)
   SF( Y, Y, x, Y, Y, Y, x, x, x, BRW_SURFACEFORMAT_B5G6R5_UNORM)
   SF( Y, Y, x, x, Y, Y, x, x, x, BRW_SURFACEFORMAT_B5G6R5_UNORM_SRGB)
   SF( Y, Y, x, Y, Y, Y, x, x, x, BRW_SURFACEFORMAT_B5G5R5A1_UNORM)
   SF( Y, Y, x, x, Y, Y, x, x, x, BRW_SURFACEFORMAT_B5G5R5A1_UNORM_SRGB)
   SF( Y, Y, x, Y, Y, Y, x, x, x, BRW_SURFACEFORMAT_B4G4R4A4_UNORM)
   SF( Y, Y, x, x, Y, Y, x, x, x, BRW_SURFACEFORMAT_B4G4R4A4_UNORM_SRGB)
   SF( Y, Y, x, x, Y, Y, Y, x, x, BRW_SURFACEFORMAT_R8G8_UNORM)
   SF( Y, Y, x, Y, Y, 60, Y, x, x, BRW_SURFACEFORMAT_R8G8_SNORM)
   SF( Y, x, x, x, Y, x, Y, x, x, BRW_SURFACEFORMAT_R8G8_SINT)
   SF( Y, x, x, x, Y, x, Y, x, x, BRW_SURFACEFORMAT_R8G8_UINT)
   SF( Y, Y, Y, x, Y, 45, Y, x, 70, BRW_SURFACEFORMAT_R16_UNORM)
   SF( Y, Y, x, x, Y, 60, Y, x, x, BRW_SURFACEFORMAT_R16_SNORM)
   SF( Y, x, x, x, Y, x, Y, x, x, BRW_SURFACEFORMAT_R16_SINT)
   SF( Y, x, x, x, Y, x, Y, x, x, BRW_SURFACEFORMAT_R16_UINT)
   SF( Y, Y, x, x, Y, Y, Y, x, x, BRW_SURFACEFORMAT_R16_FLOAT)
   SF(50, 50, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_A8P8_UNORM_PALETTE0)
   SF(50, 50, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_A8P8_UNORM_PALETTE1)
   SF( Y, Y, Y, x, x, x, x, x, x, BRW_SURFACEFORMAT_I16_UNORM)
   SF( Y, Y, Y, x, x, x, x, x, x, BRW_SURFACEFORMAT_L16_UNORM)
   SF( Y, Y, Y, x, x, x, x, x, x, BRW_SURFACEFORMAT_A16_UNORM)
   SF( Y, Y, x, Y, x, x, x, x, x, BRW_SURFACEFORMAT_L8A8_UNORM)
   SF( Y, Y, Y, x, x, x, x, x, x, BRW_SURFACEFORMAT_I16_FLOAT)
   SF( Y, Y, Y, x, x, x, x, x, x, BRW_SURFACEFORMAT_L16_FLOAT)
   SF( Y, Y, Y, x, x, x, x, x, x, BRW_SURFACEFORMAT_A16_FLOAT)
   SF(45, 45, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_L8A8_UNORM_SRGB)
   SF( Y, Y, x, Y, x, x, x, x, x, BRW_SURFACEFORMAT_R5G5_SNORM_B6_UNORM)
   SF( x, x, x, x, Y, Y, x, x, x, BRW_SURFACEFORMAT_B5G5R5X1_UNORM)
   SF( x, x, x, x, Y, Y, x, x, x, BRW_SURFACEFORMAT_B5G5R5X1_UNORM_SRGB)
   SF( x, x, x, x, x, x, Y, x, x, BRW_SURFACEFORMAT_R8G8_SSCALED)
   SF( x, x, x, x, x, x, Y, x, x, BRW_SURFACEFORMAT_R8G8_USCALED)
/* smpl filt shad CK RT AB VB SO color */
   SF( x, x, x, x, x, x, Y, x, x, BRW_SURFACEFORMAT_R16_SSCALED)
   SF( x, x, x, x, x, x, Y, x, x, BRW_SURFACEFORMAT_R16_USCALED)
   SF(50, 50, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_P8A8_UNORM_PALETTE0)
   SF(50, 50, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_P8A8_UNORM_PALETTE1)
   SF( x, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_A1B5G5R5_UNORM)
   SF( x, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_A4B4G4R4_UNORM)
   SF( x, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_L8A8_UINT)
   SF( x, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_L8A8_SINT)
   SF( Y, Y, x, 45, Y, Y, Y, x, x, BRW_SURFACEFORMAT_R8_UNORM)
   SF( Y, Y, x, x, Y, 60, Y, x, x, BRW_SURFACEFORMAT_R8_SNORM)
   SF( Y, x, x, x, Y, x, Y, x, x, BRW_SURFACEFORMAT_R8_SINT)
   SF( Y, x, x, x, Y, x, Y, x, x, BRW_SURFACEFORMAT_R8_UINT)
   SF( Y, Y, x, Y, Y, Y, x, x, x, BRW_SURFACEFORMAT_A8_UNORM)
   SF( Y, Y, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_I8_UNORM)
   SF( Y, Y, x, Y, x, x, x, x, x, BRW_SURFACEFORMAT_L8_UNORM)
   SF( Y, Y, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_P4A4_UNORM)
   SF( Y, Y, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_A4P4_UNORM)
   SF( x, x, x, x, x, x, Y, x, x, BRW_SURFACEFORMAT_R8_SSCALED)
   SF( x, x, x, x, x, x, Y, x, x, BRW_SURFACEFORMAT_R8_USCALED)
   SF(45, 45, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_P8_UNORM_PALETTE0)
   SF(45, 45, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_L8_UNORM_SRGB)
   SF(45, 45, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_P8_UNORM_PALETTE1)
   SF(45, 45, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_P4A4_UNORM_PALETTE1)
   SF(45, 45, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_A4P4_UNORM_PALETTE1)
   SF( x, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_Y8_SNORM)
   SF( x, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_L8_UINT)
   SF( x, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_L8_SINT)
   SF( x, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_I8_UINT)
   SF( x, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_I8_SINT)
   SF(45, 45, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_DXT1_RGB_SRGB)
   SF( Y, Y, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_R1_UINT)
   SF( Y, Y, x, Y, Y, x, x, x, 60, BRW_SURFACEFORMAT_YCRCB_NORMAL)
   SF( Y, Y, x, Y, Y, x, x, x, 60, BRW_SURFACEFORMAT_YCRCB_SWAPUVY)
   SF(45, 45, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_P2_UNORM_PALETTE0)
   SF(45, 45, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_P2_UNORM_PALETTE1)
   SF( Y, Y, x, Y, x, x, x, x, x, BRW_SURFACEFORMAT_BC1_UNORM)
   SF( Y, Y, x, Y, x, x, x, x, x, BRW_SURFACEFORMAT_BC2_UNORM)
   SF( Y, Y, x, Y, x, x, x, x, x, BRW_SURFACEFORMAT_BC3_UNORM)
   SF( Y, Y, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_BC4_UNORM)
   SF( Y, Y, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_BC5_UNORM)
   SF( Y, Y, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_BC1_UNORM_SRGB)
   SF( Y, Y, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_BC2_UNORM_SRGB)
   SF( Y, Y, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_BC3_UNORM_SRGB)
   SF( Y, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_MONO8)
   SF( Y, Y, x, x, Y, x, x, x, 60, BRW_SURFACEFORMAT_YCRCB_SWAPUV)
   SF( Y, Y, x, x, Y, x, x, x, 60, BRW_SURFACEFORMAT_YCRCB_SWAPY)
   SF( Y, Y, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_DXT1_RGB)
/* smpl filt shad CK RT AB VB SO color */
   SF( Y, Y, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_FXT1)
   SF( x, x, x, x, x, x, Y, x, x, BRW_SURFACEFORMAT_R8G8B8_UNORM)
   SF( x, x, x, x, x, x, Y, x, x, BRW_SURFACEFORMAT_R8G8B8_SNORM)
   SF( x, x, x, x, x, x, Y, x, x, BRW_SURFACEFORMAT_R8G8B8_SSCALED)
   SF( x, x, x, x, x, x, Y, x, x, BRW_SURFACEFORMAT_R8G8B8_USCALED)
   SF( x, x, x, x, x, x, Y, x, x, BRW_SURFACEFORMAT_R64G64B64A64_FLOAT)
   SF( x, x, x, x, x, x, Y, x, x, BRW_SURFACEFORMAT_R64G64B64_FLOAT)
   SF( Y, Y, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_BC4_SNORM)
   SF( Y, Y, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_BC5_SNORM)
   SF(50, 50, x, x, x, x, 60, x, x, BRW_SURFACEFORMAT_R16G16B16_FLOAT)
   SF( x, x, x, x, x, x, Y, x, x, BRW_SURFACEFORMAT_R16G16B16_UNORM)
   SF( x, x, x, x, x, x, Y, x, x, BRW_SURFACEFORMAT_R16G16B16_SNORM)
   SF( x, x, x, x, x, x, Y, x, x, BRW_SURFACEFORMAT_R16G16B16_SSCALED)
   SF( x, x, x, x, x, x, Y, x, x, BRW_SURFACEFORMAT_R16G16B16_USCALED)
   SF(70, 70, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_BC6H_SF16)
   SF(70, 70, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_BC7_UNORM)
   SF(70, 70, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_BC7_UNORM_SRGB)
   SF(70, 70, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_BC6H_UF16)
   SF( x, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_PLANAR_420_8)
   SF( x, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_R8G8B8_UNORM_SRGB)
   SF( x, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_ETC1_RGB8)
   SF( x, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_ETC2_RGB8)
   SF( x, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_EAC_R11)
   SF( x, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_EAC_RG11)
   SF( x, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_EAC_SIGNED_R11)
   SF( x, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_EAC_SIGNED_RG11)
   SF( x, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_ETC2_SRGB8)
   SF( x, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_R16G16B16_UINT)
   SF( x, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_R16G16B16_SINT)
   SF( x, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_R32_SFIXED)
   SF( x, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_R10G10B10A2_SNORM)
   SF( x, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_R10G10B10A2_USCALED)
   SF( x, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_R10G10B10A2_SSCALED)
   SF( x, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_R10G10B10A2_SINT)
   SF( x, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_B10G10R10A2_SNORM)
   SF( x, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_B10G10R10A2_USCALED)
   SF( x, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_B10G10R10A2_SSCALED)
   SF( x, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_B10G10R10A2_UINT)
   SF( x, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_B10G10R10A2_SINT)
   SF( x, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_R64G64B64A64_PASSTHRU)
   SF( x, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_R64G64B64_PASSTHRU)
   SF( x, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_ETC2_RGB8_PTA)
   SF( x, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_ETC2_SRGB8_PTA)
   SF( x, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_ETC2_EAC_RGBA8)
   SF( x, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_ETC2_EAC_SRGB8_A8)
   SF( x, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_R8G8B8_UINT)
   SF( x, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_R8G8B8_SINT)
};
#undef x |
#undef Y |
static const struct surface_format_info * |
lookup_surface_format_info(enum pipe_format format, unsigned bind) |
{ |
static const struct surface_format_info nonexist = { |
.exists = false, |
.sampling = 999, |
.filtering = 999, |
.shadow_compare = 999, |
.chroma_key = 999, |
.render_target = 999, |
.alpha_blend = 999, |
.input_vb = 999, |
.streamed_output_vb = 999, |
.color_processing = 999, |
}; |
const int surfaceformat = ilo_translate_format(format, bind); |
return (surfaceformat >= 0 && surfaceformat < Elements(surface_formats) && |
surface_formats[surfaceformat].exists) ? |
&surface_formats[surfaceformat] : &nonexist; |
} |
/**
 * Translate a color (non-depth/stencil) pipe format to the matching hardware
 * format. Return -1 on errors.
 */
int
ilo_translate_color_format(enum pipe_format format)
{
   /* Dense table indexed by pipe format.  Entries explicitly set to 0 (and
    * any pipe format not listed) have no hardware equivalent; 0 doubles as
    * BRW_SURFACEFORMAT_R32G32B32A32_FLOAT, which is special-cased below.
    */
   static const int format_mapping[PIPE_FORMAT_COUNT] = {
      [PIPE_FORMAT_NONE] = 0,
      [PIPE_FORMAT_B8G8R8A8_UNORM] = BRW_SURFACEFORMAT_B8G8R8A8_UNORM,
      [PIPE_FORMAT_B8G8R8X8_UNORM] = BRW_SURFACEFORMAT_B8G8R8X8_UNORM,
      [PIPE_FORMAT_A8R8G8B8_UNORM] = 0,
      [PIPE_FORMAT_X8R8G8B8_UNORM] = 0,
      [PIPE_FORMAT_B5G5R5A1_UNORM] = BRW_SURFACEFORMAT_B5G5R5A1_UNORM,
      [PIPE_FORMAT_B4G4R4A4_UNORM] = BRW_SURFACEFORMAT_B4G4R4A4_UNORM,
      [PIPE_FORMAT_B5G6R5_UNORM] = BRW_SURFACEFORMAT_B5G6R5_UNORM,
      [PIPE_FORMAT_R10G10B10A2_UNORM] = BRW_SURFACEFORMAT_R10G10B10A2_UNORM,
      [PIPE_FORMAT_L8_UNORM] = BRW_SURFACEFORMAT_L8_UNORM,
      [PIPE_FORMAT_A8_UNORM] = BRW_SURFACEFORMAT_A8_UNORM,
      [PIPE_FORMAT_I8_UNORM] = BRW_SURFACEFORMAT_I8_UNORM,
      [PIPE_FORMAT_L8A8_UNORM] = BRW_SURFACEFORMAT_L8A8_UNORM,
      [PIPE_FORMAT_L16_UNORM] = BRW_SURFACEFORMAT_L16_UNORM,
      [PIPE_FORMAT_UYVY] = BRW_SURFACEFORMAT_YCRCB_SWAPUVY,
      [PIPE_FORMAT_YUYV] = BRW_SURFACEFORMAT_YCRCB_NORMAL,
      [PIPE_FORMAT_Z16_UNORM] = 0,
      [PIPE_FORMAT_Z32_UNORM] = 0,
      [PIPE_FORMAT_Z32_FLOAT] = 0,
      [PIPE_FORMAT_Z24_UNORM_S8_UINT] = 0,
      [PIPE_FORMAT_S8_UINT_Z24_UNORM] = 0,
      [PIPE_FORMAT_Z24X8_UNORM] = 0,
      [PIPE_FORMAT_X8Z24_UNORM] = 0,
      [PIPE_FORMAT_S8_UINT] = 0,
      [PIPE_FORMAT_R64_FLOAT] = BRW_SURFACEFORMAT_R64_FLOAT,
      [PIPE_FORMAT_R64G64_FLOAT] = BRW_SURFACEFORMAT_R64G64_FLOAT,
      [PIPE_FORMAT_R64G64B64_FLOAT] = BRW_SURFACEFORMAT_R64G64B64_FLOAT,
      [PIPE_FORMAT_R64G64B64A64_FLOAT] = BRW_SURFACEFORMAT_R64G64B64A64_FLOAT,
      [PIPE_FORMAT_R32_FLOAT] = BRW_SURFACEFORMAT_R32_FLOAT,
      [PIPE_FORMAT_R32G32_FLOAT] = BRW_SURFACEFORMAT_R32G32_FLOAT,
      [PIPE_FORMAT_R32G32B32_FLOAT] = BRW_SURFACEFORMAT_R32G32B32_FLOAT,
      [PIPE_FORMAT_R32G32B32A32_FLOAT] = BRW_SURFACEFORMAT_R32G32B32A32_FLOAT,
      [PIPE_FORMAT_R32_UNORM] = BRW_SURFACEFORMAT_R32_UNORM,
      [PIPE_FORMAT_R32G32_UNORM] = BRW_SURFACEFORMAT_R32G32_UNORM,
      [PIPE_FORMAT_R32G32B32_UNORM] = BRW_SURFACEFORMAT_R32G32B32_UNORM,
      [PIPE_FORMAT_R32G32B32A32_UNORM] = BRW_SURFACEFORMAT_R32G32B32A32_UNORM,
      [PIPE_FORMAT_R32_USCALED] = BRW_SURFACEFORMAT_R32_USCALED,
      [PIPE_FORMAT_R32G32_USCALED] = BRW_SURFACEFORMAT_R32G32_USCALED,
      [PIPE_FORMAT_R32G32B32_USCALED] = BRW_SURFACEFORMAT_R32G32B32_USCALED,
      [PIPE_FORMAT_R32G32B32A32_USCALED] = BRW_SURFACEFORMAT_R32G32B32A32_USCALED,
      [PIPE_FORMAT_R32_SNORM] = BRW_SURFACEFORMAT_R32_SNORM,
      [PIPE_FORMAT_R32G32_SNORM] = BRW_SURFACEFORMAT_R32G32_SNORM,
      [PIPE_FORMAT_R32G32B32_SNORM] = BRW_SURFACEFORMAT_R32G32B32_SNORM,
      [PIPE_FORMAT_R32G32B32A32_SNORM] = BRW_SURFACEFORMAT_R32G32B32A32_SNORM,
      [PIPE_FORMAT_R32_SSCALED] = BRW_SURFACEFORMAT_R32_SSCALED,
      [PIPE_FORMAT_R32G32_SSCALED] = BRW_SURFACEFORMAT_R32G32_SSCALED,
      [PIPE_FORMAT_R32G32B32_SSCALED] = BRW_SURFACEFORMAT_R32G32B32_SSCALED,
      [PIPE_FORMAT_R32G32B32A32_SSCALED] = BRW_SURFACEFORMAT_R32G32B32A32_SSCALED,
      [PIPE_FORMAT_R16_UNORM] = BRW_SURFACEFORMAT_R16_UNORM,
      [PIPE_FORMAT_R16G16_UNORM] = BRW_SURFACEFORMAT_R16G16_UNORM,
      [PIPE_FORMAT_R16G16B16_UNORM] = BRW_SURFACEFORMAT_R16G16B16_UNORM,
      [PIPE_FORMAT_R16G16B16A16_UNORM] = BRW_SURFACEFORMAT_R16G16B16A16_UNORM,
      [PIPE_FORMAT_R16_USCALED] = BRW_SURFACEFORMAT_R16_USCALED,
      [PIPE_FORMAT_R16G16_USCALED] = BRW_SURFACEFORMAT_R16G16_USCALED,
      [PIPE_FORMAT_R16G16B16_USCALED] = BRW_SURFACEFORMAT_R16G16B16_USCALED,
      [PIPE_FORMAT_R16G16B16A16_USCALED] = BRW_SURFACEFORMAT_R16G16B16A16_USCALED,
      [PIPE_FORMAT_R16_SNORM] = BRW_SURFACEFORMAT_R16_SNORM,
      [PIPE_FORMAT_R16G16_SNORM] = BRW_SURFACEFORMAT_R16G16_SNORM,
      [PIPE_FORMAT_R16G16B16_SNORM] = BRW_SURFACEFORMAT_R16G16B16_SNORM,
      [PIPE_FORMAT_R16G16B16A16_SNORM] = BRW_SURFACEFORMAT_R16G16B16A16_SNORM,
      [PIPE_FORMAT_R16_SSCALED] = BRW_SURFACEFORMAT_R16_SSCALED,
      [PIPE_FORMAT_R16G16_SSCALED] = BRW_SURFACEFORMAT_R16G16_SSCALED,
      [PIPE_FORMAT_R16G16B16_SSCALED] = BRW_SURFACEFORMAT_R16G16B16_SSCALED,
      [PIPE_FORMAT_R16G16B16A16_SSCALED] = BRW_SURFACEFORMAT_R16G16B16A16_SSCALED,
      [PIPE_FORMAT_R8_UNORM] = BRW_SURFACEFORMAT_R8_UNORM,
      [PIPE_FORMAT_R8G8_UNORM] = BRW_SURFACEFORMAT_R8G8_UNORM,
      [PIPE_FORMAT_R8G8B8_UNORM] = BRW_SURFACEFORMAT_R8G8B8_UNORM,
      [PIPE_FORMAT_R8G8B8A8_UNORM] = BRW_SURFACEFORMAT_R8G8B8A8_UNORM,
      [PIPE_FORMAT_X8B8G8R8_UNORM] = 0,
      [PIPE_FORMAT_R8_USCALED] = BRW_SURFACEFORMAT_R8_USCALED,
      [PIPE_FORMAT_R8G8_USCALED] = BRW_SURFACEFORMAT_R8G8_USCALED,
      [PIPE_FORMAT_R8G8B8_USCALED] = BRW_SURFACEFORMAT_R8G8B8_USCALED,
      [PIPE_FORMAT_R8G8B8A8_USCALED] = BRW_SURFACEFORMAT_R8G8B8A8_USCALED,
      [PIPE_FORMAT_R8_SNORM] = BRW_SURFACEFORMAT_R8_SNORM,
      [PIPE_FORMAT_R8G8_SNORM] = BRW_SURFACEFORMAT_R8G8_SNORM,
      [PIPE_FORMAT_R8G8B8_SNORM] = BRW_SURFACEFORMAT_R8G8B8_SNORM,
      [PIPE_FORMAT_R8G8B8A8_SNORM] = BRW_SURFACEFORMAT_R8G8B8A8_SNORM,
      [PIPE_FORMAT_R8_SSCALED] = BRW_SURFACEFORMAT_R8_SSCALED,
      [PIPE_FORMAT_R8G8_SSCALED] = BRW_SURFACEFORMAT_R8G8_SSCALED,
      [PIPE_FORMAT_R8G8B8_SSCALED] = BRW_SURFACEFORMAT_R8G8B8_SSCALED,
      [PIPE_FORMAT_R8G8B8A8_SSCALED] = BRW_SURFACEFORMAT_R8G8B8A8_SSCALED,
      [PIPE_FORMAT_R32_FIXED] = BRW_SURFACEFORMAT_R32_SFIXED,
      [PIPE_FORMAT_R32G32_FIXED] = BRW_SURFACEFORMAT_R32G32_SFIXED,
      [PIPE_FORMAT_R32G32B32_FIXED] = BRW_SURFACEFORMAT_R32G32B32_SFIXED,
      [PIPE_FORMAT_R32G32B32A32_FIXED] = BRW_SURFACEFORMAT_R32G32B32A32_SFIXED,
      [PIPE_FORMAT_R16_FLOAT] = BRW_SURFACEFORMAT_R16_FLOAT,
      [PIPE_FORMAT_R16G16_FLOAT] = BRW_SURFACEFORMAT_R16G16_FLOAT,
      [PIPE_FORMAT_R16G16B16_FLOAT] = BRW_SURFACEFORMAT_R16G16B16_FLOAT,
      [PIPE_FORMAT_R16G16B16A16_FLOAT] = BRW_SURFACEFORMAT_R16G16B16A16_FLOAT,
      [PIPE_FORMAT_L8_SRGB] = BRW_SURFACEFORMAT_L8_UNORM_SRGB,
      [PIPE_FORMAT_L8A8_SRGB] = BRW_SURFACEFORMAT_L8A8_UNORM_SRGB,
      [PIPE_FORMAT_R8G8B8_SRGB] = BRW_SURFACEFORMAT_R8G8B8_UNORM_SRGB,
      [PIPE_FORMAT_A8B8G8R8_SRGB] = 0,
      [PIPE_FORMAT_X8B8G8R8_SRGB] = 0,
      [PIPE_FORMAT_B8G8R8A8_SRGB] = BRW_SURFACEFORMAT_B8G8R8A8_UNORM_SRGB,
      [PIPE_FORMAT_B8G8R8X8_SRGB] = 0,
      [PIPE_FORMAT_A8R8G8B8_SRGB] = 0,
      [PIPE_FORMAT_X8R8G8B8_SRGB] = 0,
      [PIPE_FORMAT_R8G8B8A8_SRGB] = 0,
      [PIPE_FORMAT_DXT1_RGB] = BRW_SURFACEFORMAT_DXT1_RGB,
      [PIPE_FORMAT_DXT1_RGBA] = BRW_SURFACEFORMAT_BC1_UNORM,
      [PIPE_FORMAT_DXT3_RGBA] = BRW_SURFACEFORMAT_BC2_UNORM,
      [PIPE_FORMAT_DXT5_RGBA] = BRW_SURFACEFORMAT_BC3_UNORM,
      [PIPE_FORMAT_DXT1_SRGB] = BRW_SURFACEFORMAT_DXT1_RGB_SRGB,
      [PIPE_FORMAT_DXT1_SRGBA] = BRW_SURFACEFORMAT_BC1_UNORM_SRGB,
      [PIPE_FORMAT_DXT3_SRGBA] = BRW_SURFACEFORMAT_BC2_UNORM_SRGB,
      [PIPE_FORMAT_DXT5_SRGBA] = BRW_SURFACEFORMAT_BC3_UNORM_SRGB,
      [PIPE_FORMAT_RGTC1_UNORM] = BRW_SURFACEFORMAT_BC4_UNORM,
      [PIPE_FORMAT_RGTC1_SNORM] = BRW_SURFACEFORMAT_BC4_SNORM,
      [PIPE_FORMAT_RGTC2_UNORM] = BRW_SURFACEFORMAT_BC5_UNORM,
      [PIPE_FORMAT_RGTC2_SNORM] = BRW_SURFACEFORMAT_BC5_SNORM,
      [PIPE_FORMAT_R8G8_B8G8_UNORM] = 0,
      [PIPE_FORMAT_G8R8_G8B8_UNORM] = 0,
      [PIPE_FORMAT_R8SG8SB8UX8U_NORM] = 0,
      [PIPE_FORMAT_R5SG5SB6U_NORM] = 0,
      [PIPE_FORMAT_A8B8G8R8_UNORM] = 0,
      [PIPE_FORMAT_B5G5R5X1_UNORM] = BRW_SURFACEFORMAT_B5G5R5X1_UNORM,
      [PIPE_FORMAT_R10G10B10A2_USCALED] = BRW_SURFACEFORMAT_R10G10B10A2_USCALED,
      [PIPE_FORMAT_R11G11B10_FLOAT] = BRW_SURFACEFORMAT_R11G11B10_FLOAT,
      [PIPE_FORMAT_R9G9B9E5_FLOAT] = BRW_SURFACEFORMAT_R9G9B9E5_SHAREDEXP,
      [PIPE_FORMAT_Z32_FLOAT_S8X24_UINT] = 0,
      [PIPE_FORMAT_R1_UNORM] = 0,
      [PIPE_FORMAT_R10G10B10X2_USCALED] = BRW_SURFACEFORMAT_R10G10B10X2_USCALED,
      [PIPE_FORMAT_R10G10B10X2_SNORM] = 0,
      [PIPE_FORMAT_L4A4_UNORM] = 0,
      [PIPE_FORMAT_B10G10R10A2_UNORM] = BRW_SURFACEFORMAT_B10G10R10A2_UNORM,
      [PIPE_FORMAT_R10SG10SB10SA2U_NORM] = 0,
      [PIPE_FORMAT_R8G8Bx_SNORM] = 0,
      [PIPE_FORMAT_R8G8B8X8_UNORM] = BRW_SURFACEFORMAT_R8G8B8X8_UNORM,
      [PIPE_FORMAT_B4G4R4X4_UNORM] = 0,
      [PIPE_FORMAT_X24S8_UINT] = 0,
      [PIPE_FORMAT_S8X24_UINT] = 0,
      [PIPE_FORMAT_X32_S8X24_UINT] = 0,
      [PIPE_FORMAT_B2G3R3_UNORM] = 0,
      [PIPE_FORMAT_L16A16_UNORM] = BRW_SURFACEFORMAT_L16A16_UNORM,
      [PIPE_FORMAT_A16_UNORM] = BRW_SURFACEFORMAT_A16_UNORM,
      [PIPE_FORMAT_I16_UNORM] = BRW_SURFACEFORMAT_I16_UNORM,
      [PIPE_FORMAT_LATC1_UNORM] = 0,
      [PIPE_FORMAT_LATC1_SNORM] = 0,
      [PIPE_FORMAT_LATC2_UNORM] = 0,
      [PIPE_FORMAT_LATC2_SNORM] = 0,
      [PIPE_FORMAT_A8_SNORM] = 0,
      [PIPE_FORMAT_L8_SNORM] = 0,
      [PIPE_FORMAT_L8A8_SNORM] = 0,
      [PIPE_FORMAT_I8_SNORM] = 0,
      [PIPE_FORMAT_A16_SNORM] = 0,
      [PIPE_FORMAT_L16_SNORM] = 0,
      [PIPE_FORMAT_L16A16_SNORM] = 0,
      [PIPE_FORMAT_I16_SNORM] = 0,
      [PIPE_FORMAT_A16_FLOAT] = BRW_SURFACEFORMAT_A16_FLOAT,
      [PIPE_FORMAT_L16_FLOAT] = BRW_SURFACEFORMAT_L16_FLOAT,
      [PIPE_FORMAT_L16A16_FLOAT] = BRW_SURFACEFORMAT_L16A16_FLOAT,
      [PIPE_FORMAT_I16_FLOAT] = BRW_SURFACEFORMAT_I16_FLOAT,
      [PIPE_FORMAT_A32_FLOAT] = BRW_SURFACEFORMAT_A32_FLOAT,
      [PIPE_FORMAT_L32_FLOAT] = BRW_SURFACEFORMAT_L32_FLOAT,
      [PIPE_FORMAT_L32A32_FLOAT] = BRW_SURFACEFORMAT_L32A32_FLOAT,
      [PIPE_FORMAT_I32_FLOAT] = BRW_SURFACEFORMAT_I32_FLOAT,
      [PIPE_FORMAT_YV12] = 0,
      [PIPE_FORMAT_YV16] = 0,
      [PIPE_FORMAT_IYUV] = 0,
      [PIPE_FORMAT_NV12] = 0,
      [PIPE_FORMAT_NV21] = 0,
      [PIPE_FORMAT_R4A4_UNORM] = 0,
      [PIPE_FORMAT_A4R4_UNORM] = 0,
      [PIPE_FORMAT_R8A8_UNORM] = 0,
      [PIPE_FORMAT_A8R8_UNORM] = 0,
      [PIPE_FORMAT_R10G10B10A2_SSCALED] = BRW_SURFACEFORMAT_R10G10B10A2_SSCALED,
      [PIPE_FORMAT_R10G10B10A2_SNORM] = BRW_SURFACEFORMAT_R10G10B10A2_SNORM,
      [PIPE_FORMAT_B10G10R10A2_USCALED] = BRW_SURFACEFORMAT_B10G10R10A2_USCALED,
      [PIPE_FORMAT_B10G10R10A2_SSCALED] = BRW_SURFACEFORMAT_B10G10R10A2_SSCALED,
      [PIPE_FORMAT_B10G10R10A2_SNORM] = BRW_SURFACEFORMAT_B10G10R10A2_SNORM,
      [PIPE_FORMAT_R8_UINT] = BRW_SURFACEFORMAT_R8_UINT,
      [PIPE_FORMAT_R8G8_UINT] = BRW_SURFACEFORMAT_R8G8_UINT,
      [PIPE_FORMAT_R8G8B8_UINT] = BRW_SURFACEFORMAT_R8G8B8_UINT,
      [PIPE_FORMAT_R8G8B8A8_UINT] = BRW_SURFACEFORMAT_R8G8B8A8_UINT,
      [PIPE_FORMAT_R8_SINT] = BRW_SURFACEFORMAT_R8_SINT,
      [PIPE_FORMAT_R8G8_SINT] = BRW_SURFACEFORMAT_R8G8_SINT,
      [PIPE_FORMAT_R8G8B8_SINT] = BRW_SURFACEFORMAT_R8G8B8_SINT,
      [PIPE_FORMAT_R8G8B8A8_SINT] = BRW_SURFACEFORMAT_R8G8B8A8_SINT,
      [PIPE_FORMAT_R16_UINT] = BRW_SURFACEFORMAT_R16_UINT,
      [PIPE_FORMAT_R16G16_UINT] = BRW_SURFACEFORMAT_R16G16_UINT,
      [PIPE_FORMAT_R16G16B16_UINT] = BRW_SURFACEFORMAT_R16G16B16_UINT,
      [PIPE_FORMAT_R16G16B16A16_UINT] = BRW_SURFACEFORMAT_R16G16B16A16_UINT,
      [PIPE_FORMAT_R16_SINT] = BRW_SURFACEFORMAT_R16_SINT,
      [PIPE_FORMAT_R16G16_SINT] = BRW_SURFACEFORMAT_R16G16_SINT,
      [PIPE_FORMAT_R16G16B16_SINT] = BRW_SURFACEFORMAT_R16G16B16_SINT,
      [PIPE_FORMAT_R16G16B16A16_SINT] = BRW_SURFACEFORMAT_R16G16B16A16_SINT,
      [PIPE_FORMAT_R32_UINT] = BRW_SURFACEFORMAT_R32_UINT,
      [PIPE_FORMAT_R32G32_UINT] = BRW_SURFACEFORMAT_R32G32_UINT,
      [PIPE_FORMAT_R32G32B32_UINT] = BRW_SURFACEFORMAT_R32G32B32_UINT,
      [PIPE_FORMAT_R32G32B32A32_UINT] = BRW_SURFACEFORMAT_R32G32B32A32_UINT,
      [PIPE_FORMAT_R32_SINT] = BRW_SURFACEFORMAT_R32_SINT,
      [PIPE_FORMAT_R32G32_SINT] = BRW_SURFACEFORMAT_R32G32_SINT,
      [PIPE_FORMAT_R32G32B32_SINT] = BRW_SURFACEFORMAT_R32G32B32_SINT,
      [PIPE_FORMAT_R32G32B32A32_SINT] = BRW_SURFACEFORMAT_R32G32B32A32_SINT,
      [PIPE_FORMAT_A8_UINT] = 0,
      [PIPE_FORMAT_I8_UINT] = BRW_SURFACEFORMAT_I8_UINT,
      [PIPE_FORMAT_L8_UINT] = BRW_SURFACEFORMAT_L8_UINT,
      [PIPE_FORMAT_L8A8_UINT] = BRW_SURFACEFORMAT_L8A8_UINT,
      [PIPE_FORMAT_A8_SINT] = 0,
      [PIPE_FORMAT_I8_SINT] = BRW_SURFACEFORMAT_I8_SINT,
      [PIPE_FORMAT_L8_SINT] = BRW_SURFACEFORMAT_L8_SINT,
      [PIPE_FORMAT_L8A8_SINT] = BRW_SURFACEFORMAT_L8A8_SINT,
      [PIPE_FORMAT_A16_UINT] = 0,
      [PIPE_FORMAT_I16_UINT] = 0,
      [PIPE_FORMAT_L16_UINT] = 0,
      [PIPE_FORMAT_L16A16_UINT] = 0,
      [PIPE_FORMAT_A16_SINT] = 0,
      [PIPE_FORMAT_I16_SINT] = 0,
      [PIPE_FORMAT_L16_SINT] = 0,
      [PIPE_FORMAT_L16A16_SINT] = 0,
      [PIPE_FORMAT_A32_UINT] = 0,
      [PIPE_FORMAT_I32_UINT] = 0,
      [PIPE_FORMAT_L32_UINT] = 0,
      [PIPE_FORMAT_L32A32_UINT] = 0,
      [PIPE_FORMAT_A32_SINT] = 0,
      [PIPE_FORMAT_I32_SINT] = 0,
      [PIPE_FORMAT_L32_SINT] = 0,
      [PIPE_FORMAT_L32A32_SINT] = 0,
      [PIPE_FORMAT_B10G10R10A2_UINT] = BRW_SURFACEFORMAT_B10G10R10A2_UINT,
      [PIPE_FORMAT_ETC1_RGB8] = BRW_SURFACEFORMAT_ETC1_RGB8,
      [PIPE_FORMAT_R8G8_R8B8_UNORM] = 0,
      [PIPE_FORMAT_G8R8_B8R8_UNORM] = 0,
      [PIPE_FORMAT_R8G8B8X8_SNORM] = 0,
      [PIPE_FORMAT_R8G8B8X8_SRGB] = 0,
      [PIPE_FORMAT_R8G8B8X8_UINT] = 0,
      [PIPE_FORMAT_R8G8B8X8_SINT] = 0,
      [PIPE_FORMAT_B10G10R10X2_UNORM] = BRW_SURFACEFORMAT_B10G10R10X2_UNORM,
      [PIPE_FORMAT_R16G16B16X16_UNORM] = BRW_SURFACEFORMAT_R16G16B16X16_UNORM,
      [PIPE_FORMAT_R16G16B16X16_SNORM] = 0,
      [PIPE_FORMAT_R16G16B16X16_FLOAT] = BRW_SURFACEFORMAT_R16G16B16X16_FLOAT,
      [PIPE_FORMAT_R16G16B16X16_UINT] = 0,
      [PIPE_FORMAT_R16G16B16X16_SINT] = 0,
      [PIPE_FORMAT_R32G32B32X32_FLOAT] = BRW_SURFACEFORMAT_R32G32B32X32_FLOAT,
      [PIPE_FORMAT_R32G32B32X32_UINT] = 0,
      [PIPE_FORMAT_R32G32B32X32_SINT] = 0,
      [PIPE_FORMAT_R8A8_SNORM] = 0,
      [PIPE_FORMAT_R16A16_UNORM] = 0,
      [PIPE_FORMAT_R16A16_SNORM] = 0,
      [PIPE_FORMAT_R16A16_FLOAT] = 0,
      [PIPE_FORMAT_R32A32_FLOAT] = 0,
      [PIPE_FORMAT_R8A8_UINT] = 0,
      [PIPE_FORMAT_R8A8_SINT] = 0,
      [PIPE_FORMAT_R16A16_UINT] = 0,
      [PIPE_FORMAT_R16A16_SINT] = 0,
      [PIPE_FORMAT_R32A32_UINT] = 0,
      [PIPE_FORMAT_R32A32_SINT] = 0,
   };
   int sfmt = format_mapping[format];
   /* BRW_SURFACEFORMAT_R32G32B32A32_FLOAT happens to be 0 */
   if (!sfmt && format != PIPE_FORMAT_R32G32B32A32_FLOAT)
      sfmt = -1;
   return sfmt;
}
static boolean |
ilo_is_format_supported(struct pipe_screen *screen, |
enum pipe_format format, |
enum pipe_texture_target target, |
unsigned sample_count, |
unsigned bindings) |
{ |
struct ilo_screen *is = ilo_screen(screen); |
const int gen = ILO_GEN_GET_MAJOR(is->dev.gen * 10); |
const bool is_pure_int = util_format_is_pure_integer(format); |
const struct surface_format_info *info; |
unsigned bind; |
if (!util_format_is_supported(format, bindings)) |
return false; |
/* no MSAA support yet */ |
if (sample_count > 1) |
return false; |
bind = (bindings & PIPE_BIND_DEPTH_STENCIL); |
if (bind) { |
switch (format) { |
case PIPE_FORMAT_Z16_UNORM: |
case PIPE_FORMAT_Z24X8_UNORM: |
case PIPE_FORMAT_Z32_FLOAT: |
case PIPE_FORMAT_Z24_UNORM_S8_UINT: |
case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT: |
break; |
case PIPE_FORMAT_S8_UINT: |
/* TODO separate stencil */ |
default: |
return false; |
} |
} |
bind = (bindings & PIPE_BIND_RENDER_TARGET); |
if (bind) { |
info = lookup_surface_format_info(format, bind); |
if (gen < info->render_target) |
return false; |
if (!is_pure_int && gen < info->alpha_blend) |
return false; |
} |
bind = (bindings & PIPE_BIND_SAMPLER_VIEW); |
if (bind) { |
info = lookup_surface_format_info(format, bind); |
if (gen < info->sampling) |
return false; |
if (!is_pure_int && gen < info->filtering) |
return false; |
} |
bind = (bindings & PIPE_BIND_VERTEX_BUFFER); |
if (bind) { |
info = lookup_surface_format_info(format, bind); |
if (gen < info->input_vb) |
return false; |
} |
return true; |
} |
/* pipe_screen::is_video_format_supported() for ilo; defers entirely to the
 * shared gallium video-buffer helper.
 */
static boolean
ilo_is_video_format_supported(struct pipe_screen *screen,
                              enum pipe_format format,
                              enum pipe_video_profile profile)
{
   return vl_video_buffer_is_format_supported(screen, format, profile);
}
/**
 * Initialize format-related functions.
 *
 * Hooks the format-query entry points of this file into the screen's
 * pipe_screen vtable.
 */
void
ilo_init_format_functions(struct ilo_screen *is)
{
   is->base.is_format_supported = ilo_is_format_supported;
   is->base.is_video_format_supported = ilo_is_video_format_supported;
}
/contrib/sdk/sources/Mesa/src/gallium/drivers/ilo/ilo_format.h |
---|
0,0 → 1,142 |
/* |
* Mesa 3-D graphics library |
* |
* Copyright (C) 2012-2013 LunarG, Inc. |
* |
* Permission is hereby granted, free of charge, to any person obtaining a |
* copy of this software and associated documentation files (the "Software"), |
* to deal in the Software without restriction, including without limitation |
* the rights to use, copy, modify, merge, publish, distribute, sublicense, |
* and/or sell copies of the Software, and to permit persons to whom the |
* Software is furnished to do so, subject to the following conditions: |
* |
* The above copyright notice and this permission notice shall be included |
* in all copies or substantial portions of the Software. |
* |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING |
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER |
* DEALINGS IN THE SOFTWARE. |
* |
* Authors: |
* Chia-I Wu <olv@lunarg.com> |
*/ |
#ifndef ILO_FORMAT_H |
#define ILO_FORMAT_H |
#include "brw_defines.h" |
#include "ilo_common.h" |
struct ilo_screen; |
void |
ilo_init_format_functions(struct ilo_screen *is); |
int |
ilo_translate_color_format(enum pipe_format format); |
/**
 * Translate a pipe format to a hardware surface format suitable for
 * the given purpose.  Return -1 on errors.
 *
 * This is an inline function not only for performance reasons.  There are
 * caveats that the callers should know about before calling this function
 * (see the per-bind comments below: RGBX render targets, depth-as-I
 * sampling, ETC1 decompression, and 3-component vertex formats).
 */
static inline int
ilo_translate_format(enum pipe_format format, unsigned bind)
{
   switch (bind) {
   case PIPE_BIND_RENDER_TARGET:
      /*
       * Some RGBX formats are not supported as render target formats.  But we
       * can use their RGBA counterparts and force the destination alpha to be
       * one when blending is enabled.
       */
      switch (format) {
      case PIPE_FORMAT_B8G8R8X8_UNORM:
         return BRW_SURFACEFORMAT_B8G8R8A8_UNORM;
      default:
         return ilo_translate_color_format(format);
      }
      break;
   case PIPE_BIND_SAMPLER_VIEW:
      /*
       * For depth formats, we want the depth values to be returned as R
       * values.  But we assume in many places that the depth values are
       * returned as I values (util_make_fragment_tex_shader_writedepth() is
       * one such example).  We have to live with that at least for now.
       *
       * For ETC1 format, the texture data will be decompressed before being
       * written to the bo.  See tex_staging_sys_convert_write().
       */
      switch (format) {
      case PIPE_FORMAT_Z16_UNORM:
         return BRW_SURFACEFORMAT_I16_UNORM;
      case PIPE_FORMAT_Z32_FLOAT:
         return BRW_SURFACEFORMAT_I32_FLOAT;
      case PIPE_FORMAT_Z24X8_UNORM:
      case PIPE_FORMAT_Z24_UNORM_S8_UINT:
         return BRW_SURFACEFORMAT_I24X8_UNORM;
      case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
         return BRW_SURFACEFORMAT_I32X32_FLOAT;
      case PIPE_FORMAT_ETC1_RGB8:
         /* sampled as plain RGBX after CPU-side decompression */
         return BRW_SURFACEFORMAT_R8G8B8X8_UNORM;
      default:
         return ilo_translate_color_format(format);
      }
      break;
   case PIPE_BIND_VERTEX_BUFFER:
      /*
       * Some 3-component formats are not supported as vertex element formats.
       * But since we move between vertices using vb->stride, we should be
       * good to use their 4-component counterparts if we force the W
       * component to be one.  The only exception is that the vb boundary
       * check for the last vertex may fail.
       */
      switch (format) {
      case PIPE_FORMAT_R16G16B16_FLOAT:
         return BRW_SURFACEFORMAT_R16G16B16A16_FLOAT;
      case PIPE_FORMAT_R16G16B16_UINT:
         return BRW_SURFACEFORMAT_R16G16B16A16_UINT;
      case PIPE_FORMAT_R16G16B16_SINT:
         return BRW_SURFACEFORMAT_R16G16B16A16_SINT;
      case PIPE_FORMAT_R8G8B8_UINT:
         return BRW_SURFACEFORMAT_R8G8B8A8_UINT;
      case PIPE_FORMAT_R8G8B8_SINT:
         return BRW_SURFACEFORMAT_R8G8B8A8_SINT;
      default:
         return ilo_translate_color_format(format);
      }
      break;
   default:
      /* only the three binds above have translation rules */
      assert(!"cannot translate format");
      break;
   }
   return -1;
}
static inline int |
ilo_translate_render_format(enum pipe_format format) |
{ |
return ilo_translate_format(format, PIPE_BIND_RENDER_TARGET); |
} |
static inline int |
ilo_translate_texture_format(enum pipe_format format) |
{ |
return ilo_translate_format(format, PIPE_BIND_SAMPLER_VIEW); |
} |
static inline int |
ilo_translate_vertex_format(enum pipe_format format) |
{ |
return ilo_translate_format(format, PIPE_BIND_VERTEX_BUFFER); |
} |
#endif /* ILO_FORMAT_H */ |
/contrib/sdk/sources/Mesa/src/gallium/drivers/ilo/ilo_gpe.h |
---|
0,0 → 1,528 |
/* |
* Mesa 3-D graphics library |
* |
* Copyright (C) 2013 LunarG, Inc. |
* |
* Permission is hereby granted, free of charge, to any person obtaining a |
* copy of this software and associated documentation files (the "Software"), |
* to deal in the Software without restriction, including without limitation |
* the rights to use, copy, modify, merge, publish, distribute, sublicense, |
* and/or sell copies of the Software, and to permit persons to whom the |
* Software is furnished to do so, subject to the following conditions: |
* |
* The above copyright notice and this permission notice shall be included |
* in all copies or substantial portions of the Software. |
* |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING |
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER |
* DEALINGS IN THE SOFTWARE. |
* |
* Authors: |
* Chia-I Wu <olv@lunarg.com> |
*/ |
#ifndef ILO_GPE_H |
#define ILO_GPE_H |
#include "ilo_common.h" |
/**
 * Driver-wide limits and binding-table layout helpers.
 *
 * \see brw_context.h
 */
#define ILO_MAX_DRAW_BUFFERS 8
#define ILO_MAX_CONST_BUFFERS (1 + 12)
#define ILO_MAX_SAMPLER_VIEWS 16
#define ILO_MAX_SAMPLERS 16
#define ILO_MAX_SO_BINDINGS 64
#define ILO_MAX_SO_BUFFERS 4
#define ILO_MAX_VIEWPORTS 1

/* VS binding table: const buffers first, then sampler views */
#define ILO_MAX_VS_SURFACES (ILO_MAX_CONST_BUFFERS + ILO_MAX_SAMPLER_VIEWS)
#define ILO_VS_CONST_SURFACE(i) (i)
#define ILO_VS_TEXTURE_SURFACE(i) (ILO_MAX_CONST_BUFFERS + i)

/* GS binding table: stream-output bindings only */
#define ILO_MAX_GS_SURFACES (ILO_MAX_SO_BINDINGS)
#define ILO_GS_SO_SURFACE(i) (i)

/* WM binding table: draw buffers, then const buffers, then sampler views */
#define ILO_MAX_WM_SURFACES (ILO_MAX_DRAW_BUFFERS + ILO_MAX_CONST_BUFFERS + ILO_MAX_SAMPLER_VIEWS)
#define ILO_WM_DRAW_SURFACE(i) (i)
#define ILO_WM_CONST_SURFACE(i) (ILO_MAX_DRAW_BUFFERS + i)
#define ILO_WM_TEXTURE_SURFACE(i) (ILO_MAX_DRAW_BUFFERS + ILO_MAX_CONST_BUFFERS + i)
struct ilo_buffer; |
struct ilo_texture; |
struct ilo_shader_state; |
/* vertex buffer bindings */
struct ilo_vb_state {
   struct pipe_vertex_buffer states[PIPE_MAX_ATTRIBS];
   /* bit i set when states[i] holds a binding */
   uint32_t enabled_mask;
};

/* index buffer state, with a finalized hardware-usable view */
struct ilo_ib_state {
   struct pipe_resource *buffer;
   const void *user_buffer;
   unsigned offset;
   unsigned index_size;

   /* these are not valid until the state is finalized */
   struct pipe_resource *hw_resource;
   unsigned hw_index_size;
   /* an offset to be added to pipe_draw_info::start */
   int64_t draw_start_offset;
};

/* one vertex element CSO */
struct ilo_ve_cso {
   /* VERTEX_ELEMENT_STATE */
   uint32_t payload[2];
};

struct ilo_ve_state {
   struct ilo_ve_cso cso[PIPE_MAX_ATTRIBS];
   unsigned count;

   /* per-element instancing divisors */
   unsigned instance_divisors[PIPE_MAX_ATTRIBS];
   /* maps vertex elements to vertex buffer slots */
   unsigned vb_mapping[PIPE_MAX_ATTRIBS];
   unsigned vb_count;
};

/* stream-output targets */
struct ilo_so_state {
   struct pipe_stream_output_target *states[ILO_MAX_SO_BUFFERS];
   unsigned count;
   /* NOTE(review): presumably bit i means "append to target i" — verify against callers */
   unsigned append_bitmask;

   bool enabled;
};

struct ilo_viewport_cso {
   /* matrix form */
   float m00, m11, m22, m30, m31, m32;

   /* guardband in NDC space */
   float min_gbx, min_gby, max_gbx, max_gby;

   /* viewport in screen space */
   float min_x, min_y, min_z;
   float max_x, max_y, max_z;
};

struct ilo_viewport_state {
   struct ilo_viewport_cso cso[ILO_MAX_VIEWPORTS];
   unsigned count;

   /* copy of viewport slot 0 as passed in by the state tracker */
   struct pipe_viewport_state viewport0;
};

struct ilo_scissor_state {
   /* SCISSOR_RECT */
   uint32_t payload[ILO_MAX_VIEWPORTS * 2];

   /* copy of scissor slot 0 as passed in by the state tracker */
   struct pipe_scissor_state scissor0;
};
/* pre-packed 3DSTATE_CLIP dwords derived from pipe_rasterizer_state */
struct ilo_rasterizer_clip {
   /* 3DSTATE_CLIP */
   uint32_t payload[3];

   uint32_t can_enable_guardband;
};

/* pre-packed 3DSTATE_SF dwords */
struct ilo_rasterizer_sf {
   /* 3DSTATE_SF */
   uint32_t payload[6];
   /* MSAA-dependent bits kept aside — NOTE(review): presumably ORed in at emit time */
   uint32_t dw_msaa;
};

/* pre-packed 3DSTATE_WM dwords */
struct ilo_rasterizer_wm {
   /* 3DSTATE_WM */
   uint32_t payload[2];
   uint32_t dw_msaa_rast;
   uint32_t dw_msaa_disp;
};

/* rasterizer CSO: original pipe state plus derived hardware state */
struct ilo_rasterizer_state {
   struct pipe_rasterizer_state state;
   struct ilo_rasterizer_clip clip;
   struct ilo_rasterizer_sf sf;
   struct ilo_rasterizer_wm wm;
};

struct ilo_dsa_state {
   /* DEPTH_STENCIL_STATE */
   uint32_t payload[3];

   /* alpha test kept unpacked; it lives outside DEPTH_STENCIL_STATE */
   struct pipe_alpha_state alpha;
};

/* per-render-target blend CSO */
struct ilo_blend_cso {
   /* BLEND_STATE */
   uint32_t payload[2];

   /* alternative dword variants selected at emit time */
   uint32_t dw_blend;
   uint32_t dw_blend_dst_alpha_forced_one;

   uint32_t dw_logicop;
   uint32_t dw_alpha_mod;
};

struct ilo_blend_state {
   struct ilo_blend_cso cso[ILO_MAX_DRAW_BUFFERS];
   bool independent_blend_enable;
   bool dual_blend;
   bool alpha_to_coverage;
};

/* sampler CSO with pre-computed variants for filter/wrap selection */
struct ilo_sampler_cso {
   /* SAMPLER_STATE and SAMPLER_BORDER_COLOR_STATE */
   uint32_t payload[15];

   uint32_t dw_filter;
   uint32_t dw_filter_aniso;
   uint32_t dw_wrap;
   uint32_t dw_wrap_1d;
   uint32_t dw_wrap_cube;

   bool anisotropic;
   bool saturate_r;
   bool saturate_s;
   bool saturate_t;
};

struct ilo_sampler_state {
   const struct ilo_sampler_cso *cso[ILO_MAX_SAMPLERS];
   unsigned count;
};
/* pre-packed SURFACE_STATE plus the bo it points at */
struct ilo_view_surface {
   /* SURFACE_STATE */
   uint32_t payload[8];
   struct intel_bo *bo;
};

struct ilo_view_cso {
   struct pipe_sampler_view base;
   struct ilo_view_surface surface;
};

struct ilo_view_state {
   struct pipe_sampler_view *states[ILO_MAX_SAMPLER_VIEWS];
   unsigned count;
};

struct ilo_cbuf_cso {
   struct pipe_resource *resource;
   struct ilo_view_surface surface;

   /*
    * this CSO is not so constant because user buffer needs to be uploaded in
    * finalize_constant_buffers()
    */
   const void *user_buffer;
   unsigned user_buffer_size;
};

struct ilo_cbuf_state {
   struct ilo_cbuf_cso cso[ILO_MAX_CONST_BUFFERS];
   /* bit i set when cso[i] is bound */
   uint32_t enabled_mask;
};

struct ilo_resource_state {
   struct pipe_surface *states[PIPE_MAX_SHADER_RESOURCES];
   unsigned count;
};

/* a framebuffer attachment: either a render target or a depth/stencil surface */
struct ilo_surface_cso {
   struct pipe_surface base;
   /* selects which member of u is valid */
   bool is_rt;
   union {
      struct ilo_view_surface rt;
      struct ilo_zs_surface {
         uint32_t payload[10];
         struct intel_bo *bo;
         struct intel_bo *hiz_bo;
         struct intel_bo *separate_s8_bo;
      } zs;
   } u;
};

struct ilo_fb_state {
   struct pipe_framebuffer_state state;

   /* used when no depth/stencil buffer is bound */
   struct ilo_zs_surface null_zs;
   unsigned num_samples;
};

struct ilo_global_binding {
   /*
    * XXX These should not be treated as real resources (and there could be
    * thousands of them).  They should be treated as regions in GLOBAL
    * resource, which is the only real resource.
    *
    * That is, a resource here should instead be
    *
    *   struct ilo_global_region {
    *      struct pipe_resource base;
    *      int offset;
    *      int size;
    *   };
    *
    * and it describes the region [offset, offset + size) in GLOBAL
    * resource.
    */
   struct pipe_resource *resources[PIPE_MAX_SHADER_RESOURCES];
   uint32_t *handles[PIPE_MAX_SHADER_RESOURCES];
   unsigned count;
};

/* pre-packed per-stage shader state dwords */
struct ilo_shader_cso {
   uint32_t payload[5];
};
void |
ilo_gpe_init_ve(const struct ilo_dev_info *dev, |
unsigned num_states, |
const struct pipe_vertex_element *states, |
struct ilo_ve_state *ve); |
void |
ilo_gpe_set_viewport_cso(const struct ilo_dev_info *dev, |
const struct pipe_viewport_state *state, |
struct ilo_viewport_cso *vp); |
void |
ilo_gpe_set_scissor(const struct ilo_dev_info *dev, |
unsigned start_slot, |
unsigned num_states, |
const struct pipe_scissor_state *states, |
struct ilo_scissor_state *scissor); |
void |
ilo_gpe_set_scissor_null(const struct ilo_dev_info *dev, |
struct ilo_scissor_state *scissor); |
void |
ilo_gpe_init_rasterizer_clip(const struct ilo_dev_info *dev, |
const struct pipe_rasterizer_state *state, |
struct ilo_rasterizer_clip *clip); |
void |
ilo_gpe_init_rasterizer_sf(const struct ilo_dev_info *dev, |
const struct pipe_rasterizer_state *state, |
struct ilo_rasterizer_sf *sf); |
void |
ilo_gpe_init_rasterizer_wm_gen6(const struct ilo_dev_info *dev, |
const struct pipe_rasterizer_state *state, |
struct ilo_rasterizer_wm *wm); |
void |
ilo_gpe_init_rasterizer_wm_gen7(const struct ilo_dev_info *dev, |
const struct pipe_rasterizer_state *state, |
struct ilo_rasterizer_wm *wm); |
static inline void |
ilo_gpe_init_rasterizer(const struct ilo_dev_info *dev, |
const struct pipe_rasterizer_state *state, |
struct ilo_rasterizer_state *rasterizer) |
{ |
ilo_gpe_init_rasterizer_clip(dev, state, &rasterizer->clip); |
ilo_gpe_init_rasterizer_sf(dev, state, &rasterizer->sf); |
if (dev->gen >= ILO_GEN(7)) |
ilo_gpe_init_rasterizer_wm_gen7(dev, state, &rasterizer->wm); |
else |
ilo_gpe_init_rasterizer_wm_gen6(dev, state, &rasterizer->wm); |
} |
void |
ilo_gpe_init_dsa(const struct ilo_dev_info *dev, |
const struct pipe_depth_stencil_alpha_state *state, |
struct ilo_dsa_state *dsa); |
void |
ilo_gpe_init_blend(const struct ilo_dev_info *dev, |
const struct pipe_blend_state *state, |
struct ilo_blend_state *blend); |
void |
ilo_gpe_init_sampler_cso(const struct ilo_dev_info *dev, |
const struct pipe_sampler_state *state, |
struct ilo_sampler_cso *sampler); |
void |
ilo_gpe_init_view_surface_null_gen6(const struct ilo_dev_info *dev, |
unsigned width, unsigned height, |
unsigned depth, unsigned level, |
struct ilo_view_surface *surf); |
void |
ilo_gpe_init_view_surface_for_buffer_gen6(const struct ilo_dev_info *dev, |
const struct ilo_buffer *buf, |
unsigned offset, unsigned size, |
unsigned struct_size, |
enum pipe_format elem_format, |
bool is_rt, bool render_cache_rw, |
struct ilo_view_surface *surf); |
void |
ilo_gpe_init_view_surface_for_texture_gen6(const struct ilo_dev_info *dev, |
const struct ilo_texture *tex, |
enum pipe_format format, |
unsigned first_level, |
unsigned num_levels, |
unsigned first_layer, |
unsigned num_layers, |
bool is_rt, bool render_cache_rw, |
struct ilo_view_surface *surf); |
void |
ilo_gpe_init_view_surface_null_gen7(const struct ilo_dev_info *dev, |
unsigned width, unsigned height, |
unsigned depth, unsigned level, |
struct ilo_view_surface *surf); |
void |
ilo_gpe_init_view_surface_for_buffer_gen7(const struct ilo_dev_info *dev, |
const struct ilo_buffer *buf, |
unsigned offset, unsigned size, |
unsigned struct_size, |
enum pipe_format elem_format, |
bool is_rt, bool render_cache_rw, |
struct ilo_view_surface *surf); |
void |
ilo_gpe_init_view_surface_for_texture_gen7(const struct ilo_dev_info *dev, |
const struct ilo_texture *tex, |
enum pipe_format format, |
unsigned first_level, |
unsigned num_levels, |
unsigned first_layer, |
unsigned num_layers, |
bool is_rt, bool render_cache_rw, |
struct ilo_view_surface *surf); |
static inline void |
ilo_gpe_init_view_surface_null(const struct ilo_dev_info *dev, |
unsigned width, unsigned height, |
unsigned depth, unsigned level, |
struct ilo_view_surface *surf) |
{ |
if (dev->gen >= ILO_GEN(7)) { |
ilo_gpe_init_view_surface_null_gen7(dev, |
width, height, depth, level, surf); |
} |
else { |
ilo_gpe_init_view_surface_null_gen6(dev, |
width, height, depth, level, surf); |
} |
} |
static inline void |
ilo_gpe_init_view_surface_for_buffer(const struct ilo_dev_info *dev, |
const struct ilo_buffer *buf, |
unsigned offset, unsigned size, |
unsigned struct_size, |
enum pipe_format elem_format, |
bool is_rt, bool render_cache_rw, |
struct ilo_view_surface *surf) |
{ |
if (dev->gen >= ILO_GEN(7)) { |
ilo_gpe_init_view_surface_for_buffer_gen7(dev, buf, offset, size, |
struct_size, elem_format, is_rt, render_cache_rw, surf); |
} |
else { |
ilo_gpe_init_view_surface_for_buffer_gen6(dev, buf, offset, size, |
struct_size, elem_format, is_rt, render_cache_rw, surf); |
} |
} |
static inline void |
ilo_gpe_init_view_surface_for_texture(const struct ilo_dev_info *dev, |
const struct ilo_texture *tex, |
enum pipe_format format, |
unsigned first_level, |
unsigned num_levels, |
unsigned first_layer, |
unsigned num_layers, |
bool is_rt, bool render_cache_rw, |
struct ilo_view_surface *surf) |
{ |
if (dev->gen >= ILO_GEN(7)) { |
ilo_gpe_init_view_surface_for_texture_gen7(dev, tex, format, |
first_level, num_levels, first_layer, num_layers, |
is_rt, render_cache_rw, surf); |
} |
else { |
ilo_gpe_init_view_surface_for_texture_gen6(dev, tex, format, |
first_level, num_levels, first_layer, num_layers, |
is_rt, render_cache_rw, surf); |
} |
} |
void |
ilo_gpe_init_zs_surface(const struct ilo_dev_info *dev, |
const struct ilo_texture *tex, |
enum pipe_format format, |
unsigned level, |
unsigned first_layer, unsigned num_layers, |
struct ilo_zs_surface *zs); |
void |
ilo_gpe_init_vs_cso(const struct ilo_dev_info *dev, |
const struct ilo_shader_state *vs, |
struct ilo_shader_cso *cso); |
void |
ilo_gpe_init_gs_cso_gen6(const struct ilo_dev_info *dev, |
const struct ilo_shader_state *gs, |
struct ilo_shader_cso *cso); |
void |
ilo_gpe_init_gs_cso_gen7(const struct ilo_dev_info *dev, |
const struct ilo_shader_state *gs, |
struct ilo_shader_cso *cso); |
static inline void |
ilo_gpe_init_gs_cso(const struct ilo_dev_info *dev, |
const struct ilo_shader_state *gs, |
struct ilo_shader_cso *cso) |
{ |
if (dev->gen >= ILO_GEN(7)) { |
ilo_gpe_init_gs_cso_gen7(dev, gs, cso); |
} |
else { |
ilo_gpe_init_gs_cso_gen6(dev, gs, cso); |
} |
} |
void |
ilo_gpe_init_fs_cso_gen6(const struct ilo_dev_info *dev, |
const struct ilo_shader_state *fs, |
struct ilo_shader_cso *cso); |
void |
ilo_gpe_init_fs_cso_gen7(const struct ilo_dev_info *dev, |
const struct ilo_shader_state *fs, |
struct ilo_shader_cso *cso); |
static inline void |
ilo_gpe_init_fs_cso(const struct ilo_dev_info *dev, |
const struct ilo_shader_state *fs, |
struct ilo_shader_cso *cso) |
{ |
if (dev->gen >= ILO_GEN(7)) { |
ilo_gpe_init_fs_cso_gen7(dev, fs, cso); |
} |
else { |
ilo_gpe_init_fs_cso_gen6(dev, fs, cso); |
} |
} |
#endif /* ILO_GPE_H */ |
/contrib/sdk/sources/Mesa/src/gallium/drivers/ilo/ilo_gpe_gen6.c |
---|
0,0 → 1,5032 |
/* |
* Mesa 3-D graphics library |
* |
* Copyright (C) 2012-2013 LunarG, Inc. |
* |
* Permission is hereby granted, free of charge, to any person obtaining a |
* copy of this software and associated documentation files (the "Software"), |
* to deal in the Software without restriction, including without limitation |
* the rights to use, copy, modify, merge, publish, distribute, sublicense, |
* and/or sell copies of the Software, and to permit persons to whom the |
* Software is furnished to do so, subject to the following conditions: |
* |
* The above copyright notice and this permission notice shall be included |
* in all copies or substantial portions of the Software. |
* |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING |
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER |
* DEALINGS IN THE SOFTWARE. |
* |
* Authors: |
* Chia-I Wu <olv@lunarg.com> |
*/ |
#include "util/u_dual_blend.h" |
#include "util/u_half.h" |
#include "brw_defines.h" |
#include "intel_reg.h" |
#include "ilo_context.h" |
#include "ilo_cp.h" |
#include "ilo_format.h" |
#include "ilo_resource.h" |
#include "ilo_shader.h" |
#include "ilo_state.h" |
#include "ilo_gpe_gen6.h" |
/** |
* Translate winsys tiling to hardware tiling. |
*/ |
int |
ilo_gpe_gen6_translate_winsys_tiling(enum intel_tiling_mode tiling) |
{ |
switch (tiling) { |
case INTEL_TILING_NONE: |
return 0; |
case INTEL_TILING_X: |
return BRW_SURFACE_TILED; |
case INTEL_TILING_Y: |
return BRW_SURFACE_TILED | BRW_SURFACE_TILED_Y; |
default: |
assert(!"unknown tiling"); |
return 0; |
} |
} |
/** |
* Translate a pipe primitive type to the matching hardware primitive type. |
*/ |
int |
ilo_gpe_gen6_translate_pipe_prim(unsigned prim) |
{ |
static const int prim_mapping[PIPE_PRIM_MAX] = { |
[PIPE_PRIM_POINTS] = _3DPRIM_POINTLIST, |
[PIPE_PRIM_LINES] = _3DPRIM_LINELIST, |
[PIPE_PRIM_LINE_LOOP] = _3DPRIM_LINELOOP, |
[PIPE_PRIM_LINE_STRIP] = _3DPRIM_LINESTRIP, |
[PIPE_PRIM_TRIANGLES] = _3DPRIM_TRILIST, |
[PIPE_PRIM_TRIANGLE_STRIP] = _3DPRIM_TRISTRIP, |
[PIPE_PRIM_TRIANGLE_FAN] = _3DPRIM_TRIFAN, |
[PIPE_PRIM_QUADS] = _3DPRIM_QUADLIST, |
[PIPE_PRIM_QUAD_STRIP] = _3DPRIM_QUADSTRIP, |
[PIPE_PRIM_POLYGON] = _3DPRIM_POLYGON, |
[PIPE_PRIM_LINES_ADJACENCY] = _3DPRIM_LINELIST_ADJ, |
[PIPE_PRIM_LINE_STRIP_ADJACENCY] = _3DPRIM_LINESTRIP_ADJ, |
[PIPE_PRIM_TRIANGLES_ADJACENCY] = _3DPRIM_TRILIST_ADJ, |
[PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY] = _3DPRIM_TRISTRIP_ADJ, |
}; |
assert(prim_mapping[prim]); |
return prim_mapping[prim]; |
} |
/** |
* Translate a pipe texture target to the matching hardware surface type. |
*/ |
int |
ilo_gpe_gen6_translate_texture(enum pipe_texture_target target) |
{ |
switch (target) { |
case PIPE_BUFFER: |
return BRW_SURFACE_BUFFER; |
case PIPE_TEXTURE_1D: |
case PIPE_TEXTURE_1D_ARRAY: |
return BRW_SURFACE_1D; |
case PIPE_TEXTURE_2D: |
case PIPE_TEXTURE_RECT: |
case PIPE_TEXTURE_2D_ARRAY: |
return BRW_SURFACE_2D; |
case PIPE_TEXTURE_3D: |
return BRW_SURFACE_3D; |
case PIPE_TEXTURE_CUBE: |
case PIPE_TEXTURE_CUBE_ARRAY: |
return BRW_SURFACE_CUBE; |
default: |
assert(!"unknown texture target"); |
return BRW_SURFACE_BUFFER; |
} |
} |
/** |
* Translate a depth/stencil pipe format to the matching hardware |
* format. Return -1 on errors. |
*/ |
static int |
gen6_translate_depth_format(enum pipe_format format) |
{ |
switch (format) { |
case PIPE_FORMAT_Z16_UNORM: |
return BRW_DEPTHFORMAT_D16_UNORM; |
case PIPE_FORMAT_Z32_FLOAT: |
return BRW_DEPTHFORMAT_D32_FLOAT; |
case PIPE_FORMAT_Z24X8_UNORM: |
return BRW_DEPTHFORMAT_D24_UNORM_X8_UINT; |
case PIPE_FORMAT_Z24_UNORM_S8_UINT: |
return BRW_DEPTHFORMAT_D24_UNORM_S8_UINT; |
case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT: |
return BRW_DEPTHFORMAT_D32_FLOAT_S8X24_UINT; |
default: |
return -1; |
} |
} |
/** |
* Translate a pipe logicop to the matching hardware logicop. |
*/ |
static int |
gen6_translate_pipe_logicop(unsigned logicop) |
{ |
switch (logicop) { |
case PIPE_LOGICOP_CLEAR: return BRW_LOGICOPFUNCTION_CLEAR; |
case PIPE_LOGICOP_NOR: return BRW_LOGICOPFUNCTION_NOR; |
case PIPE_LOGICOP_AND_INVERTED: return BRW_LOGICOPFUNCTION_AND_INVERTED; |
case PIPE_LOGICOP_COPY_INVERTED: return BRW_LOGICOPFUNCTION_COPY_INVERTED; |
case PIPE_LOGICOP_AND_REVERSE: return BRW_LOGICOPFUNCTION_AND_REVERSE; |
case PIPE_LOGICOP_INVERT: return BRW_LOGICOPFUNCTION_INVERT; |
case PIPE_LOGICOP_XOR: return BRW_LOGICOPFUNCTION_XOR; |
case PIPE_LOGICOP_NAND: return BRW_LOGICOPFUNCTION_NAND; |
case PIPE_LOGICOP_AND: return BRW_LOGICOPFUNCTION_AND; |
case PIPE_LOGICOP_EQUIV: return BRW_LOGICOPFUNCTION_EQUIV; |
case PIPE_LOGICOP_NOOP: return BRW_LOGICOPFUNCTION_NOOP; |
case PIPE_LOGICOP_OR_INVERTED: return BRW_LOGICOPFUNCTION_OR_INVERTED; |
case PIPE_LOGICOP_COPY: return BRW_LOGICOPFUNCTION_COPY; |
case PIPE_LOGICOP_OR_REVERSE: return BRW_LOGICOPFUNCTION_OR_REVERSE; |
case PIPE_LOGICOP_OR: return BRW_LOGICOPFUNCTION_OR; |
case PIPE_LOGICOP_SET: return BRW_LOGICOPFUNCTION_SET; |
default: |
assert(!"unknown logicop function"); |
return BRW_LOGICOPFUNCTION_CLEAR; |
} |
} |
/** |
* Translate a pipe blend function to the matching hardware blend function. |
*/ |
static int |
gen6_translate_pipe_blend(unsigned blend) |
{ |
switch (blend) { |
case PIPE_BLEND_ADD: return BRW_BLENDFUNCTION_ADD; |
case PIPE_BLEND_SUBTRACT: return BRW_BLENDFUNCTION_SUBTRACT; |
case PIPE_BLEND_REVERSE_SUBTRACT: return BRW_BLENDFUNCTION_REVERSE_SUBTRACT; |
case PIPE_BLEND_MIN: return BRW_BLENDFUNCTION_MIN; |
case PIPE_BLEND_MAX: return BRW_BLENDFUNCTION_MAX; |
default: |
assert(!"unknown blend function"); |
return BRW_BLENDFUNCTION_ADD; |
}; |
} |
/** |
* Translate a pipe blend factor to the matching hardware blend factor. |
*/ |
static int |
gen6_translate_pipe_blendfactor(unsigned blendfactor) |
{ |
switch (blendfactor) { |
case PIPE_BLENDFACTOR_ONE: return BRW_BLENDFACTOR_ONE; |
case PIPE_BLENDFACTOR_SRC_COLOR: return BRW_BLENDFACTOR_SRC_COLOR; |
case PIPE_BLENDFACTOR_SRC_ALPHA: return BRW_BLENDFACTOR_SRC_ALPHA; |
case PIPE_BLENDFACTOR_DST_ALPHA: return BRW_BLENDFACTOR_DST_ALPHA; |
case PIPE_BLENDFACTOR_DST_COLOR: return BRW_BLENDFACTOR_DST_COLOR; |
case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: return BRW_BLENDFACTOR_SRC_ALPHA_SATURATE; |
case PIPE_BLENDFACTOR_CONST_COLOR: return BRW_BLENDFACTOR_CONST_COLOR; |
case PIPE_BLENDFACTOR_CONST_ALPHA: return BRW_BLENDFACTOR_CONST_ALPHA; |
case PIPE_BLENDFACTOR_SRC1_COLOR: return BRW_BLENDFACTOR_SRC1_COLOR; |
case PIPE_BLENDFACTOR_SRC1_ALPHA: return BRW_BLENDFACTOR_SRC1_ALPHA; |
case PIPE_BLENDFACTOR_ZERO: return BRW_BLENDFACTOR_ZERO; |
case PIPE_BLENDFACTOR_INV_SRC_COLOR: return BRW_BLENDFACTOR_INV_SRC_COLOR; |
case PIPE_BLENDFACTOR_INV_SRC_ALPHA: return BRW_BLENDFACTOR_INV_SRC_ALPHA; |
case PIPE_BLENDFACTOR_INV_DST_ALPHA: return BRW_BLENDFACTOR_INV_DST_ALPHA; |
case PIPE_BLENDFACTOR_INV_DST_COLOR: return BRW_BLENDFACTOR_INV_DST_COLOR; |
case PIPE_BLENDFACTOR_INV_CONST_COLOR: return BRW_BLENDFACTOR_INV_CONST_COLOR; |
case PIPE_BLENDFACTOR_INV_CONST_ALPHA: return BRW_BLENDFACTOR_INV_CONST_ALPHA; |
case PIPE_BLENDFACTOR_INV_SRC1_COLOR: return BRW_BLENDFACTOR_INV_SRC1_COLOR; |
case PIPE_BLENDFACTOR_INV_SRC1_ALPHA: return BRW_BLENDFACTOR_INV_SRC1_ALPHA; |
default: |
assert(!"unknown blend factor"); |
return BRW_BLENDFACTOR_ONE; |
}; |
} |
/** |
* Translate a pipe stencil op to the matching hardware stencil op. |
*/ |
static int |
gen6_translate_pipe_stencil_op(unsigned stencil_op) |
{ |
switch (stencil_op) { |
case PIPE_STENCIL_OP_KEEP: return BRW_STENCILOP_KEEP; |
case PIPE_STENCIL_OP_ZERO: return BRW_STENCILOP_ZERO; |
case PIPE_STENCIL_OP_REPLACE: return BRW_STENCILOP_REPLACE; |
case PIPE_STENCIL_OP_INCR: return BRW_STENCILOP_INCRSAT; |
case PIPE_STENCIL_OP_DECR: return BRW_STENCILOP_DECRSAT; |
case PIPE_STENCIL_OP_INCR_WRAP: return BRW_STENCILOP_INCR; |
case PIPE_STENCIL_OP_DECR_WRAP: return BRW_STENCILOP_DECR; |
case PIPE_STENCIL_OP_INVERT: return BRW_STENCILOP_INVERT; |
default: |
assert(!"unknown stencil op"); |
return BRW_STENCILOP_KEEP; |
} |
} |
/** |
* Translate a pipe texture mipfilter to the matching hardware mipfilter. |
*/ |
static int |
gen6_translate_tex_mipfilter(unsigned filter) |
{ |
switch (filter) { |
case PIPE_TEX_MIPFILTER_NEAREST: return BRW_MIPFILTER_NEAREST; |
case PIPE_TEX_MIPFILTER_LINEAR: return BRW_MIPFILTER_LINEAR; |
case PIPE_TEX_MIPFILTER_NONE: return BRW_MIPFILTER_NONE; |
default: |
assert(!"unknown mipfilter"); |
return BRW_MIPFILTER_NONE; |
} |
} |
/** |
* Translate a pipe texture filter to the matching hardware mapfilter. |
*/ |
static int |
gen6_translate_tex_filter(unsigned filter) |
{ |
switch (filter) { |
case PIPE_TEX_FILTER_NEAREST: return BRW_MAPFILTER_NEAREST; |
case PIPE_TEX_FILTER_LINEAR: return BRW_MAPFILTER_LINEAR; |
default: |
assert(!"unknown sampler filter"); |
return BRW_MAPFILTER_NEAREST; |
} |
} |
/** |
* Translate a pipe texture coordinate wrapping mode to the matching hardware |
* wrapping mode. |
*/ |
static int |
gen6_translate_tex_wrap(unsigned wrap, bool clamp_to_edge) |
{ |
/* clamp to edge or border? */ |
if (wrap == PIPE_TEX_WRAP_CLAMP) { |
wrap = (clamp_to_edge) ? |
PIPE_TEX_WRAP_CLAMP_TO_EDGE : PIPE_TEX_WRAP_CLAMP_TO_BORDER; |
} |
switch (wrap) { |
case PIPE_TEX_WRAP_REPEAT: return BRW_TEXCOORDMODE_WRAP; |
case PIPE_TEX_WRAP_CLAMP_TO_EDGE: return BRW_TEXCOORDMODE_CLAMP; |
case PIPE_TEX_WRAP_CLAMP_TO_BORDER: return BRW_TEXCOORDMODE_CLAMP_BORDER; |
case PIPE_TEX_WRAP_MIRROR_REPEAT: return BRW_TEXCOORDMODE_MIRROR; |
case PIPE_TEX_WRAP_CLAMP: |
case PIPE_TEX_WRAP_MIRROR_CLAMP: |
case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE: |
case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER: |
default: |
assert(!"unknown sampler wrap mode"); |
return BRW_TEXCOORDMODE_WRAP; |
} |
} |
/** |
* Translate a pipe DSA test function to the matching hardware compare |
* function. |
*/ |
static int |
gen6_translate_dsa_func(unsigned func) |
{ |
switch (func) { |
case PIPE_FUNC_NEVER: return BRW_COMPAREFUNCTION_NEVER; |
case PIPE_FUNC_LESS: return BRW_COMPAREFUNCTION_LESS; |
case PIPE_FUNC_EQUAL: return BRW_COMPAREFUNCTION_EQUAL; |
case PIPE_FUNC_LEQUAL: return BRW_COMPAREFUNCTION_LEQUAL; |
case PIPE_FUNC_GREATER: return BRW_COMPAREFUNCTION_GREATER; |
case PIPE_FUNC_NOTEQUAL: return BRW_COMPAREFUNCTION_NOTEQUAL; |
case PIPE_FUNC_GEQUAL: return BRW_COMPAREFUNCTION_GEQUAL; |
case PIPE_FUNC_ALWAYS: return BRW_COMPAREFUNCTION_ALWAYS; |
default: |
assert(!"unknown depth/stencil/alpha test function"); |
return BRW_COMPAREFUNCTION_NEVER; |
} |
} |
/**
 * Translate a pipe shadow compare function to the matching hardware shadow
 * function.
 *
 * The mapping is NOT identity: both the operand order and the result
 * polarity differ between the two conventions, so each function maps to
 * its mirrored-and-negated counterpart (e.g. NEVER -> ALWAYS).
 */
static int
gen6_translate_shadow_func(unsigned func)
{
   /*
    * For PIPE_FUNC_x, the reference value is on the left-hand side of the
    * comparison, and 1.0 is returned when the comparison is true.
    *
    * For BRW_PREFILTER_x, the reference value is on the right-hand side of
    * the comparison, and 0.0 is returned when the comparison is true.
    */
   switch (func) {
   case PIPE_FUNC_NEVER: return BRW_PREFILTER_ALWAYS;
   case PIPE_FUNC_LESS: return BRW_PREFILTER_LEQUAL;
   case PIPE_FUNC_EQUAL: return BRW_PREFILTER_NOTEQUAL;
   case PIPE_FUNC_LEQUAL: return BRW_PREFILTER_LESS;
   case PIPE_FUNC_GREATER: return BRW_PREFILTER_GEQUAL;
   case PIPE_FUNC_NOTEQUAL: return BRW_PREFILTER_EQUAL;
   case PIPE_FUNC_GEQUAL: return BRW_PREFILTER_GREATER;
   case PIPE_FUNC_ALWAYS: return BRW_PREFILTER_NEVER;
   default:
      assert(!"unknown shadow compare function");
      return BRW_PREFILTER_NEVER;
   }
}
/** |
* Translate an index size to the matching hardware index format. |
*/ |
static int |
gen6_translate_index_size(int size) |
{ |
switch (size) { |
case 4: return BRW_INDEX_DWORD; |
case 2: return BRW_INDEX_WORD; |
case 1: return BRW_INDEX_BYTE; |
default: |
assert(!"unknown index size"); |
return BRW_INDEX_BYTE; |
} |
} |
/**
 * Emit STATE_BASE_ADDRESS (10 dwords): five base addresses followed by
 * four upper-bound dwords.  All sizes must be 4K-page aligned; a size of
 * zero disables the range check for that base.
 *
 * NOTE(review): the literal 1 passed to ilo_cp_write_bo() / written
 * directly is presumably the per-dword "modify enable" bit — verify
 * against ilo_cp_write_bo() and the PRM.
 */
static void
gen6_emit_STATE_BASE_ADDRESS(const struct ilo_dev_info *dev,
                             struct intel_bo *general_state_bo,
                             struct intel_bo *surface_state_bo,
                             struct intel_bo *dynamic_state_bo,
                             struct intel_bo *indirect_object_bo,
                             struct intel_bo *instruction_bo,
                             uint32_t general_state_size,
                             uint32_t dynamic_state_size,
                             uint32_t indirect_object_size,
                             uint32_t instruction_size,
                             struct ilo_cp *cp)
{
   const uint32_t cmd = ILO_GPE_CMD(0x0, 0x1, 0x01);
   const uint8_t cmd_len = 10;

   ILO_GPE_VALID_GEN(dev, 6, 7);

   /* 4K-page aligned */
   assert(((general_state_size | dynamic_state_size |
            indirect_object_size | instruction_size) & 0xfff) == 0);

   ilo_cp_begin(cp, cmd_len);
   ilo_cp_write(cp, cmd | (cmd_len - 2));

   /* dwords 1-5: the five base addresses, each relocated against its bo */
   ilo_cp_write_bo(cp, 1, general_state_bo,
                   INTEL_DOMAIN_RENDER,
                   0);
   ilo_cp_write_bo(cp, 1, surface_state_bo,
                   INTEL_DOMAIN_SAMPLER,
                   0);
   ilo_cp_write_bo(cp, 1, dynamic_state_bo,
                   INTEL_DOMAIN_RENDER | INTEL_DOMAIN_INSTRUCTION,
                   0);
   ilo_cp_write_bo(cp, 1, indirect_object_bo,
                   0,
                   0);
   ilo_cp_write_bo(cp, 1, instruction_bo,
                   INTEL_DOMAIN_INSTRUCTION,
                   0);

   /* dwords 6-9: access upper bounds; zero size means "no bound" */
   if (general_state_size) {
      ilo_cp_write_bo(cp, general_state_size | 1, general_state_bo,
                      INTEL_DOMAIN_RENDER,
                      0);
   }
   else {
      /* skip range check */
      ilo_cp_write(cp, 1);
   }

   if (dynamic_state_size) {
      ilo_cp_write_bo(cp, dynamic_state_size | 1, dynamic_state_bo,
                      INTEL_DOMAIN_RENDER | INTEL_DOMAIN_INSTRUCTION,
                      0);
   }
   else {
      /* skip range check */
      ilo_cp_write(cp, 0xfffff000 + 1);
   }

   if (indirect_object_size) {
      ilo_cp_write_bo(cp, indirect_object_size | 1, indirect_object_bo,
                      0,
                      0);
   }
   else {
      /* skip range check */
      ilo_cp_write(cp, 0xfffff000 + 1);
   }

   if (instruction_size) {
      ilo_cp_write_bo(cp, instruction_size | 1, instruction_bo,
                      INTEL_DOMAIN_INSTRUCTION,
                      0);
   }
   else {
      /* skip range check */
      ilo_cp_write(cp, 1);
   }

   ilo_cp_end(cp);
}
/**
 * Emit STATE_SIP, setting the System Instruction Pointer to \p sip.
 *
 * Fixed: the DWord Length bits were ORed into the ilo_cp_begin()
 * reservation size instead of the command header dword.  That was only
 * accidentally harmless because cmd_len == 2 makes (cmd_len - 2) == 0;
 * this now follows the same pattern as every other emitter in this file.
 */
static void
gen6_emit_STATE_SIP(const struct ilo_dev_info *dev,
                    uint32_t sip,
                    struct ilo_cp *cp)
{
   const uint32_t cmd = ILO_GPE_CMD(0x0, 0x1, 0x02);
   const uint8_t cmd_len = 2;

   ILO_GPE_VALID_GEN(dev, 6, 7);

   ilo_cp_begin(cp, cmd_len);
   ilo_cp_write(cp, cmd | (cmd_len - 2));
   ilo_cp_write(cp, sip);
   ilo_cp_end(cp);
}
/**
 * Emit 3DSTATE_VF_STATISTICS (single dword); bit 0 enables or disables
 * vertex-fetch statistics gathering.
 */
static void
gen6_emit_3DSTATE_VF_STATISTICS(const struct ilo_dev_info *dev,
                                bool enable,
                                struct ilo_cp *cp)
{
   const uint32_t cmd = ILO_GPE_CMD(0x1, 0x0, 0x0b);
   const uint8_t cmd_len = 1;
   const uint32_t dw0 = cmd | (enable ? 1 : 0);

   ILO_GPE_VALID_GEN(dev, 6, 7);

   ilo_cp_begin(cp, cmd_len);
   ilo_cp_write(cp, dw0);
   ilo_cp_end(cp);
}
/**
 * Emit PIPELINE_SELECT (single dword).
 *
 * \param pipeline 0x0 for 3D, 0x1 for media.
 */
static void
gen6_emit_PIPELINE_SELECT(const struct ilo_dev_info *dev,
                          int pipeline,
                          struct ilo_cp *cp)
{
   const int cmd = ILO_GPE_CMD(0x1, 0x1, 0x04);
   const uint8_t cmd_len = 1;

   ILO_GPE_VALID_GEN(dev, 6, 7);

   /* 3D or media */
   assert(pipeline == 0x0 || pipeline == 0x1);

   ilo_cp_begin(cp, cmd_len);
   ilo_cp_write(cp, cmd | pipeline);
   ilo_cp_end(cp);
}
/*
 * Emit MEDIA_VFE_STATE to configure the media Video Front End: thread
 * limits, URB entry allocation, and gateway behavior.
 */
static void
gen6_emit_MEDIA_VFE_STATE(const struct ilo_dev_info *dev,
                          int max_threads, int num_urb_entries,
                          int urb_entry_size,
                          struct ilo_cp *cp)
{
   const uint32_t cmd = ILO_GPE_CMD(0x2, 0x0, 0x00);
   const uint8_t cmd_len = 8;
   uint32_t thread_dw, urb_dw;

   ILO_GPE_VALID_GEN(dev, 6, 6);

   /* thread control: thread count, URB entries, gateway setup */
   thread_dw = (max_threads - 1) << 16 |
               num_urb_entries << 8 |
               1 << 7 |   /* Reset Gateway Timer */
               1 << 6;    /* Bypass Gateway Control */

   /* URB configuration: per-entry size plus a fixed CURBE allocation */
   urb_dw = urb_entry_size << 16 |  /* URB Entry Allocation Size */
            480;                    /* CURBE Allocation Size */

   ilo_cp_begin(cp, cmd_len);
   ilo_cp_write(cp, cmd | (cmd_len - 2));
   ilo_cp_write(cp, 0);        /* scratch */
   ilo_cp_write(cp, thread_dw);
   ilo_cp_write(cp, 0);        /* MBZ */
   ilo_cp_write(cp, urb_dw);
   ilo_cp_write(cp, 0);        /* scoreboard */
   ilo_cp_write(cp, 0);
   ilo_cp_write(cp, 0);
   ilo_cp_end(cp);
}
/*
 * Emit MEDIA_CURBE_LOAD to load the constant URB entry (CURBE) from the
 * given buffer offset.
 */
static void
gen6_emit_MEDIA_CURBE_LOAD(const struct ilo_dev_info *dev,
                           uint32_t buf, int size,
                           struct ilo_cp *cp)
{
   const uint32_t cmd = ILO_GPE_CMD(0x2, 0x0, 0x01);
   const uint8_t cmd_len = 4;
   /* gen6_emit_push_constant_buffer() allocates buffers in 256-bit units */
   const int padded_size = align(size, 32);

   ILO_GPE_VALID_GEN(dev, 6, 6);

   /* the buffer offset must be 256-bit aligned as well */
   assert(buf % 32 == 0);

   ilo_cp_begin(cp, cmd_len);
   ilo_cp_write(cp, cmd | (cmd_len - 2));
   ilo_cp_write(cp, 0); /* MBZ */
   ilo_cp_write(cp, padded_size);
   ilo_cp_write(cp, buf);
   ilo_cp_end(cp);
}
/*
 * Emit MEDIA_INTERFACE_DESCRIPTOR_LOAD to point the hardware at an array
 * of interface descriptors in dynamic state.
 */
static void
gen6_emit_MEDIA_INTERFACE_DESCRIPTOR_LOAD(const struct ilo_dev_info *dev,
                                          uint32_t offset, int num_ids,
                                          struct ilo_cp *cp)
{
   const uint32_t cmd = ILO_GPE_CMD(0x2, 0x0, 0x02);
   const uint8_t cmd_len = 4;
   /* every descriptor is 8 DWords; total length is in bytes */
   const int total_len = num_ids * 8 * 4;

   ILO_GPE_VALID_GEN(dev, 6, 6);

   /* descriptors must start on a 32-byte boundary */
   assert(offset % 32 == 0);

   ilo_cp_begin(cp, cmd_len);
   ilo_cp_write(cp, cmd | (cmd_len - 2));
   ilo_cp_write(cp, 0); /* MBZ */
   ilo_cp_write(cp, total_len);
   ilo_cp_write(cp, offset);
   ilo_cp_end(cp);
}
/*
 * Emit MEDIA_GATEWAY_STATE to configure the media message gateway.
 */
static void
gen6_emit_MEDIA_GATEWAY_STATE(const struct ilo_dev_info *dev,
                              int id, int byte, int thread_count,
                              struct ilo_cp *cp)
{
   const uint32_t cmd = ILO_GPE_CMD(0x2, 0x0, 0x03);
   const uint8_t cmd_len = 2;
   uint32_t desc;

   ILO_GPE_VALID_GEN(dev, 6, 6);

   /* pack id, byte, and thread count into the single payload DWord */
   desc = id << 16;
   desc |= byte << 8;
   desc |= thread_count;

   ilo_cp_begin(cp, cmd_len);
   ilo_cp_write(cp, cmd | (cmd_len - 2));
   ilo_cp_write(cp, desc);
   ilo_cp_end(cp);
}
/*
 * Emit MEDIA_STATE_FLUSH with a thread-count watermark and barrier mask.
 */
static void
gen6_emit_MEDIA_STATE_FLUSH(const struct ilo_dev_info *dev,
                            int thread_count_water_mark,
                            int barrier_mask,
                            struct ilo_cp *cp)
{
   const uint32_t cmd = ILO_GPE_CMD(0x2, 0x0, 0x04);
   const uint8_t cmd_len = 2;
   uint32_t flush_dw;

   ILO_GPE_VALID_GEN(dev, 6, 6);

   /* watermark in the high half, barrier mask in the low bits */
   flush_dw = thread_count_water_mark << 16;
   flush_dw |= barrier_mask;

   ilo_cp_begin(cp, cmd_len);
   ilo_cp_write(cp, cmd | (cmd_len - 2));
   ilo_cp_write(cp, flush_dw);
   ilo_cp_end(cp);
}
/*
 * Placeholder for MEDIA_OBJECT_WALKER: always aborts, as no code path in
 * this driver emits the command yet.
 */
static void
gen6_emit_MEDIA_OBJECT_WALKER(const struct ilo_dev_info *dev,
                              struct ilo_cp *cp)
{
   assert(!"MEDIA_OBJECT_WALKER unsupported");
}
static void |
gen6_emit_3DSTATE_BINDING_TABLE_POINTERS(const struct ilo_dev_info *dev, |
uint32_t vs_binding_table, |
uint32_t gs_binding_table, |
uint32_t ps_binding_table, |
struct ilo_cp *cp) |
{ |
const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x01); |
const uint8_t cmd_len = 4; |
ILO_GPE_VALID_GEN(dev, 6, 6); |
ilo_cp_begin(cp, cmd_len); |
ilo_cp_write(cp, cmd | (cmd_len - 2) | |
GEN6_BINDING_TABLE_MODIFY_VS | |
GEN6_BINDING_TABLE_MODIFY_GS | |
GEN6_BINDING_TABLE_MODIFY_PS); |
ilo_cp_write(cp, vs_binding_table); |
ilo_cp_write(cp, gs_binding_table); |
ilo_cp_write(cp, ps_binding_table); |
ilo_cp_end(cp); |
} |
static void |
gen6_emit_3DSTATE_SAMPLER_STATE_POINTERS(const struct ilo_dev_info *dev, |
uint32_t vs_sampler_state, |
uint32_t gs_sampler_state, |
uint32_t ps_sampler_state, |
struct ilo_cp *cp) |
{ |
const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x02); |
const uint8_t cmd_len = 4; |
ILO_GPE_VALID_GEN(dev, 6, 6); |
ilo_cp_begin(cp, cmd_len); |
ilo_cp_write(cp, cmd | (cmd_len - 2) | |
VS_SAMPLER_STATE_CHANGE | |
GS_SAMPLER_STATE_CHANGE | |
PS_SAMPLER_STATE_CHANGE); |
ilo_cp_write(cp, vs_sampler_state); |
ilo_cp_write(cp, gs_sampler_state); |
ilo_cp_write(cp, ps_sampler_state); |
ilo_cp_end(cp); |
} |
/*
 * Emit 3DSTATE_URB, partitioning the URB between the VS and GS stages.
 *
 * Sizes are given in bytes; they are converted to 1024-bit URB rows and
 * clamped/aligned to the ranges the hardware accepts.
 */
static void
gen6_emit_3DSTATE_URB(const struct ilo_dev_info *dev,
                      int vs_total_size, int gs_total_size,
                      int vs_entry_size, int gs_entry_size,
                      struct ilo_cp *cp)
{
   const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x05);
   const uint8_t cmd_len = 3;
   const int row_size = 128; /* 1024 bits */
   int vs_alloc_size, gs_alloc_size;
   int vs_num_entries, gs_num_entries;

   ILO_GPE_VALID_GEN(dev, 6, 6);

   /* in 1024-bit URB rows, rounding up */
   vs_alloc_size = (vs_entry_size + row_size - 1) / row_size;
   gs_alloc_size = (gs_entry_size + row_size - 1) / row_size;

   /* the valid range is [1, 5] */
   if (!vs_alloc_size)
      vs_alloc_size = 1;
   if (!gs_alloc_size)
      gs_alloc_size = 1;
   assert(vs_alloc_size <= 5 && gs_alloc_size <= 5);

   /* the valid range is [24, 256] in multiples of 4 ("& ~3" aligns down) */
   vs_num_entries = (vs_total_size / row_size / vs_alloc_size) & ~3;
   if (vs_num_entries > 256)
      vs_num_entries = 256;
   assert(vs_num_entries >= 24);

   /* the valid range is [0, 256] in multiples of 4 */
   gs_num_entries = (gs_total_size / row_size / gs_alloc_size) & ~3;
   if (gs_num_entries > 256)
      gs_num_entries = 256;

   ilo_cp_begin(cp, cmd_len);
   ilo_cp_write(cp, cmd | (cmd_len - 2));
   /* allocation sizes are programmed minus one */
   ilo_cp_write(cp, (vs_alloc_size - 1) << GEN6_URB_VS_SIZE_SHIFT |
                    vs_num_entries << GEN6_URB_VS_ENTRIES_SHIFT);
   ilo_cp_write(cp, gs_num_entries << GEN6_URB_GS_ENTRIES_SHIFT |
                    (gs_alloc_size - 1) << GEN6_URB_GS_SIZE_SHIFT);
   ilo_cp_end(cp);
}
static void |
gen6_emit_3DSTATE_VERTEX_BUFFERS(const struct ilo_dev_info *dev, |
const struct pipe_vertex_buffer *vbuffers, |
uint64_t vbuffer_mask, |
const struct ilo_ve_state *ve, |
struct ilo_cp *cp) |
{ |
const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x08); |
uint8_t cmd_len; |
unsigned hw_idx; |
ILO_GPE_VALID_GEN(dev, 6, 7); |
/* |
* From the Sandy Bridge PRM, volume 2 part 1, page 82: |
* |
* "From 1 to 33 VBs can be specified..." |
*/ |
assert(vbuffer_mask <= (1UL << 33)); |
if (!vbuffer_mask) |
return; |
cmd_len = 1; |
for (hw_idx = 0; hw_idx < ve->vb_count; hw_idx++) { |
const unsigned pipe_idx = ve->vb_mapping[hw_idx]; |
if (vbuffer_mask & (1 << pipe_idx)) |
cmd_len += 4; |
} |
ilo_cp_begin(cp, cmd_len); |
ilo_cp_write(cp, cmd | (cmd_len - 2)); |
for (hw_idx = 0; hw_idx < ve->vb_count; hw_idx++) { |
const unsigned instance_divisor = ve->instance_divisors[hw_idx]; |
const unsigned pipe_idx = ve->vb_mapping[hw_idx]; |
const struct pipe_vertex_buffer *vb = &vbuffers[pipe_idx]; |
uint32_t dw; |
if (!(vbuffer_mask & (1 << pipe_idx))) |
continue; |
dw = hw_idx << GEN6_VB0_INDEX_SHIFT; |
if (instance_divisor) |
dw |= GEN6_VB0_ACCESS_INSTANCEDATA; |
else |
dw |= GEN6_VB0_ACCESS_VERTEXDATA; |
if (dev->gen >= ILO_GEN(7)) |
dw |= GEN7_VB0_ADDRESS_MODIFYENABLE; |
/* use null vb if there is no buffer or the stride is out of range */ |
if (vb->buffer && vb->stride <= 2048) { |
const struct ilo_buffer *buf = ilo_buffer(vb->buffer); |
const uint32_t start_offset = vb->buffer_offset; |
/* |
* As noted in ilo_translate_format(), we treat some 3-component |
* formats as 4-component formats to work around hardware |
* limitations. Imagine the case where the vertex buffer holds a |
* single PIPE_FORMAT_R16G16B16_FLOAT vertex, and buf->bo_size is 6. |
* The hardware would not be able to fetch it because the vertex |
* buffer is expected to hold a PIPE_FORMAT_R16G16B16A16_FLOAT vertex |
* and that takes at least 8 bytes. |
* |
* For the workaround to work, we query the physical size, which is |
* page aligned, to calculate end_offset so that the last vertex has |
* a better chance to be fetched. |
*/ |
const uint32_t end_offset = intel_bo_get_size(buf->bo) - 1; |
dw |= vb->stride << BRW_VB0_PITCH_SHIFT; |
ilo_cp_write(cp, dw); |
ilo_cp_write_bo(cp, start_offset, buf->bo, INTEL_DOMAIN_VERTEX, 0); |
ilo_cp_write_bo(cp, end_offset, buf->bo, INTEL_DOMAIN_VERTEX, 0); |
ilo_cp_write(cp, instance_divisor); |
} |
else { |
dw |= 1 << 13; |
ilo_cp_write(cp, dw); |
ilo_cp_write(cp, 0); |
ilo_cp_write(cp, 0); |
ilo_cp_write(cp, instance_divisor); |
} |
} |
ilo_cp_end(cp); |
} |
/*
 * Rewrite an already-initialized vertex element CSO so that it sources the
 * edge flag, following the hardware constraints quoted below.
 */
static void
ve_set_cso_edgeflag(const struct ilo_dev_info *dev,
                    struct ilo_ve_cso *cso)
{
   int format;

   ILO_GPE_VALID_GEN(dev, 6, 7);

   /*
    * From the Sandy Bridge PRM, volume 2 part 1, page 94:
    *
    *     "- This bit (Edge Flag Enable) must only be ENABLED on the last
    *        valid VERTEX_ELEMENT structure.
    *
    *      - When set, Component 0 Control must be set to VFCOMP_STORE_SRC,
    *        and Component 1-3 Control must be set to VFCOMP_NOSTORE.
    *
    *      - The Source Element Format must be set to the UINT format.
    *
    *      - [DevSNB]: Edge Flags are not supported for QUADLIST
    *        primitives.  Software may elect to convert QUADLIST primitives
    *        to some set of corresponding edge-flag-supported primitive
    *        types (e.g., POLYGONs) prior to submission to the 3D pipeline."
    */
   cso->payload[0] |= GEN6_VE0_EDGE_FLAG_ENABLE;
   /* store only component 0, per the constraint above */
   cso->payload[1] =
         BRW_VE1_COMPONENT_STORE_SRC << BRW_VE1_COMPONENT_0_SHIFT |
         BRW_VE1_COMPONENT_NOSTORE << BRW_VE1_COMPONENT_1_SHIFT |
         BRW_VE1_COMPONENT_NOSTORE << BRW_VE1_COMPONENT_2_SHIFT |
         BRW_VE1_COMPONENT_NOSTORE << BRW_VE1_COMPONENT_3_SHIFT;

   /*
    * Edge flags have format BRW_SURFACEFORMAT_R8_UINT when defined via
    * glEdgeFlagPointer(), and format BRW_SURFACEFORMAT_R32_FLOAT when defined
    * via glEdgeFlag(), as can be seen in vbo_attrib_tmp.h.
    *
    * Since all the hardware cares about is whether the flags are zero or not,
    * we can treat them as BRW_SURFACEFORMAT_R32_UINT in the latter case.
    */
   format = (cso->payload[0] >> BRW_VE0_FORMAT_SHIFT) & 0x1ff;
   if (format == BRW_SURFACEFORMAT_R32_FLOAT) {
      /* R32_UINT is the enum value directly below R32_FLOAT, so the format
       * field can be converted by subtracting one */
      STATIC_ASSERT(BRW_SURFACEFORMAT_R32_UINT ==
                    BRW_SURFACEFORMAT_R32_FLOAT - 1);
      cso->payload[0] -= (1 << BRW_VE0_FORMAT_SHIFT);
   }
   else {
      assert(format == BRW_SURFACEFORMAT_R8_UINT);
   }
}
static void |
ve_init_cso_with_components(const struct ilo_dev_info *dev, |
int comp0, int comp1, int comp2, int comp3, |
struct ilo_ve_cso *cso) |
{ |
ILO_GPE_VALID_GEN(dev, 6, 7); |
STATIC_ASSERT(Elements(cso->payload) >= 2); |
cso->payload[0] = GEN6_VE0_VALID; |
cso->payload[1] = |
comp0 << BRW_VE1_COMPONENT_0_SHIFT | |
comp1 << BRW_VE1_COMPONENT_1_SHIFT | |
comp2 << BRW_VE1_COMPONENT_2_SHIFT | |
comp3 << BRW_VE1_COMPONENT_3_SHIFT; |
} |
/*
 * Translate one pipe_vertex_element into its hardware CSO, filling in
 * default values for the components the source format does not supply.
 */
static void
ve_init_cso(const struct ilo_dev_info *dev,
            const struct pipe_vertex_element *state,
            unsigned vb_index,
            struct ilo_ve_cso *cso)
{
   int comp[4] = {
      BRW_VE1_COMPONENT_STORE_SRC,
      BRW_VE1_COMPONENT_STORE_SRC,
      BRW_VE1_COMPONENT_STORE_SRC,
      BRW_VE1_COMPONENT_STORE_SRC,
   };
   int format;

   ILO_GPE_VALID_GEN(dev, 6, 7);

   /*
    * Missing components default to 0, except w which defaults to 1 (integer
    * or float, depending on the format).  The fallthroughs are intentional:
    * a 1-component format must also default components 2 and 3, and so on.
    */
   switch (util_format_get_nr_components(state->src_format)) {
   case 1: comp[1] = BRW_VE1_COMPONENT_STORE_0;    /* fall through */
   case 2: comp[2] = BRW_VE1_COMPONENT_STORE_0;    /* fall through */
   case 3: comp[3] = (util_format_is_pure_integer(state->src_format)) ?
                     BRW_VE1_COMPONENT_STORE_1_INT :
                     BRW_VE1_COMPONENT_STORE_1_FLT;
   }

   format = ilo_translate_vertex_format(state->src_format);

   STATIC_ASSERT(Elements(cso->payload) >= 2);
   cso->payload[0] =
      vb_index << GEN6_VE0_INDEX_SHIFT |
      GEN6_VE0_VALID |
      format << BRW_VE0_FORMAT_SHIFT |
      state->src_offset << BRW_VE0_SRC_OFFSET_SHIFT;

   cso->payload[1] =
         comp[0] << BRW_VE1_COMPONENT_0_SHIFT |
         comp[1] << BRW_VE1_COMPONENT_1_SHIFT |
         comp[2] << BRW_VE1_COMPONENT_2_SHIFT |
         comp[3] << BRW_VE1_COMPONENT_3_SHIFT;
}
/*
 * Initialize an ilo_ve_state from an array of pipe_vertex_elements.
 *
 * Builds the pipe-to-hardware vertex buffer mapping: each (pipe buffer,
 * instance divisor) pair gets its own hardware vertex buffer, since the
 * divisor is a property of the hardware buffer rather than of the element.
 */
void
ilo_gpe_init_ve(const struct ilo_dev_info *dev,
                unsigned num_states,
                const struct pipe_vertex_element *states,
                struct ilo_ve_state *ve)
{
   unsigned i;

   ILO_GPE_VALID_GEN(dev, 6, 7);

   ve->count = num_states;
   ve->vb_count = 0;

   for (i = 0; i < num_states; i++) {
      const unsigned pipe_idx = states[i].vertex_buffer_index;
      const unsigned instance_divisor = states[i].instance_divisor;
      unsigned hw_idx;

      /*
       * map the pipe vb to the hardware vb, which has a fixed instance
       * divisor
       */
      for (hw_idx = 0; hw_idx < ve->vb_count; hw_idx++) {
         if (ve->vb_mapping[hw_idx] == pipe_idx &&
             ve->instance_divisors[hw_idx] == instance_divisor)
            break;
      }

      /* create one if there is no matching hardware vb */
      if (hw_idx >= ve->vb_count) {
         hw_idx = ve->vb_count++;
         ve->vb_mapping[hw_idx] = pipe_idx;
         ve->instance_divisors[hw_idx] = instance_divisor;
      }

      ve_init_cso(dev, &states[i], hw_idx, &ve->cso[i]);
   }
}
/*
 * Emit 3DSTATE_VERTEX_ELEMENTS.
 *
 * When there are no elements at all, a dummy element storing (0, 0, 0, 1)
 * is emitted instead, since the command must carry at least one element.
 * Optionally prepends a generated VertexID/InstanceID element and/or
 * converts the last element into an edge-flag element.
 */
static void
gen6_emit_3DSTATE_VERTEX_ELEMENTS(const struct ilo_dev_info *dev,
                                  const struct ilo_ve_state *ve,
                                  bool last_velement_edgeflag,
                                  bool prepend_generated_ids,
                                  struct ilo_cp *cp)
{
   const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x09);
   uint8_t cmd_len;
   unsigned i;

   ILO_GPE_VALID_GEN(dev, 6, 7);

   /*
    * From the Sandy Bridge PRM, volume 2 part 1, page 93:
    *
    *     "Up to 34 (DevSNB+) vertex elements are supported."
    */
   assert(ve->count + prepend_generated_ids <= 34);

   if (!ve->count && !prepend_generated_ids) {
      struct ilo_ve_cso dummy;

      /* emit one dummy element storing (0, 0, 0, 1) */
      ve_init_cso_with_components(dev,
                                  BRW_VE1_COMPONENT_STORE_0,
                                  BRW_VE1_COMPONENT_STORE_0,
                                  BRW_VE1_COMPONENT_STORE_0,
                                  BRW_VE1_COMPONENT_STORE_1_FLT,
                                  &dummy);

      cmd_len = 3;
      ilo_cp_begin(cp, cmd_len);
      ilo_cp_write(cp, cmd | (cmd_len - 2));
      ilo_cp_write_multi(cp, dummy.payload, 2);
      ilo_cp_end(cp);

      return;
   }

   /* two DWords per element plus the header */
   cmd_len = 2 * (ve->count + prepend_generated_ids) + 1;

   ilo_cp_begin(cp, cmd_len);
   ilo_cp_write(cp, cmd | (cmd_len - 2));

   if (prepend_generated_ids) {
      struct ilo_ve_cso gen_ids;

      ve_init_cso_with_components(dev,
                                  BRW_VE1_COMPONENT_STORE_VID,
                                  BRW_VE1_COMPONENT_STORE_IID,
                                  BRW_VE1_COMPONENT_NOSTORE,
                                  BRW_VE1_COMPONENT_NOSTORE,
                                  &gen_ids);

      ilo_cp_write_multi(cp, gen_ids.payload, 2);
   }

   if (last_velement_edgeflag) {
      struct ilo_ve_cso edgeflag;

      /*
       * NOTE(review): this path assumes ve->count >= 1.  ve->count is
       * unsigned, so "ve->count - 1" would wrap if a caller requested an
       * edge flag with zero elements — confirm callers guarantee this.
       */
      for (i = 0; i < ve->count - 1; i++)
         ilo_cp_write_multi(cp, ve->cso[i].payload, 2);

      /* copy the last element and turn it into the edge-flag element */
      edgeflag = ve->cso[i];
      ve_set_cso_edgeflag(dev, &edgeflag);
      ilo_cp_write_multi(cp, edgeflag.payload, 2);
   }
   else {
      for (i = 0; i < ve->count; i++)
         ilo_cp_write_multi(cp, ve->cso[i].payload, 2);
   }

   ilo_cp_end(cp);
}
/*
 * Emit 3DSTATE_INDEX_BUFFER for the current index buffer, or nothing when
 * no hardware index buffer is bound.
 */
static void
gen6_emit_3DSTATE_INDEX_BUFFER(const struct ilo_dev_info *dev,
                               const struct ilo_ib_state *ib,
                               bool enable_cut_index,
                               struct ilo_cp *cp)
{
   const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x0a);
   const uint8_t cmd_len = 3;
   struct ilo_buffer *buf = ilo_buffer(ib->hw_resource);
   uint32_t start_offset, end_offset;
   int format;

   ILO_GPE_VALID_GEN(dev, 6, 7);

   if (!buf)
      return;

   format = gen6_translate_index_size(ib->hw_index_size);

   /*
    * set start_offset to 0 here and adjust pipe_draw_info::start with
    * ib->draw_start_offset in 3DPRIMITIVE
    */
   start_offset = 0;
   end_offset = buf->bo_size;

   /* end_offset must also be aligned and is inclusive */
   end_offset -= (end_offset % ib->hw_index_size);
   end_offset--;

   ilo_cp_begin(cp, cmd_len);
   ilo_cp_write(cp, cmd | (cmd_len - 2) |
                    ((enable_cut_index) ? BRW_CUT_INDEX_ENABLE : 0) |
                    format << 8);
   ilo_cp_write_bo(cp, start_offset, buf->bo, INTEL_DOMAIN_VERTEX, 0);
   ilo_cp_write_bo(cp, end_offset, buf->bo, INTEL_DOMAIN_VERTEX, 0);
   ilo_cp_end(cp);
}
static void |
gen6_emit_3DSTATE_VIEWPORT_STATE_POINTERS(const struct ilo_dev_info *dev, |
uint32_t clip_viewport, |
uint32_t sf_viewport, |
uint32_t cc_viewport, |
struct ilo_cp *cp) |
{ |
const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x0d); |
const uint8_t cmd_len = 4; |
ILO_GPE_VALID_GEN(dev, 6, 6); |
ilo_cp_begin(cp, cmd_len); |
ilo_cp_write(cp, cmd | (cmd_len - 2) | |
GEN6_CLIP_VIEWPORT_MODIFY | |
GEN6_SF_VIEWPORT_MODIFY | |
GEN6_CC_VIEWPORT_MODIFY); |
ilo_cp_write(cp, clip_viewport); |
ilo_cp_write(cp, sf_viewport); |
ilo_cp_write(cp, cc_viewport); |
ilo_cp_end(cp); |
} |
/*
 * Emit 3DSTATE_CC_STATE_POINTERS, updating the blend, depth-stencil, and
 * color-calc state pointers in one shot.
 */
static void
gen6_emit_3DSTATE_CC_STATE_POINTERS(const struct ilo_dev_info *dev,
                                    uint32_t blend_state,
                                    uint32_t depth_stencil_state,
                                    uint32_t color_calc_state,
                                    struct ilo_cp *cp)
{
   const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x0e);
   const uint8_t cmd_len = 4;

   ILO_GPE_VALID_GEN(dev, 6, 6);

   ilo_cp_begin(cp, cmd_len);
   ilo_cp_write(cp, cmd | (cmd_len - 2));
   /* NOTE(review): bit 0 of each pointer appears to be a modify-enable —
    * confirm against the SNB PRM */
   ilo_cp_write(cp, blend_state | 1);
   ilo_cp_write(cp, depth_stencil_state | 1);
   ilo_cp_write(cp, color_calc_state | 1);
   ilo_cp_end(cp);
}
/*
 * Emit 3DSTATE_SCISSOR_STATE_POINTERS with the given scissor rect offset.
 */
static void
gen6_emit_3DSTATE_SCISSOR_STATE_POINTERS(const struct ilo_dev_info *dev,
                                         uint32_t scissor_rect,
                                         struct ilo_cp *cp)
{
   const uint8_t cmd_len = 2;
   const uint32_t dw0 = ILO_GPE_CMD(0x3, 0x0, 0x0f) | (cmd_len - 2);

   ILO_GPE_VALID_GEN(dev, 6, 7);

   ilo_cp_begin(cp, cmd_len);
   ilo_cp_write(cp, dw0);
   ilo_cp_write(cp, scissor_rect);
   ilo_cp_end(cp);
}
/*
 * Precompute the 3DSTATE_VS payload DWords (DW2, DW4, DW5) for a vertex
 * shader and store them in the CSO.
 */
void
ilo_gpe_init_vs_cso(const struct ilo_dev_info *dev,
                    const struct ilo_shader_state *vs,
                    struct ilo_shader_cso *cso)
{
   int start_grf, vue_read_len, max_threads;
   uint32_t dw2, dw4, dw5;

   ILO_GPE_VALID_GEN(dev, 6, 7);

   start_grf = ilo_shader_get_kernel_param(vs, ILO_KERNEL_URB_DATA_START_REG);
   vue_read_len = ilo_shader_get_kernel_param(vs, ILO_KERNEL_INPUT_COUNT);

   /*
    * From the Sandy Bridge PRM, volume 2 part 1, page 135:
    *
    *     "(Vertex URB Entry Read Length) Specifies the number of pairs of
    *      128-bit vertex elements to be passed into the payload for each
    *      vertex."
    *
    *     "It is UNDEFINED to set this field to 0 indicating no Vertex URB
    *      data to be read and passed to the thread."
    */
   vue_read_len = (vue_read_len + 1) / 2;
   if (!vue_read_len)
      vue_read_len = 1;

   /* derive the maximum VS thread count from the EU configuration */
   switch (dev->gen) {
   case ILO_GEN(6):
      /*
       * From the Sandy Bridge PRM, volume 1 part 1, page 22:
       *
       *     "Device             # of EUs        #Threads/EU
       *      SNB GT2            12              5
       *      SNB GT1            6               4"
       */
      max_threads = (dev->gt == 2) ? 60 : 24;
      break;
   case ILO_GEN(7):
      /*
       * From the Ivy Bridge PRM, volume 1 part 1, page 18:
       *
       *     "Device             # of EUs        #Threads/EU
       *      Ivy Bridge (GT2)   16              8
       *      Ivy Bridge (GT1)   6               6"
       */
      max_threads = (dev->gt == 2) ? 128 : 36;
      break;
   case ILO_GEN(7.5):
      /* see brwCreateContext() */
      max_threads = (dev->gt == 2) ? 280 : 70;
      break;
   default:
      max_threads = 1;
      break;
   }

   /* always use IEEE-754 mode; the ALT floating-point mode path is
    * intentionally disabled */
   dw2 = (true) ? 0 : GEN6_VS_FLOATING_POINT_MODE_ALT;

   dw4 = start_grf << GEN6_VS_DISPATCH_START_GRF_SHIFT |
         vue_read_len << GEN6_VS_URB_READ_LENGTH_SHIFT |
         0 << GEN6_VS_URB_ENTRY_READ_OFFSET_SHIFT;

   dw5 = GEN6_VS_STATISTICS_ENABLE |
         GEN6_VS_ENABLE;

   /* Haswell moved the max-threads field */
   if (dev->gen >= ILO_GEN(7.5))
      dw5 |= (max_threads - 1) << HSW_VS_MAX_THREADS_SHIFT;
   else
      dw5 |= (max_threads - 1) << GEN6_VS_MAX_THREADS_SHIFT;

   STATIC_ASSERT(Elements(cso->payload) >= 3);
   cso->payload[0] = dw2;
   cso->payload[1] = dw4;
   cso->payload[2] = dw5;
}
static void |
gen6_emit_3DSTATE_VS(const struct ilo_dev_info *dev, |
const struct ilo_shader_state *vs, |
int num_samplers, |
struct ilo_cp *cp) |
{ |
const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x10); |
const uint8_t cmd_len = 6; |
const struct ilo_shader_cso *cso; |
uint32_t dw2, dw4, dw5; |
ILO_GPE_VALID_GEN(dev, 6, 7); |
if (!vs) { |
ilo_cp_begin(cp, cmd_len); |
ilo_cp_write(cp, cmd | (cmd_len - 2)); |
ilo_cp_write(cp, 0); |
ilo_cp_write(cp, 0); |
ilo_cp_write(cp, 0); |
ilo_cp_write(cp, 0); |
ilo_cp_write(cp, 0); |
ilo_cp_end(cp); |
return; |
} |
cso = ilo_shader_get_kernel_cso(vs); |
dw2 = cso->payload[0]; |
dw4 = cso->payload[1]; |
dw5 = cso->payload[2]; |
dw2 |= ((num_samplers + 3) / 4) << GEN6_VS_SAMPLER_COUNT_SHIFT; |
ilo_cp_begin(cp, cmd_len); |
ilo_cp_write(cp, cmd | (cmd_len - 2)); |
ilo_cp_write(cp, ilo_shader_get_kernel_offset(vs)); |
ilo_cp_write(cp, dw2); |
ilo_cp_write(cp, 0); /* scratch */ |
ilo_cp_write(cp, dw4); |
ilo_cp_write(cp, dw5); |
ilo_cp_end(cp); |
} |
/*
 * Precompute the GEN6 3DSTATE_GS payload DWords (DW2, DW4, DW5, DW6) and
 * store them in the CSO.  The shader may be a real geometry shader, or a
 * vertex shader whose GEN6 stream-output kernel runs on the GS stage.
 */
void
ilo_gpe_init_gs_cso_gen6(const struct ilo_dev_info *dev,
                         const struct ilo_shader_state *gs,
                         struct ilo_shader_cso *cso)
{
   int start_grf, vue_read_len, max_threads;
   uint32_t dw2, dw4, dw5, dw6;

   ILO_GPE_VALID_GEN(dev, 6, 6);

   if (ilo_shader_get_type(gs) == PIPE_SHADER_GEOMETRY) {
      start_grf = ilo_shader_get_kernel_param(gs,
            ILO_KERNEL_URB_DATA_START_REG);

      vue_read_len = ilo_shader_get_kernel_param(gs, ILO_KERNEL_INPUT_COUNT);
   }
   else {
      /* a VS stream-output kernel: it consumes the VS outputs */
      start_grf = ilo_shader_get_kernel_param(gs,
            ILO_KERNEL_VS_GEN6_SO_START_REG);

      vue_read_len = ilo_shader_get_kernel_param(gs, ILO_KERNEL_OUTPUT_COUNT);
   }

   /*
    * From the Sandy Bridge PRM, volume 2 part 1, page 153:
    *
    *     "Specifies the amount of URB data read and passed in the thread
    *      payload for each Vertex URB entry, in 256-bit register increments.
    *
    *      It is UNDEFINED to set this field (Vertex URB Entry Read Length) to
    *      0 indicating no Vertex URB data to be read and passed to the
    *      thread."
    */
   vue_read_len = (vue_read_len + 1) / 2;
   if (!vue_read_len)
      vue_read_len = 1;

   /*
    * From the Sandy Bridge PRM, volume 2 part 1, page 154:
    *
    *     "Maximum Number of Threads valid range is [0,27] when Rendering
    *      Enabled bit is set."
    *
    * From the Sandy Bridge PRM, volume 2 part 1, page 173:
    *
    *     "Programming Note: If the GS stage is enabled, software must always
    *      allocate at least one GS URB Entry. This is true even if the GS
    *      thread never needs to output vertices to the pipeline, e.g., when
    *      only performing stream output. This is an artifact of the need to
    *      pass the GS thread an initial destination URB handle."
    *
    * As such, we always enable rendering, and limit the number of threads.
    */
   if (dev->gt == 2) {
      /* maximum is 60, but limited to 28 */
      max_threads = 28;
   }
   else {
      /* maximum is 24, but limited to 21 (see brwCreateContext()) */
      max_threads = 21;
   }

   dw2 = GEN6_GS_SPF_MODE;

   dw4 = vue_read_len << GEN6_GS_URB_READ_LENGTH_SHIFT |
         0 << GEN6_GS_URB_ENTRY_READ_OFFSET_SHIFT |
         start_grf << GEN6_GS_DISPATCH_START_GRF_SHIFT;

   dw5 = (max_threads - 1) << GEN6_GS_MAX_THREADS_SHIFT |
         GEN6_GS_STATISTICS_ENABLE |
         GEN6_GS_SO_STATISTICS_ENABLE |
         GEN6_GS_RENDERING_ENABLE;

   /*
    * we cannot make use of GEN6_GS_REORDER because it will reorder
    * triangle strips according to D3D rules (triangle 2N+1 uses vertices
    * (2N+1, 2N+3, 2N+2)), instead of GL rules (triangle 2N+1 uses vertices
    * (2N+2, 2N+1, 2N+3)).
    */
   dw6 = GEN6_GS_ENABLE;

   if (ilo_shader_get_kernel_param(gs, ILO_KERNEL_GS_DISCARD_ADJACENCY))
      dw6 |= GEN6_GS_DISCARD_ADJACENCY;

   /* enable the SVBI payload (and its auto-increment) for stream output */
   if (ilo_shader_get_kernel_param(gs, ILO_KERNEL_VS_GEN6_SO)) {
      const uint32_t svbi_post_inc =
         ilo_shader_get_kernel_param(gs, ILO_KERNEL_GS_GEN6_SVBI_POST_INC);

      dw6 |= GEN6_GS_SVBI_PAYLOAD_ENABLE;
      if (svbi_post_inc) {
         dw6 |= GEN6_GS_SVBI_POSTINCREMENT_ENABLE |
                svbi_post_inc << GEN6_GS_SVBI_POSTINCREMENT_VALUE_SHIFT;
      }
   }

   STATIC_ASSERT(Elements(cso->payload) >= 4);
   cso->payload[0] = dw2;
   cso->payload[1] = dw4;
   cso->payload[2] = dw5;
   cso->payload[3] = dw6;
}
/*
 * Emit 3DSTATE_GS.  Three configurations are possible: a real geometry
 * shader, a VS stream-output kernel running on the GS stage (selected by
 * primitive vertex count), or a disabled GS stage.
 */
static void
gen6_emit_3DSTATE_GS(const struct ilo_dev_info *dev,
                     const struct ilo_shader_state *gs,
                     const struct ilo_shader_state *vs,
                     int verts_per_prim,
                     struct ilo_cp *cp)
{
   const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x11);
   const uint8_t cmd_len = 7;
   uint32_t dw1, dw2, dw4, dw5, dw6;

   ILO_GPE_VALID_GEN(dev, 6, 6);

   if (gs) {
      /* use the precomputed payload from the GS CSO */
      const struct ilo_shader_cso *cso;

      dw1 = ilo_shader_get_kernel_offset(gs);

      cso = ilo_shader_get_kernel_cso(gs);
      dw2 = cso->payload[0];
      dw4 = cso->payload[1];
      dw5 = cso->payload[2];
      dw6 = cso->payload[3];
   }
   else if (vs && ilo_shader_get_kernel_param(vs, ILO_KERNEL_VS_GEN6_SO)) {
      struct ilo_shader_cso cso;
      enum ilo_kernel_param param;

      /* pick the SO sub-kernel matching the primitive's vertex count */
      switch (verts_per_prim) {
      case 1:
         param = ILO_KERNEL_VS_GEN6_SO_POINT_OFFSET;
         break;
      case 2:
         param = ILO_KERNEL_VS_GEN6_SO_LINE_OFFSET;
         break;
      default:
         param = ILO_KERNEL_VS_GEN6_SO_TRI_OFFSET;
         break;
      }

      dw1 = ilo_shader_get_kernel_offset(vs) +
         ilo_shader_get_kernel_param(vs, param);

      /* cannot use VS's CSO */
      ilo_gpe_init_gs_cso_gen6(dev, vs, &cso);
      dw2 = cso.payload[0];
      dw4 = cso.payload[1];
      dw5 = cso.payload[2];
      dw6 = cso.payload[3];
   }
   else {
      /* GS disabled */
      dw1 = 0;
      dw2 = 0;
      dw4 = 1 << GEN6_GS_URB_READ_LENGTH_SHIFT;
      dw5 = GEN6_GS_STATISTICS_ENABLE;
      dw6 = 0;
   }

   ilo_cp_begin(cp, cmd_len);
   ilo_cp_write(cp, cmd | (cmd_len - 2));
   ilo_cp_write(cp, dw1);
   ilo_cp_write(cp, dw2);
   ilo_cp_write(cp, 0); /* scratch */
   ilo_cp_write(cp, dw4);
   ilo_cp_write(cp, dw5);
   ilo_cp_write(cp, dw6);
   ilo_cp_end(cp);
}
/*
 * Precompute the 3DSTATE_CLIP payload DWords (DW1-DW3) from the pipe
 * rasterizer state, and decide whether the guard band test may be enabled.
 */
void
ilo_gpe_init_rasterizer_clip(const struct ilo_dev_info *dev,
                             const struct pipe_rasterizer_state *state,
                             struct ilo_rasterizer_clip *clip)
{
   uint32_t dw1, dw2, dw3;

   ILO_GPE_VALID_GEN(dev, 6, 7);

   dw1 = GEN6_CLIP_STATISTICS_ENABLE;

   if (dev->gen >= ILO_GEN(7)) {
      /*
       * From the Ivy Bridge PRM, volume 2 part 1, page 219:
       *
       *     "Workaround : Due to Hardware issue "EarlyCull" needs to be
       *      enabled only for the cases where the incoming primitive topology
       *      into the clipper guaranteed to be Trilist."
       *
       * What does this mean?
       */
      dw1 |= 0 << 19 |
             GEN7_CLIP_EARLY_CULL;

      /* GEN7 moved winding/culling from SF to CLIP */
      if (state->front_ccw)
         dw1 |= GEN7_CLIP_WINDING_CCW;

      switch (state->cull_face) {
      case PIPE_FACE_NONE:
         dw1 |= GEN7_CLIP_CULLMODE_NONE;
         break;
      case PIPE_FACE_FRONT:
         dw1 |= GEN7_CLIP_CULLMODE_FRONT;
         break;
      case PIPE_FACE_BACK:
         dw1 |= GEN7_CLIP_CULLMODE_BACK;
         break;
      case PIPE_FACE_FRONT_AND_BACK:
         dw1 |= GEN7_CLIP_CULLMODE_BOTH;
         break;
      }
   }

   dw2 = GEN6_CLIP_ENABLE |
         GEN6_CLIP_XY_TEST |
         state->clip_plane_enable << GEN6_USER_CLIP_CLIP_DISTANCES_SHIFT |
         GEN6_CLIP_MODE_NORMAL;

   /* [0, w] depth range (D3D) vs [-w, w] (OpenGL) */
   if (state->clip_halfz)
      dw2 |= GEN6_CLIP_API_D3D;
   else
      dw2 |= GEN6_CLIP_API_OGL;

   if (state->depth_clip)
      dw2 |= GEN6_CLIP_Z_TEST;

   /* provoking vertex indices for first- vs last-vertex flat shading */
   if (state->flatshade_first) {
      dw2 |= 0 << GEN6_CLIP_TRI_PROVOKE_SHIFT |
             0 << GEN6_CLIP_LINE_PROVOKE_SHIFT |
             1 << GEN6_CLIP_TRIFAN_PROVOKE_SHIFT;
   }
   else {
      dw2 |= 2 << GEN6_CLIP_TRI_PROVOKE_SHIFT |
             1 << GEN6_CLIP_LINE_PROVOKE_SHIFT |
             2 << GEN6_CLIP_TRIFAN_PROVOKE_SHIFT;
   }

   dw3 = 0x1 << GEN6_CLIP_MIN_POINT_WIDTH_SHIFT |
         0x7ff << GEN6_CLIP_MAX_POINT_WIDTH_SHIFT;

   clip->payload[0] = dw1;
   clip->payload[1] = dw2;
   clip->payload[2] = dw3;

   clip->can_enable_guardband = true;

   /*
    * There are several reasons that guard band test should be disabled
    *
    *  - GL wide points (to avoid partially visible object)
    *  - GL wide or AA lines (to avoid partially visible object)
    */
   if (state->point_size_per_vertex || state->point_size > 1.0f)
      clip->can_enable_guardband = false;
   if (state->line_smooth || state->line_width > 1.0f)
      clip->can_enable_guardband = false;
}
/*
 * Emit 3DSTATE_CLIP from the precomputed rasterizer clip payload, folding
 * in the guard-band enable, the FS barycentric interpolation requirements,
 * and the viewport count.  A NULL rasterizer emits an all-zero (disabled)
 * clip state.
 */
static void
gen6_emit_3DSTATE_CLIP(const struct ilo_dev_info *dev,
                       const struct ilo_rasterizer_state *rasterizer,
                       const struct ilo_shader_state *fs,
                       bool enable_guardband,
                       int num_viewports,
                       struct ilo_cp *cp)
{
   const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x12);
   const uint8_t cmd_len = 4;
   uint32_t dw1, dw2, dw3;

   if (rasterizer) {
      int interps;

      dw1 = rasterizer->clip.payload[0];
      dw2 = rasterizer->clip.payload[1];
      dw3 = rasterizer->clip.payload[2];

      if (enable_guardband && rasterizer->clip.can_enable_guardband)
         dw2 |= GEN6_CLIP_GB_TEST;

      /* enable non-perspective barycentrics only when the FS needs them */
      interps = (fs) ?  ilo_shader_get_kernel_param(fs,
            ILO_KERNEL_FS_BARYCENTRIC_INTERPOLATIONS) : 0;

      if (interps & (1 << BRW_WM_NONPERSPECTIVE_PIXEL_BARYCENTRIC |
                     1 << BRW_WM_NONPERSPECTIVE_CENTROID_BARYCENTRIC |
                     1 << BRW_WM_NONPERSPECTIVE_SAMPLE_BARYCENTRIC))
         dw2 |= GEN6_CLIP_NON_PERSPECTIVE_BARYCENTRIC_ENABLE;

      /* maximum VP index is programmed as (count - 1) */
      dw3 |= GEN6_CLIP_FORCE_ZERO_RTAINDEX |
             (num_viewports - 1);
   }
   else {
      dw1 = 0;
      dw2 = 0;
      dw3 = 0;
   }

   ilo_cp_begin(cp, cmd_len);
   ilo_cp_write(cp, cmd | (cmd_len - 2));
   ilo_cp_write(cp, dw1);
   ilo_cp_write(cp, dw2);
   ilo_cp_write(cp, dw3);
   ilo_cp_end(cp);
}
/*
 * Precompute the 3DSTATE_SF payload (DW1-DW3 plus the three depth-offset
 * floats) from the pipe rasterizer state, along with the DWord adjustments
 * needed when multisample rasterization is enabled.
 */
void
ilo_gpe_init_rasterizer_sf(const struct ilo_dev_info *dev,
                           const struct pipe_rasterizer_state *state,
                           struct ilo_rasterizer_sf *sf)
{
   float offset_const, offset_scale, offset_clamp;
   int line_width, point_width;
   uint32_t dw1, dw2, dw3;

   ILO_GPE_VALID_GEN(dev, 6, 7);

   /*
    * Scale the constant term.  The minimum representable value used by the HW
    * is not large enough to be the minimum resolvable difference.
    */
   offset_const = state->offset_units * 2.0f;

   offset_scale = state->offset_scale;
   offset_clamp = state->offset_clamp;

   /*
    * From the Sandy Bridge PRM, volume 2 part 1, page 248:
    *
    *     "This bit (Statistics Enable) should be set whenever clipping is
    *      enabled and the Statistics Enable bit is set in CLIP_STATE. It
    *      should be cleared if clipping is disabled or Statistics Enable in
    *      CLIP_STATE is clear."
    */
   dw1 = GEN6_SF_STATISTICS_ENABLE |
         GEN6_SF_VIEWPORT_TRANSFORM_ENABLE;

   /* XXX GEN6 path seems to work fine for GEN7 (hence the "false &&") */
   if (false && dev->gen >= ILO_GEN(7)) {
      /*
       * From the Ivy Bridge PRM, volume 2 part 1, page 258:
       *
       *     "This bit (Legacy Global Depth Bias Enable, Global Depth Offset
       *      Enable Solid , Global Depth Offset Enable Wireframe, and Global
       *      Depth Offset Enable Point) should be set whenever non zero depth
       *      bias (Slope, Bias) values are used. Setting this bit may have
       *      some degradation of performance for some workloads."
       */
      if (state->offset_tri || state->offset_line || state->offset_point) {
         /* XXX need to scale offset_const according to the depth format */
         dw1 |= GEN6_SF_LEGACY_GLOBAL_DEPTH_BIAS;

         dw1 |= GEN6_SF_GLOBAL_DEPTH_OFFSET_SOLID |
                GEN6_SF_GLOBAL_DEPTH_OFFSET_WIREFRAME |
                GEN6_SF_GLOBAL_DEPTH_OFFSET_POINT;
      }
      else {
         offset_const = 0.0f;
         offset_scale = 0.0f;
         offset_clamp = 0.0f;
      }
   }
   else {
      /* enable depth offset per fill mode */
      if (state->offset_tri)
         dw1 |= GEN6_SF_GLOBAL_DEPTH_OFFSET_SOLID;
      if (state->offset_line)
         dw1 |= GEN6_SF_GLOBAL_DEPTH_OFFSET_WIREFRAME;
      if (state->offset_point)
         dw1 |= GEN6_SF_GLOBAL_DEPTH_OFFSET_POINT;
   }

   switch (state->fill_front) {
   case PIPE_POLYGON_MODE_FILL:
      dw1 |= GEN6_SF_FRONT_SOLID;
      break;
   case PIPE_POLYGON_MODE_LINE:
      dw1 |= GEN6_SF_FRONT_WIREFRAME;
      break;
   case PIPE_POLYGON_MODE_POINT:
      dw1 |= GEN6_SF_FRONT_POINT;
      break;
   }

   switch (state->fill_back) {
   case PIPE_POLYGON_MODE_FILL:
      dw1 |= GEN6_SF_BACK_SOLID;
      break;
   case PIPE_POLYGON_MODE_LINE:
      dw1 |= GEN6_SF_BACK_WIREFRAME;
      break;
   case PIPE_POLYGON_MODE_POINT:
      dw1 |= GEN6_SF_BACK_POINT;
      break;
   }

   if (state->front_ccw)
      dw1 |= GEN6_SF_WINDING_CCW;

   dw2 = 0;

   if (state->line_smooth) {
      /*
       * From the Sandy Bridge PRM, volume 2 part 1, page 251:
       *
       *     "This field (Anti-aliasing Enable) must be disabled if any of the
       *      render targets have integer (UINT or SINT) surface format."
       *
       * From the Sandy Bridge PRM, volume 2 part 1, page 317:
       *
       *     "This field (Hierarchical Depth Buffer Enable) must be disabled
       *      if Anti-aliasing Enable in 3DSTATE_SF is enabled."
       *
       * TODO We do not check those yet.
       */
      dw2 |= GEN6_SF_LINE_AA_ENABLE |
             GEN6_SF_LINE_END_CAP_WIDTH_1_0;
   }

   switch (state->cull_face) {
   case PIPE_FACE_NONE:
      dw2 |= GEN6_SF_CULL_NONE;
      break;
   case PIPE_FACE_FRONT:
      dw2 |= GEN6_SF_CULL_FRONT;
      break;
   case PIPE_FACE_BACK:
      dw2 |= GEN6_SF_CULL_BACK;
      break;
   case PIPE_FACE_FRONT_AND_BACK:
      dw2 |= GEN6_SF_CULL_BOTH;
      break;
   }

   /*
    * Smooth lines should intersect ceil(line_width) or (ceil(line_width) + 1)
    * pixels in the minor direction.  We have to make the lines slightly
    * thicker, 0.5 pixel on both sides, so that they intersect that many
    * pixels are considered into the lines.
    *
    * Line width is in U3.7.
    */
   line_width = (int) ((state->line_width +
            (float) state->line_smooth) * 128.0f + 0.5f);
   line_width = CLAMP(line_width, 0, 1023);

   /* a width of exactly 1.0 (and not smoothed) selects the GIQ rules */
   if (line_width == 128 && !state->line_smooth) {
      /* use GIQ rules */
      line_width = 0;
   }

   dw2 |= line_width << GEN6_SF_LINE_WIDTH_SHIFT;

   if (state->scissor)
      dw2 |= GEN6_SF_SCISSOR_ENABLE;

   dw3 = GEN6_SF_LINE_AA_MODE_TRUE |
         GEN6_SF_VERTEX_SUBPIXEL_8BITS;

   /* bit 31 is Last Pixel Enable */
   if (state->line_last_pixel)
      dw3 |= 1 << 31;

   /* provoking vertex indices for first- vs last-vertex flat shading */
   if (state->flatshade_first) {
      dw3 |= 0 << GEN6_SF_TRI_PROVOKE_SHIFT |
             0 << GEN6_SF_LINE_PROVOKE_SHIFT |
             1 << GEN6_SF_TRIFAN_PROVOKE_SHIFT;
   }
   else {
      dw3 |= 2 << GEN6_SF_TRI_PROVOKE_SHIFT |
             1 << GEN6_SF_LINE_PROVOKE_SHIFT |
             2 << GEN6_SF_TRIFAN_PROVOKE_SHIFT;
   }

   if (!state->point_size_per_vertex)
      dw3 |= GEN6_SF_USE_STATE_POINT_WIDTH;

   /* in U8.3 */
   point_width = (int) (state->point_size * 8.0f + 0.5f);
   point_width = CLAMP(point_width, 1, 2047);

   dw3 |= point_width;

   STATIC_ASSERT(Elements(sf->payload) >= 6);
   sf->payload[0] = dw1;
   sf->payload[1] = dw2;
   sf->payload[2] = dw3;
   sf->payload[3] = fui(offset_const);
   sf->payload[4] = fui(offset_scale);
   sf->payload[5] = fui(offset_clamp);

   if (state->multisample) {
      sf->dw_msaa = GEN6_SF_MSRAST_ON_PATTERN;

      /*
       * From the Sandy Bridge PRM, volume 2 part 1, page 251:
       *
       *     "Software must not program a value of 0.0 when running in
       *      MSRASTMODE_ON_xxx modes - zero-width lines are not available
       *      when multisampling rasterization is enabled."
       */
      if (!line_width) {
         line_width = 128; /* 1.0f */

         sf->dw_msaa |= line_width << GEN6_SF_LINE_WIDTH_SHIFT;
      }
   }
   else {
      sf->dw_msaa = 0;
   }
}
/** |
* Fill in DW2 to DW7 of 3DSTATE_SF. |
*/ |
void |
ilo_gpe_gen6_fill_3dstate_sf_raster(const struct ilo_dev_info *dev, |
const struct ilo_rasterizer_state *rasterizer, |
int num_samples, |
enum pipe_format depth_format, |
uint32_t *payload, unsigned payload_len) |
{ |
const struct ilo_rasterizer_sf *sf = &rasterizer->sf; |
assert(payload_len == Elements(sf->payload)); |
if (sf) { |
memcpy(payload, sf->payload, sizeof(sf->payload)); |
if (num_samples > 1) |
payload[1] |= sf->dw_msaa; |
if (dev->gen >= ILO_GEN(7)) { |
int format; |
/* separate stencil */ |
switch (depth_format) { |
case PIPE_FORMAT_Z24_UNORM_S8_UINT: |
depth_format = PIPE_FORMAT_Z24X8_UNORM; |
break; |
case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT: |
depth_format = PIPE_FORMAT_Z32_FLOAT;; |
break; |
case PIPE_FORMAT_S8_UINT: |
depth_format = PIPE_FORMAT_NONE; |
break; |
default: |
break; |
} |
format = gen6_translate_depth_format(depth_format); |
/* FLOAT surface is assumed when there is no depth buffer */ |
if (format < 0) |
format = BRW_DEPTHFORMAT_D32_FLOAT; |
payload[0] |= format << GEN7_SF_DEPTH_BUFFER_SURFACE_FORMAT_SHIFT; |
} |
} |
else { |
payload[0] = 0; |
payload[1] = (num_samples > 1) ? GEN6_SF_MSRAST_ON_PATTERN : 0; |
payload[2] = 0; |
payload[3] = 0; |
payload[4] = 0; |
payload[5] = 0; |
} |
} |
/**
 * Fill in DW1 and DW8 to DW19 of 3DSTATE_SF (the attribute setup part),
 * based on the fragment shader's input routing and the rasterizer's point
 * sprite settings.
 *
 * \param fs       fragment shader state; may be NULL
 * \param last_sh  last shader stage before rasterization; currently unused
 *                 by this function
 * \param dw       receives exactly 13 dwords
 */
void
ilo_gpe_gen6_fill_3dstate_sf_sbe(const struct ilo_dev_info *dev,
                                 const struct ilo_rasterizer_state *rasterizer,
                                 const struct ilo_shader_state *fs,
                                 const struct ilo_shader_state *last_sh,
                                 uint32_t *dw, int num_dwords)
{
   int output_count, vue_offset, vue_len;
   const struct ilo_kernel_routing *routing;
   ILO_GPE_VALID_GEN(dev, 6, 7);
   assert(num_dwords == 13);
   if (!fs) {
      /* no FS bound: clear everything but keep a URB entry read length of 1 */
      memset(dw, 0, sizeof(dw[0]) * num_dwords);
      if (dev->gen >= ILO_GEN(7))
         dw[0] = 1 << GEN7_SBE_URB_ENTRY_READ_LENGTH_SHIFT;
      else
         dw[0] = 1 << GEN6_SF_URB_ENTRY_READ_LENGTH_SHIFT;
      return;
   }
   output_count = ilo_shader_get_kernel_param(fs, ILO_KERNEL_INPUT_COUNT);
   assert(output_count <= 32);
   routing = ilo_shader_get_kernel_routing(fs);
   /* URB read offset/length are programmed in pairs of attributes */
   vue_offset = routing->source_skip;
   assert(vue_offset % 2 == 0);
   vue_offset /= 2;
   vue_len = (routing->source_len + 1) / 2;
   /* the read length field may not be zero */
   if (!vue_len)
      vue_len = 1;
   if (dev->gen >= ILO_GEN(7)) {
      dw[0] = output_count << GEN7_SBE_NUM_OUTPUTS_SHIFT |
              vue_len << GEN7_SBE_URB_ENTRY_READ_LENGTH_SHIFT |
              vue_offset << GEN7_SBE_URB_ENTRY_READ_OFFSET_SHIFT;
      if (routing->swizzle_enable)
         dw[0] |= GEN7_SBE_SWIZZLE_ENABLE;
   }
   else {
      dw[0] = output_count << GEN6_SF_NUM_OUTPUTS_SHIFT |
              vue_len << GEN6_SF_URB_ENTRY_READ_LENGTH_SHIFT |
              vue_offset << GEN6_SF_URB_ENTRY_READ_OFFSET_SHIFT;
      if (routing->swizzle_enable)
         dw[0] |= GEN6_SF_SWIZZLE_ENABLE;
   }
   switch (rasterizer->state.sprite_coord_mode) {
   case PIPE_SPRITE_COORD_UPPER_LEFT:
      dw[0] |= GEN6_SF_POINT_SPRITE_UPPERLEFT;
      break;
   case PIPE_SPRITE_COORD_LOWER_LEFT:
      dw[0] |= GEN6_SF_POINT_SPRITE_LOWERLEFT;
      break;
   }
   /* dw[1] to dw[8]: sixteen 16-bit attribute swizzle controls */
   STATIC_ASSERT(Elements(routing->swizzles) >= 16);
   memcpy(&dw[1], routing->swizzles, 2 * 16);
   /*
    * From the Ivy Bridge PRM, volume 2 part 1, page 268:
    *
    *     "This field (Point Sprite Texture Coordinate Enable) must be
    *      programmed to 0 when non-point primitives are rendered."
    *
    * TODO We do not check that yet.
    */
   dw[9] = routing->point_sprite_enable;
   dw[10] = routing->const_interp_enable;
   /* WrapShortest enables */
   dw[11] = 0;
   dw[12] = 0;
}
static void |
gen6_emit_3DSTATE_SF(const struct ilo_dev_info *dev, |
const struct ilo_rasterizer_state *rasterizer, |
const struct ilo_shader_state *fs, |
const struct ilo_shader_state *last_sh, |
struct ilo_cp *cp) |
{ |
const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x13); |
const uint8_t cmd_len = 20; |
uint32_t payload_raster[6], payload_sbe[13]; |
ILO_GPE_VALID_GEN(dev, 6, 6); |
ilo_gpe_gen6_fill_3dstate_sf_raster(dev, rasterizer, |
1, PIPE_FORMAT_NONE, payload_raster, Elements(payload_raster)); |
ilo_gpe_gen6_fill_3dstate_sf_sbe(dev, rasterizer, |
fs, last_sh, payload_sbe, Elements(payload_sbe)); |
ilo_cp_begin(cp, cmd_len); |
ilo_cp_write(cp, cmd | (cmd_len - 2)); |
ilo_cp_write(cp, payload_sbe[0]); |
ilo_cp_write_multi(cp, payload_raster, 6); |
ilo_cp_write_multi(cp, &payload_sbe[1], 12); |
ilo_cp_end(cp); |
} |
void |
ilo_gpe_init_rasterizer_wm_gen6(const struct ilo_dev_info *dev, |
const struct pipe_rasterizer_state *state, |
struct ilo_rasterizer_wm *wm) |
{ |
uint32_t dw5, dw6; |
ILO_GPE_VALID_GEN(dev, 6, 6); |
/* only the FF unit states are set, as in GEN7 */ |
dw5 = GEN6_WM_LINE_AA_WIDTH_2_0; |
/* same value as in 3DSTATE_SF */ |
if (state->line_smooth) |
dw5 |= GEN6_WM_LINE_END_CAP_AA_WIDTH_1_0; |
if (state->poly_stipple_enable) |
dw5 |= GEN6_WM_POLYGON_STIPPLE_ENABLE; |
if (state->line_stipple_enable) |
dw5 |= GEN6_WM_LINE_STIPPLE_ENABLE; |
dw6 = GEN6_WM_POSITION_ZW_PIXEL | |
GEN6_WM_MSRAST_OFF_PIXEL | |
GEN6_WM_MSDISPMODE_PERSAMPLE; |
if (state->bottom_edge_rule) |
dw6 |= GEN6_WM_POINT_RASTRULE_UPPER_RIGHT; |
/* |
* assertion that makes sure |
* |
* dw6 |= wm->dw_msaa_rast | wm->dw_msaa_disp; |
* |
* is valid |
*/ |
STATIC_ASSERT(GEN6_WM_MSRAST_OFF_PIXEL == 0 && |
GEN6_WM_MSDISPMODE_PERSAMPLE == 0); |
wm->dw_msaa_rast = |
(state->multisample) ? GEN6_WM_MSRAST_ON_PATTERN : 0; |
wm->dw_msaa_disp = GEN6_WM_MSDISPMODE_PERPIXEL; |
STATIC_ASSERT(Elements(wm->payload) >= 2); |
wm->payload[0] = dw5; |
wm->payload[1] = dw6; |
} |
/**
 * Initialize the GEN6 CSO for a fragment shader.  The four payload dwords
 * hold what will become DW2, DW4, DW5, and DW6 of 3DSTATE_WM; see
 * gen6_emit_3DSTATE_WM() for where they are consumed.
 */
void
ilo_gpe_init_fs_cso_gen6(const struct ilo_dev_info *dev,
                         const struct ilo_shader_state *fs,
                         struct ilo_shader_cso *cso)
{
   int start_grf, input_count, interps, max_threads;
   uint32_t dw2, dw4, dw5, dw6;
   ILO_GPE_VALID_GEN(dev, 6, 6);
   start_grf = ilo_shader_get_kernel_param(fs, ILO_KERNEL_URB_DATA_START_REG);
   input_count = ilo_shader_get_kernel_param(fs, ILO_KERNEL_INPUT_COUNT);
   interps = ilo_shader_get_kernel_param(fs,
         ILO_KERNEL_FS_BARYCENTRIC_INTERPOLATIONS);
   /* see brwCreateContext() */
   max_threads = (dev->gt == 2) ? 80 : 40;
   /* IEEE float mode; the ALT-mode path is intentionally never taken */
   dw2 = (true) ? 0 : GEN6_WM_FLOATING_POINT_MODE_ALT;
   /* only the 8-wide dispatch slot (slot 0) is used; see below */
   dw4 = start_grf << GEN6_WM_DISPATCH_START_GRF_SHIFT_0 |
         0 << GEN6_WM_DISPATCH_START_GRF_SHIFT_1 |
         0 << GEN6_WM_DISPATCH_START_GRF_SHIFT_2;
   dw5 = (max_threads - 1) << GEN6_WM_MAX_THREADS_SHIFT;
   /*
    * From the Sandy Bridge PRM, volume 2 part 1, page 275:
    *
    *     "This bit (Pixel Shader Kill Pixel), if ENABLED, indicates that the
    *      PS kernel or color calculator has the ability to kill (discard)
    *      pixels or samples, other than due to depth or stencil testing.
    *      This bit is required to be ENABLED in the following situations:
    *
    *      The API pixel shader program contains "killpix" or "discard"
    *      instructions, or other code in the pixel shader kernel that can
    *      cause the final pixel mask to differ from the pixel mask received
    *      on dispatch.
    *
    *      A sampler with chroma key enabled with kill pixel mode is used by
    *      the pixel shader.
    *
    *      Any render target has Alpha Test Enable or AlphaToCoverage Enable
    *      enabled.
    *
    *      The pixel shader kernel generates and outputs oMask.
    *
    *      Note: As ClipDistance clipping is fully supported in hardware and
    *      therefore not via PS instructions, there should be no need to
    *      ENABLE this bit due to ClipDistance clipping."
    */
   if (ilo_shader_get_kernel_param(fs, ILO_KERNEL_FS_USE_KILL))
      dw5 |= GEN6_WM_KILL_ENABLE;
   /*
    * From the Sandy Bridge PRM, volume 2 part 1, page 275:
    *
    *     "If a NULL Depth Buffer is selected, the Pixel Shader Computed Depth
    *      field must be set to disabled."
    *
    * TODO This is not checked yet.
    */
   if (ilo_shader_get_kernel_param(fs, ILO_KERNEL_FS_OUTPUT_Z))
      dw5 |= GEN6_WM_COMPUTED_DEPTH;
   if (ilo_shader_get_kernel_param(fs, ILO_KERNEL_FS_INPUT_Z))
      dw5 |= GEN6_WM_USES_SOURCE_DEPTH;
   if (ilo_shader_get_kernel_param(fs, ILO_KERNEL_FS_INPUT_W))
      dw5 |= GEN6_WM_USES_SOURCE_W;
   /*
    * TODO set this bit only when
    *
    *  a) fs writes colors and color is not masked, or
    *  b) fs writes depth, or
    *  c) fs or cc kills
    */
   if (true)
      dw5 |= GEN6_WM_DISPATCH_ENABLE;
   /* only 8-wide dispatch is supported here */
   assert(!ilo_shader_get_kernel_param(fs, ILO_KERNEL_FS_DISPATCH_16_OFFSET));
   dw5 |= GEN6_WM_8_DISPATCH_ENABLE;
   dw6 = input_count << GEN6_WM_NUM_SF_OUTPUTS_SHIFT |
         GEN6_WM_POSOFFSET_NONE |
         interps << GEN6_WM_BARYCENTRIC_INTERPOLATION_MODE_SHIFT;
   STATIC_ASSERT(Elements(cso->payload) >= 4);
   cso->payload[0] = dw2;
   cso->payload[1] = dw4;
   cso->payload[2] = dw5;
   cso->payload[3] = dw6;
}
/**
 * Emit 3DSTATE_WM on GEN6, combining the FS CSO payload, the rasterizer's
 * precomputed WM bits, and draw-time state (sampler count, dual-source
 * blending, whether the color calculator may kill pixels).
 */
static void
gen6_emit_3DSTATE_WM(const struct ilo_dev_info *dev,
                     const struct ilo_shader_state *fs,
                     int num_samplers,
                     const struct ilo_rasterizer_state *rasterizer,
                     bool dual_blend, bool cc_may_kill,
                     struct ilo_cp *cp)
{
   const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x14);
   const uint8_t cmd_len = 9;
   const int num_samples = 1;
   const struct ilo_shader_cso *fs_cso;
   uint32_t dw2, dw4, dw5, dw6;
   ILO_GPE_VALID_GEN(dev, 6, 6);
   if (!fs) {
      /* see brwCreateContext() */
      const int max_threads = (dev->gt == 2) ? 80 : 40;
      /* no FS bound: emit a mostly-zero command with dispatching disabled */
      ilo_cp_begin(cp, cmd_len);
      ilo_cp_write(cp, cmd | (cmd_len - 2));
      ilo_cp_write(cp, 0);
      ilo_cp_write(cp, 0);
      ilo_cp_write(cp, 0);
      ilo_cp_write(cp, 0);
      /* honor the valid range even if dispatching is disabled */
      ilo_cp_write(cp, (max_threads - 1) << GEN6_WM_MAX_THREADS_SHIFT);
      ilo_cp_write(cp, 0);
      ilo_cp_write(cp, 0);
      ilo_cp_write(cp, 0);
      ilo_cp_end(cp);
      return;
   }
   /* start from the DW2/DW4/DW5/DW6 built by ilo_gpe_init_fs_cso_gen6() */
   fs_cso = ilo_shader_get_kernel_cso(fs);
   dw2 = fs_cso->payload[0];
   dw4 = fs_cso->payload[1];
   dw5 = fs_cso->payload[2];
   dw6 = fs_cso->payload[3];
   /* the sampler count field is in multiples of 4, rounded up */
   dw2 |= (num_samplers + 3) / 4 << GEN6_WM_SAMPLER_COUNT_SHIFT;
   if (true) {
      dw4 |= GEN6_WM_STATISTICS_ENABLE;
   }
   else {
      /*
       * From the Sandy Bridge PRM, volume 2 part 1, page 248:
       *
       *     "This bit (Statistics Enable) must be disabled if either of these
       *      bits is set: Depth Buffer Clear , Hierarchical Depth Buffer
       *      Resolve Enable or Depth Buffer Resolve Enable."
       */
      dw4 |= GEN6_WM_DEPTH_CLEAR;
      dw4 |= GEN6_WM_DEPTH_RESOLVE;
      dw4 |= GEN6_WM_HIERARCHICAL_DEPTH_RESOLVE;
   }
   if (cc_may_kill) {
      dw5 |= GEN6_WM_KILL_ENABLE |
             GEN6_WM_DISPATCH_ENABLE;
   }
   if (dual_blend)
      dw5 |= GEN6_WM_DUAL_SOURCE_BLEND_ENABLE;
   dw5 |= rasterizer->wm.payload[0];
   dw6 |= rasterizer->wm.payload[1];
   if (num_samples > 1) {
      dw6 |= rasterizer->wm.dw_msaa_rast |
             rasterizer->wm.dw_msaa_disp;
   }
   ilo_cp_begin(cp, cmd_len);
   ilo_cp_write(cp, cmd | (cmd_len - 2));
   ilo_cp_write(cp, ilo_shader_get_kernel_offset(fs));
   ilo_cp_write(cp, dw2);
   ilo_cp_write(cp, 0); /* scratch */
   ilo_cp_write(cp, dw4);
   ilo_cp_write(cp, dw5);
   ilo_cp_write(cp, dw6);
   ilo_cp_write(cp, 0); /* kernel 1 */
   ilo_cp_write(cp, 0); /* kernel 2 */
   ilo_cp_end(cp);
}
/*
 * Fill the four push constant buffer dwords shared by
 * 3DSTATE_CONSTANT_{VS,GS,PS}.  Each enabled slot packs the buffer
 * address with its read length (in 256-bit units, minus one).  Returns a
 * 4-bit mask of the enabled slots.
 */
static unsigned
gen6_fill_3dstate_constant(const struct ilo_dev_info *dev,
                           const uint32_t *bufs, const int *sizes,
                           int num_bufs, int max_read_length,
                           uint32_t *dw, int num_dwords)
{
   unsigned enable_mask = 0x0;
   int total_len = 0;
   int slot;
   assert(num_dwords == 4);
   for (slot = 0; slot < 4; slot++) {
      if (slot >= num_bufs || !sizes[slot]) {
         dw[slot] = 0;
         continue;
      }
      {
         /* in 256-bit units minus one */
         const int read_len = (sizes[slot] + 31) / 32 - 1;
         assert(bufs[slot] % 32 == 0);
         assert(read_len < 32);
         enable_mask |= 1 << slot;
         dw[slot] = bufs[slot] | read_len;
         total_len += read_len + 1;
      }
   }
   assert(total_len <= max_read_length);
   return enable_mask;
}
/* Emit 3DSTATE_CONSTANT_VS with up to four push constant buffers. */
static void
gen6_emit_3DSTATE_CONSTANT_VS(const struct ilo_dev_info *dev,
                              const uint32_t *bufs, const int *sizes,
                              int num_bufs,
                              struct ilo_cp *cp)
{
   const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x15);
   const uint8_t cmd_len = 5;
   uint32_t slot_dw[4];
   uint32_t enable_mask;
   int i;
   ILO_GPE_VALID_GEN(dev, 6, 6);
   assert(num_bufs <= 4);
   /*
    * From the Sandy Bridge PRM, volume 2 part 1, page 138:
    *
    *     "The sum of all four read length fields (each incremented to
    *      represent the actual read length) must be less than or equal to 32"
    */
   enable_mask = gen6_fill_3dstate_constant(dev, bufs, sizes, num_bufs, 32,
         slot_dw, Elements(slot_dw));
   ilo_cp_begin(cp, cmd_len);
   ilo_cp_write(cp, cmd | (cmd_len - 2) | enable_mask << 12);
   for (i = 0; i < 4; i++)
      ilo_cp_write(cp, slot_dw[i]);
   ilo_cp_end(cp);
}
/* Emit 3DSTATE_CONSTANT_GS with up to four push constant buffers. */
static void
gen6_emit_3DSTATE_CONSTANT_GS(const struct ilo_dev_info *dev,
                              const uint32_t *bufs, const int *sizes,
                              int num_bufs,
                              struct ilo_cp *cp)
{
   const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x16);
   const uint8_t cmd_len = 5;
   uint32_t slot_dw[4];
   uint32_t enable_mask;
   int i;
   ILO_GPE_VALID_GEN(dev, 6, 6);
   assert(num_bufs <= 4);
   /*
    * From the Sandy Bridge PRM, volume 2 part 1, page 161:
    *
    *     "The sum of all four read length fields (each incremented to
    *      represent the actual read length) must be less than or equal to 64"
    */
   enable_mask = gen6_fill_3dstate_constant(dev, bufs, sizes, num_bufs, 64,
         slot_dw, Elements(slot_dw));
   ilo_cp_begin(cp, cmd_len);
   ilo_cp_write(cp, cmd | (cmd_len - 2) | enable_mask << 12);
   for (i = 0; i < 4; i++)
      ilo_cp_write(cp, slot_dw[i]);
   ilo_cp_end(cp);
}
/* Emit 3DSTATE_CONSTANT_PS with up to four push constant buffers. */
static void
gen6_emit_3DSTATE_CONSTANT_PS(const struct ilo_dev_info *dev,
                              const uint32_t *bufs, const int *sizes,
                              int num_bufs,
                              struct ilo_cp *cp)
{
   const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x17);
   const uint8_t cmd_len = 5;
   uint32_t slot_dw[4];
   uint32_t enable_mask;
   int i;
   ILO_GPE_VALID_GEN(dev, 6, 6);
   assert(num_bufs <= 4);
   /*
    * From the Sandy Bridge PRM, volume 2 part 1, page 287:
    *
    *     "The sum of all four read length fields (each incremented to
    *      represent the actual read length) must be less than or equal to 64"
    */
   enable_mask = gen6_fill_3dstate_constant(dev, bufs, sizes, num_bufs, 64,
         slot_dw, Elements(slot_dw));
   ilo_cp_begin(cp, cmd_len);
   ilo_cp_write(cp, cmd | (cmd_len - 2) | enable_mask << 12);
   for (i = 0; i < 4; i++)
      ilo_cp_write(cp, slot_dw[i]);
   ilo_cp_end(cp);
}
/* Emit 3DSTATE_SAMPLE_MASK; only the low 4 bits are valid on GEN6. */
static void
gen6_emit_3DSTATE_SAMPLE_MASK(const struct ilo_dev_info *dev,
                              unsigned sample_mask,
                              struct ilo_cp *cp)
{
   const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x18);
   const uint8_t cmd_len = 2;
   ILO_GPE_VALID_GEN(dev, 6, 6);
   ilo_cp_begin(cp, cmd_len);
   ilo_cp_write(cp, cmd | (cmd_len - 2));
   /* drop bits beyond the four supported samples */
   ilo_cp_write(cp, sample_mask & 0xf);
   ilo_cp_end(cp);
}
static void |
gen6_emit_3DSTATE_DRAWING_RECTANGLE(const struct ilo_dev_info *dev, |
unsigned x, unsigned y, |
unsigned width, unsigned height, |
struct ilo_cp *cp) |
{ |
const uint32_t cmd = ILO_GPE_CMD(0x3, 0x1, 0x00); |
const uint8_t cmd_len = 4; |
unsigned xmax = x + width - 1; |
unsigned ymax = y + height - 1; |
int rect_limit; |
ILO_GPE_VALID_GEN(dev, 6, 7); |
if (dev->gen >= ILO_GEN(7)) { |
rect_limit = 16383; |
} |
else { |
/* |
* From the Sandy Bridge PRM, volume 2 part 1, page 230: |
* |
* "[DevSNB] Errata: This field (Clipped Drawing Rectangle Y Min) |
* must be an even number" |
*/ |
assert(y % 2 == 0); |
rect_limit = 8191; |
} |
if (x > rect_limit) x = rect_limit; |
if (y > rect_limit) y = rect_limit; |
if (xmax > rect_limit) xmax = rect_limit; |
if (ymax > rect_limit) ymax = rect_limit; |
ilo_cp_begin(cp, cmd_len); |
ilo_cp_write(cp, cmd | (cmd_len - 2)); |
ilo_cp_write(cp, y << 16 | x); |
ilo_cp_write(cp, ymax << 16 | xmax); |
/* |
* There is no need to set the origin. It is intended to support front |
* buffer rendering. |
*/ |
ilo_cp_write(cp, 0); |
ilo_cp_end(cp); |
} |
/**
 * Everything needed to program the depth/stencil buffer state commands
 * (3DSTATE_DEPTH_BUFFER, 3DSTATE_STENCIL_BUFFER, and
 * 3DSTATE_HIER_DEPTH_BUFFER); see zs_init_info() and
 * ilo_gpe_init_zs_surface().
 */
struct ilo_zs_surface_info {
   int surface_type;          /* one of BRW_SURFACE_x */
   int format;                /* one of BRW_DEPTHFORMAT_x */
   struct {
      struct intel_bo *bo;    /* backing buffer, or NULL when absent */
      unsigned stride;
      enum intel_tiling_mode tiling;
      uint32_t offset;        /* offset of the selected slice in the bo */
   } zs, stencil, hiz;        /* depth, separate stencil, and HiZ buffers */
   unsigned width, height, depth;
   unsigned lod, first_layer, num_layers;
   uint32_t x_offset, y_offset;  /* X/Y offsets of the slice; see zs_init_info() */
};
static void |
zs_init_info_null(const struct ilo_dev_info *dev, |
struct ilo_zs_surface_info *info) |
{ |
ILO_GPE_VALID_GEN(dev, 6, 7); |
memset(info, 0, sizeof(*info)); |
info->surface_type = BRW_SURFACE_NULL; |
info->format = BRW_DEPTHFORMAT_D32_FLOAT; |
info->width = 1; |
info->height = 1; |
info->depth = 1; |
info->num_layers = 1; |
} |
/**
 * Fill an ilo_zs_surface_info from a texture, choosing the hardware depth
 * format, locating the depth/separate-stencil/HiZ buffers, and (with
 * rebase_layer) pointing the surface at a single slice via X/Y offsets.
 */
static void
zs_init_info(const struct ilo_dev_info *dev,
             const struct ilo_texture *tex,
             enum pipe_format format,
             unsigned level,
             unsigned first_layer, unsigned num_layers,
             struct ilo_zs_surface_info *info)
{
   const bool rebase_layer = true;
   /* HiZ is not supported yet; this also forces combined stencil on GEN6 */
   struct intel_bo * const hiz_bo = NULL;
   bool separate_stencil;
   /* X/Y offsets for the depth, stencil, and hiz slices, respectively */
   uint32_t x_offset[3], y_offset[3];
   ILO_GPE_VALID_GEN(dev, 6, 7);
   memset(info, 0, sizeof(*info));
   info->surface_type = ilo_gpe_gen6_translate_texture(tex->base.target);
   if (info->surface_type == BRW_SURFACE_CUBE) {
      /*
       * From the Sandy Bridge PRM, volume 2 part 1, page 325-326:
       *
       *     "For Other Surfaces (Cube Surfaces):
       *      This field (Minimum Array Element) is ignored."
       *
       *     "For Other Surfaces (Cube Surfaces):
       *      This field (Render Target View Extent) is ignored."
       *
       * As such, we cannot set first_layer and num_layers on cube surfaces.
       * To work around that, treat it as a 2D surface.
       */
      info->surface_type = BRW_SURFACE_2D;
   }
   if (dev->gen >= ILO_GEN(7)) {
      separate_stencil = true;
   }
   else {
      /*
       * From the Sandy Bridge PRM, volume 2 part 1, page 317:
       *
       *     "This field (Separate Stencil Buffer Enable) must be set to the
       *      same value (enabled or disabled) as Hierarchical Depth Buffer
       *      Enable."
       */
      separate_stencil = (hiz_bo != NULL);
   }
   /*
    * From the Sandy Bridge PRM, volume 2 part 1, page 317:
    *
    *     "If this field (Hierarchical Depth Buffer Enable) is enabled, the
    *      Surface Format of the depth buffer cannot be
    *      D32_FLOAT_S8X24_UINT or D24_UNORM_S8_UINT. Use of stencil
    *      requires the separate stencil buffer."
    *
    * From the Ironlake PRM, volume 2 part 1, page 330:
    *
    *     "If this field (Separate Stencil Buffer Enable) is disabled, the
    *      Surface Format of the depth buffer cannot be D24_UNORM_X8_UINT."
    *
    * There is no similar restriction for GEN6.  But when D24_UNORM_X8_UINT
    * is indeed used, the depth values output by the fragment shaders will
    * be different when read back.
    *
    * As for GEN7+, separate_stencil is always true.
    */
   switch (format) {
   case PIPE_FORMAT_Z16_UNORM:
      info->format = BRW_DEPTHFORMAT_D16_UNORM;
      break;
   case PIPE_FORMAT_Z32_FLOAT:
      info->format = BRW_DEPTHFORMAT_D32_FLOAT;
      break;
   case PIPE_FORMAT_Z24X8_UNORM:
   case PIPE_FORMAT_Z24_UNORM_S8_UINT:
      info->format = (separate_stencil) ?
         BRW_DEPTHFORMAT_D24_UNORM_X8_UINT :
         BRW_DEPTHFORMAT_D24_UNORM_S8_UINT;
      break;
   case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
      info->format = (separate_stencil) ?
         BRW_DEPTHFORMAT_D32_FLOAT :
         BRW_DEPTHFORMAT_D32_FLOAT_S8X24_UINT;
      break;
   case PIPE_FORMAT_S8_UINT:
      if (separate_stencil) {
         /* stencil-only: the depth buffer is absent, use a FLOAT format */
         info->format = BRW_DEPTHFORMAT_D32_FLOAT;
         break;
      }
      /* fall through */
   default:
      assert(!"unsupported depth/stencil format");
      zs_init_info_null(dev, info);
      return;
      break;
   }
   /* locate the depth buffer, unless this is stencil-only */
   if (format != PIPE_FORMAT_S8_UINT) {
      info->zs.bo = tex->bo;
      info->zs.stride = tex->bo_stride;
      info->zs.tiling = tex->tiling;
      if (rebase_layer) {
         info->zs.offset = ilo_texture_get_slice_offset(tex,
               level, first_layer, &x_offset[0], &y_offset[0]);
      }
   }
   /* locate the (separate) stencil buffer, if any */
   if (tex->separate_s8 || format == PIPE_FORMAT_S8_UINT) {
      const struct ilo_texture *s8_tex =
         (tex->separate_s8) ? tex->separate_s8 : tex;
      info->stencil.bo = s8_tex->bo;
      /*
       * From the Sandy Bridge PRM, volume 2 part 1, page 329:
       *
       *     "The pitch must be set to 2x the value computed based on width,
       *      as the stencil buffer is stored with two rows interleaved."
       *
       * According to the classic driver, we need to do the same for GEN7+
       * even though the Ivy Bridge PRM does not say anything about it.
       */
      info->stencil.stride = s8_tex->bo_stride * 2;
      info->stencil.tiling = s8_tex->tiling;
      if (rebase_layer) {
         info->stencil.offset = ilo_texture_get_slice_offset(s8_tex,
               level, first_layer, &x_offset[1], &y_offset[1]);
      }
   }
   /* dead with hiz_bo == NULL above; kept for when HiZ gets enabled */
   if (hiz_bo) {
      info->hiz.bo = hiz_bo;
      info->hiz.stride = 0;
      info->hiz.tiling = 0;
      info->hiz.offset = 0;
      x_offset[2] = 0;
      y_offset[2] = 0;
   }
   info->width = tex->base.width0;
   info->height = tex->base.height0;
   info->depth = (tex->base.target == PIPE_TEXTURE_3D) ?
      tex->base.depth0 : num_layers;
   info->lod = level;
   info->first_layer = first_layer;
   info->num_layers = num_layers;
   if (rebase_layer) {
      /* the size of the layer */
      info->width = u_minify(info->width, level);
      info->height = u_minify(info->height, level);
      if (info->surface_type == BRW_SURFACE_3D)
         info->depth = u_minify(info->depth, level);
      else
         info->depth = 1;
      /* no layered rendering */
      assert(num_layers == 1);
      info->lod = 0;
      info->first_layer = 0;
      info->num_layers = 1;
      /* all three share the same X/Y offsets */
      if (info->zs.bo) {
         if (info->stencil.bo) {
            assert(x_offset[0] == x_offset[1]);
            assert(y_offset[0] == y_offset[1]);
         }
         info->x_offset = x_offset[0];
         info->y_offset = y_offset[0];
      }
      else {
         assert(info->stencil.bo);
         info->x_offset = x_offset[1];
         info->y_offset = y_offset[1];
      }
      if (info->hiz.bo) {
         assert(info->x_offset == x_offset[2]);
         assert(info->y_offset == y_offset[2]);
      }
      /*
       * From the Sandy Bridge PRM, volume 2 part 1, page 326:
       *
       *     "The 3 LSBs of both offsets (Depth Coordinate Offset Y and Depth
       *      Coordinate Offset X) must be zero to ensure correct alignment"
       *
       * XXX Skip the check for gen6, which seems to be fine.  We need to make
       * sure that does not happen eventually.
       */
      if (dev->gen >= ILO_GEN(7)) {
         assert((info->x_offset & 7) == 0 && (info->y_offset & 7) == 0);
         info->x_offset &= ~7;
         info->y_offset &= ~7;
      }
      /* grow the surface so the offset slice stays within bounds */
      info->width += info->x_offset;
      info->height += info->y_offset;
      /* we have to treat them as 2D surfaces */
      if (info->surface_type == BRW_SURFACE_CUBE) {
         assert(tex->base.width0 == tex->base.height0);
         /* we will set slice_offset to point to the single face */
         info->surface_type = BRW_SURFACE_2D;
      }
      else if (info->surface_type == BRW_SURFACE_1D && info->height > 1) {
         assert(tex->base.height0 == 1);
         info->surface_type = BRW_SURFACE_2D;
      }
   }
}
/**
 * Initialize a depth/stencil surface.  payload[0..5] become DW1-DW6 of
 * 3DSTATE_DEPTH_BUFFER, payload[6..7] the pitch/offset of
 * 3DSTATE_STENCIL_BUFFER, and payload[8..9] those of
 * 3DSTATE_HIER_DEPTH_BUFFER (see the emit functions below).
 *
 * \param tex  may be NULL, producing a null depth surface
 */
void
ilo_gpe_init_zs_surface(const struct ilo_dev_info *dev,
                        const struct ilo_texture *tex,
                        enum pipe_format format,
                        unsigned level,
                        unsigned first_layer, unsigned num_layers,
                        struct ilo_zs_surface *zs)
{
   /* hardware dimension limits differ between GEN6 and GEN7+ */
   const int max_2d_size = (dev->gen >= ILO_GEN(7)) ? 16384 : 8192;
   const int max_array_size = (dev->gen >= ILO_GEN(7)) ? 2048 : 512;
   struct ilo_zs_surface_info info;
   uint32_t dw1, dw2, dw3, dw4, dw5, dw6;
   ILO_GPE_VALID_GEN(dev, 6, 7);
   if (tex)
      zs_init_info(dev, tex, format, level, first_layer, num_layers, &info);
   else
      zs_init_info_null(dev, &info);
   /* sanity-check the dimensions against the surface type's limits */
   switch (info.surface_type) {
   case BRW_SURFACE_NULL:
      break;
   case BRW_SURFACE_1D:
      assert(info.width <= max_2d_size && info.height == 1 &&
             info.depth <= max_array_size);
      assert(info.first_layer < max_array_size - 1 &&
             info.num_layers <= max_array_size);
      break;
   case BRW_SURFACE_2D:
      assert(info.width <= max_2d_size && info.height <= max_2d_size &&
             info.depth <= max_array_size);
      assert(info.first_layer < max_array_size - 1 &&
             info.num_layers <= max_array_size);
      break;
   case BRW_SURFACE_3D:
      assert(info.width <= 2048 && info.height <= 2048 && info.depth <= 2048);
      assert(info.first_layer < 2048 && info.num_layers <= max_array_size);
      assert(info.x_offset == 0 && info.y_offset == 0);
      break;
   case BRW_SURFACE_CUBE:
      assert(info.width <= max_2d_size && info.height <= max_2d_size &&
             info.depth == 1);
      assert(info.first_layer == 0 && info.num_layers == 1);
      assert(info.width == info.height);
      assert(info.x_offset == 0 && info.y_offset == 0);
      break;
   default:
      assert(!"unexpected depth surface type");
      break;
   }
   dw1 = info.surface_type << 29 |
         info.format << 18;
   if (info.zs.bo) {
      /* required for GEN6+ */
      assert(info.zs.tiling == INTEL_TILING_Y);
      assert(info.zs.stride > 0 && info.zs.stride < 128 * 1024 &&
             info.zs.stride % 128 == 0);
      assert(info.width <= info.zs.stride);
      /* pitch is programmed minus one */
      dw1 |= (info.zs.stride - 1);
      dw2 = info.zs.offset;
   }
   else {
      dw2 = 0;
   }
   if (dev->gen >= ILO_GEN(7)) {
      /* GEN7+ layout: explicit depth/stencil/hiz enables in DW1 */
      if (info.zs.bo)
         dw1 |= 1 << 28;
      if (info.stencil.bo)
         dw1 |= 1 << 27;
      if (info.hiz.bo)
         dw1 |= 1 << 22;
      dw3 = (info.height - 1) << 18 |
            (info.width - 1) << 4 |
            info.lod;
      dw4 = (info.depth - 1) << 21 |
            info.first_layer << 10;
      dw5 = info.y_offset << 16 | info.x_offset;
      dw6 = (info.num_layers - 1) << 21;
   }
   else {
      /* always Y-tiled */
      dw1 |= 1 << 27 |
             1 << 26;
      if (info.hiz.bo) {
         dw1 |= 1 << 22 |
                1 << 21;
      }
      dw3 = (info.height - 1) << 19 |
            (info.width - 1) << 6 |
            info.lod << 2 |
            BRW_SURFACE_MIPMAPLAYOUT_BELOW << 1;
      dw4 = (info.depth - 1) << 21 |
            info.first_layer << 10 |
            (info.num_layers - 1) << 1;
      dw5 = info.y_offset << 16 | info.x_offset;
      dw6 = 0;
   }
   STATIC_ASSERT(Elements(zs->payload) >= 10);
   zs->payload[0] = dw1;
   zs->payload[1] = dw2;
   zs->payload[2] = dw3;
   zs->payload[3] = dw4;
   zs->payload[4] = dw5;
   zs->payload[5] = dw6;
   /* do not increment reference count */
   zs->bo = info.zs.bo;
   /* separate stencil */
   if (info.stencil.bo) {
      assert(info.stencil.stride > 0 && info.stencil.stride < 128 * 1024 &&
             info.stencil.stride % 128 == 0);
      zs->payload[6] = info.stencil.stride - 1;
      zs->payload[7] = info.stencil.offset;
      /* do not increment reference count */
      zs->separate_s8_bo = info.stencil.bo;
   }
   else {
      zs->payload[6] = 0;
      zs->payload[7] = 0;
      zs->separate_s8_bo = NULL;
   }
   /* hiz */
   if (info.hiz.bo) {
      zs->payload[8] = info.hiz.stride - 1;
      zs->payload[9] = info.hiz.offset;
      /* do not increment reference count */
      zs->hiz_bo = info.hiz.bo;
   }
   else {
      zs->payload[8] = 0;
      zs->payload[9] = 0;
      zs->hiz_bo = NULL;
   }
}
static void |
gen6_emit_3DSTATE_DEPTH_BUFFER(const struct ilo_dev_info *dev, |
const struct ilo_zs_surface *zs, |
struct ilo_cp *cp) |
{ |
const uint32_t cmd = (dev->gen >= ILO_GEN(7)) ? |
ILO_GPE_CMD(0x3, 0x0, 0x05) : ILO_GPE_CMD(0x3, 0x1, 0x05); |
const uint8_t cmd_len = 7; |
ILO_GPE_VALID_GEN(dev, 6, 7); |
ilo_cp_begin(cp, cmd_len); |
ilo_cp_write(cp, cmd | (cmd_len - 2)); |
ilo_cp_write(cp, zs->payload[0]); |
ilo_cp_write_bo(cp, zs->payload[1], zs->bo, |
INTEL_DOMAIN_RENDER, INTEL_DOMAIN_RENDER); |
ilo_cp_write(cp, zs->payload[2]); |
ilo_cp_write(cp, zs->payload[3]); |
ilo_cp_write(cp, zs->payload[4]); |
ilo_cp_write(cp, zs->payload[5]); |
ilo_cp_end(cp); |
} |
/* Emit 3DSTATE_POLY_STIPPLE_OFFSET; both offsets are 5-bit fields. */
static void
gen6_emit_3DSTATE_POLY_STIPPLE_OFFSET(const struct ilo_dev_info *dev,
                                      int x_offset, int y_offset,
                                      struct ilo_cp *cp)
{
   const uint32_t cmd = ILO_GPE_CMD(0x3, 0x1, 0x06);
   const uint8_t cmd_len = 2;
   uint32_t dw1;
   ILO_GPE_VALID_GEN(dev, 6, 7);
   assert(x_offset >= 0 && x_offset <= 31);
   assert(y_offset >= 0 && y_offset <= 31);
   dw1 = x_offset << 8 | y_offset;
   ilo_cp_begin(cp, cmd_len);
   ilo_cp_write(cp, cmd | (cmd_len - 2));
   ilo_cp_write(cp, dw1);
   ilo_cp_end(cp);
}
static void |
gen6_emit_3DSTATE_POLY_STIPPLE_PATTERN(const struct ilo_dev_info *dev, |
const struct pipe_poly_stipple *pattern, |
struct ilo_cp *cp) |
{ |
const uint32_t cmd = ILO_GPE_CMD(0x3, 0x1, 0x07); |
const uint8_t cmd_len = 33; |
int i; |
ILO_GPE_VALID_GEN(dev, 6, 7); |
assert(Elements(pattern->stipple) == 32); |
ilo_cp_begin(cp, cmd_len); |
ilo_cp_write(cp, cmd | (cmd_len - 2)); |
for (i = 0; i < 32; i++) |
ilo_cp_write(cp, pattern->stipple[i]); |
ilo_cp_end(cp); |
} |
static void |
gen6_emit_3DSTATE_LINE_STIPPLE(const struct ilo_dev_info *dev, |
unsigned pattern, unsigned factor, |
struct ilo_cp *cp) |
{ |
const uint32_t cmd = ILO_GPE_CMD(0x3, 0x1, 0x08); |
const uint8_t cmd_len = 3; |
unsigned inverse; |
ILO_GPE_VALID_GEN(dev, 6, 7); |
assert((pattern & 0xffff) == pattern); |
assert(factor >= 1 && factor <= 256); |
ilo_cp_begin(cp, cmd_len); |
ilo_cp_write(cp, cmd | (cmd_len - 2)); |
ilo_cp_write(cp, pattern); |
if (dev->gen >= ILO_GEN(7)) { |
/* in U1.16 */ |
inverse = (unsigned) (65536.0f / factor); |
ilo_cp_write(cp, inverse << 15 | factor); |
} |
else { |
/* in U1.13 */ |
inverse = (unsigned) (8192.0f / factor); |
ilo_cp_write(cp, inverse << 16 | factor); |
} |
ilo_cp_end(cp); |
} |
/* Emit 3DSTATE_AA_LINE_PARAMETERS with all bias/slope fields zero. */
static void
gen6_emit_3DSTATE_AA_LINE_PARAMETERS(const struct ilo_dev_info *dev,
                                     struct ilo_cp *cp)
{
   const uint32_t cmd = ILO_GPE_CMD(0x3, 0x1, 0x0a);
   const uint8_t cmd_len = 3;
   ILO_GPE_VALID_GEN(dev, 6, 7);
   ilo_cp_begin(cp, cmd_len);
   ilo_cp_write(cp, cmd | (cmd_len - 2));
   ilo_cp_write(cp, 0);
   ilo_cp_write(cp, 0);
   ilo_cp_end(cp);
}
static void |
gen6_emit_3DSTATE_GS_SVB_INDEX(const struct ilo_dev_info *dev, |
int index, unsigned svbi, |
unsigned max_svbi, |
bool load_vertex_count, |
struct ilo_cp *cp) |
{ |
const uint32_t cmd = ILO_GPE_CMD(0x3, 0x1, 0x0b); |
const uint8_t cmd_len = 4; |
uint32_t dw1; |
ILO_GPE_VALID_GEN(dev, 6, 6); |
assert(index >= 0 && index < 4); |
dw1 = index << SVB_INDEX_SHIFT; |
if (load_vertex_count) |
dw1 |= SVB_LOAD_INTERNAL_VERTEX_COUNT; |
ilo_cp_begin(cp, cmd_len); |
ilo_cp_write(cp, cmd | (cmd_len - 2)); |
ilo_cp_write(cp, dw1); |
ilo_cp_write(cp, svbi); |
ilo_cp_write(cp, max_svbi); |
ilo_cp_end(cp); |
} |
static void |
gen6_emit_3DSTATE_MULTISAMPLE(const struct ilo_dev_info *dev, |
int num_samples, |
const uint32_t *packed_sample_pos, |
bool pixel_location_center, |
struct ilo_cp *cp) |
{ |
const uint32_t cmd = ILO_GPE_CMD(0x3, 0x1, 0x0d); |
const uint8_t cmd_len = (dev->gen >= ILO_GEN(7)) ? 4 : 3; |
uint32_t dw1, dw2, dw3; |
ILO_GPE_VALID_GEN(dev, 6, 7); |
dw1 = (pixel_location_center) ? |
MS_PIXEL_LOCATION_CENTER : MS_PIXEL_LOCATION_UPPER_LEFT; |
switch (num_samples) { |
case 0: |
case 1: |
dw1 |= MS_NUMSAMPLES_1; |
dw2 = 0; |
dw3 = 0; |
break; |
case 4: |
dw1 |= MS_NUMSAMPLES_4; |
dw2 = packed_sample_pos[0]; |
dw3 = 0; |
break; |
case 8: |
assert(dev->gen >= ILO_GEN(7)); |
dw1 |= MS_NUMSAMPLES_8; |
dw2 = packed_sample_pos[0]; |
dw3 = packed_sample_pos[1]; |
break; |
default: |
assert(!"unsupported sample count"); |
dw1 |= MS_NUMSAMPLES_1; |
dw2 = 0; |
dw3 = 0; |
break; |
} |
ilo_cp_begin(cp, cmd_len); |
ilo_cp_write(cp, cmd | (cmd_len - 2)); |
ilo_cp_write(cp, dw1); |
ilo_cp_write(cp, dw2); |
if (dev->gen >= ILO_GEN(7)) |
ilo_cp_write(cp, dw3); |
ilo_cp_end(cp); |
} |
static void |
gen6_emit_3DSTATE_STENCIL_BUFFER(const struct ilo_dev_info *dev, |
const struct ilo_zs_surface *zs, |
struct ilo_cp *cp) |
{ |
const uint32_t cmd = (dev->gen >= ILO_GEN(7)) ? |
ILO_GPE_CMD(0x3, 0x0, 0x06) : |
ILO_GPE_CMD(0x3, 0x1, 0x0e); |
const uint8_t cmd_len = 3; |
ILO_GPE_VALID_GEN(dev, 6, 7); |
ilo_cp_begin(cp, cmd_len); |
ilo_cp_write(cp, cmd | (cmd_len - 2)); |
/* see ilo_gpe_init_zs_surface() */ |
ilo_cp_write(cp, zs->payload[6]); |
ilo_cp_write_bo(cp, zs->payload[7], zs->separate_s8_bo, |
INTEL_DOMAIN_RENDER, INTEL_DOMAIN_RENDER); |
ilo_cp_end(cp); |
} |
static void |
gen6_emit_3DSTATE_HIER_DEPTH_BUFFER(const struct ilo_dev_info *dev, |
const struct ilo_zs_surface *zs, |
struct ilo_cp *cp) |
{ |
const uint32_t cmd = (dev->gen >= ILO_GEN(7)) ? |
ILO_GPE_CMD(0x3, 0x0, 0x07) : |
ILO_GPE_CMD(0x3, 0x1, 0x0f); |
const uint8_t cmd_len = 3; |
ILO_GPE_VALID_GEN(dev, 6, 7); |
ilo_cp_begin(cp, cmd_len); |
ilo_cp_write(cp, cmd | (cmd_len - 2)); |
/* see ilo_gpe_init_zs_surface() */ |
ilo_cp_write(cp, zs->payload[8]); |
ilo_cp_write_bo(cp, zs->payload[9], zs->hiz_bo, |
INTEL_DOMAIN_RENDER, INTEL_DOMAIN_RENDER); |
ilo_cp_end(cp); |
} |
static void |
gen6_emit_3DSTATE_CLEAR_PARAMS(const struct ilo_dev_info *dev, |
uint32_t clear_val, |
struct ilo_cp *cp) |
{ |
const uint32_t cmd = ILO_GPE_CMD(0x3, 0x1, 0x10); |
const uint8_t cmd_len = 2; |
ILO_GPE_VALID_GEN(dev, 6, 6); |
ilo_cp_begin(cp, cmd_len); |
ilo_cp_write(cp, cmd | (cmd_len - 2) | |
GEN5_DEPTH_CLEAR_VALID); |
ilo_cp_write(cp, clear_val); |
ilo_cp_end(cp); |
} |
/**
 * Emit the PIPE_CONTROL command and, via the relocation, a post-sync write
 * to \p bo.
 *
 * \param dw1          flush/stall control bits (PIPE_CONTROL_*)
 * \param bo           buffer for the post-sync operation, may be NULL
 * \param bo_offset    write offset within \p bo
 * \param write_qword  when true, reserve a qword write (5 dwords total
 *                     instead of 4)
 */
static void
gen6_emit_PIPE_CONTROL(const struct ilo_dev_info *dev,
                       uint32_t dw1,
                       struct intel_bo *bo, uint32_t bo_offset,
                       bool write_qword,
                       struct ilo_cp *cp)
{
   const uint32_t cmd = ILO_GPE_CMD(0x3, 0x2, 0x00);
   const uint8_t cmd_len = (write_qword) ? 5 : 4;
   const uint32_t read_domains = INTEL_DOMAIN_INSTRUCTION;
   const uint32_t write_domain = INTEL_DOMAIN_INSTRUCTION;
   ILO_GPE_VALID_GEN(dev, 6, 7);
   /* sanity-check the workarounds the PRM requires around CS stalls */
   if (dw1 & PIPE_CONTROL_CS_STALL) {
      /*
       * From the Sandy Bridge PRM, volume 2 part 1, page 73:
       *
       *     "1 of the following must also be set (when CS stall is set):
       *
       *       * Depth Cache Flush Enable ([0] of DW1)
       *       * Stall at Pixel Scoreboard ([1] of DW1)
       *       * Depth Stall ([13] of DW1)
       *       * Post-Sync Operation ([13] of DW1)
       *       * Render Target Cache Flush Enable ([12] of DW1)
       *       * Notify Enable ([8] of DW1)"
       *
       * From the Ivy Bridge PRM, volume 2 part 1, page 61:
       *
       *     "One of the following must also be set (when CS stall is set):
       *
       *       * Render Target Cache Flush Enable ([12] of DW1)
       *       * Depth Cache Flush Enable ([0] of DW1)
       *       * Stall at Pixel Scoreboard ([1] of DW1)
       *       * Depth Stall ([13] of DW1)
       *       * Post-Sync Operation ([13] of DW1)"
       */
      uint32_t bit_test = PIPE_CONTROL_WRITE_FLUSH |
                          PIPE_CONTROL_DEPTH_CACHE_FLUSH |
                          PIPE_CONTROL_STALL_AT_SCOREBOARD |
                          PIPE_CONTROL_DEPTH_STALL;
      /* post-sync op */
      bit_test |= PIPE_CONTROL_WRITE_IMMEDIATE |
                  PIPE_CONTROL_WRITE_DEPTH_COUNT |
                  PIPE_CONTROL_WRITE_TIMESTAMP;
      /* Notify Enable satisfies the requirement only on GEN6 */
      if (dev->gen == ILO_GEN(6))
         bit_test |= PIPE_CONTROL_INTERRUPT_ENABLE;
      assert(dw1 & bit_test);
   }
   if (dw1 & PIPE_CONTROL_DEPTH_STALL) {
      /*
       * From the Sandy Bridge PRM, volume 2 part 1, page 73:
       *
       *     "Following bits must be clear (when Depth Stall is set):
       *
       *       * Render Target Cache Flush Enable ([12] of DW1)
       *       * Depth Cache Flush Enable ([0] of DW1)"
       */
      assert(!(dw1 & (PIPE_CONTROL_WRITE_FLUSH |
                      PIPE_CONTROL_DEPTH_CACHE_FLUSH)));
   }
   ilo_cp_begin(cp, cmd_len);
   ilo_cp_write(cp, cmd | (cmd_len - 2));
   ilo_cp_write(cp, dw1);
   /* dw2: post-sync write address (relocated against bo) */
   ilo_cp_write_bo(cp, bo_offset, bo, read_domains, write_domain);
   /* dw3 (and dw4 for a qword): immediate data, always zero here */
   ilo_cp_write(cp, 0);
   if (write_qword)
      ilo_cp_write(cp, 0);
   ilo_cp_end(cp);
}
/**
 * Emit the 3DPRIMITIVE command (GEN6 dword layout) for a draw call.
 *
 * \param info     the gallium draw call description
 * \param ib       index buffer state; consulted only when info->indexed
 * \param rectlist when true, force the topology to RECTLIST (internal
 *                 operations such as blitting)
 */
static void
gen6_emit_3DPRIMITIVE(const struct ilo_dev_info *dev,
                      const struct pipe_draw_info *info,
                      const struct ilo_ib_state *ib,
                      bool rectlist,
                      struct ilo_cp *cp)
{
   const uint32_t cmd = ILO_GPE_CMD(0x3, 0x3, 0x00);
   const uint8_t cmd_len = 6;
   const int prim = (rectlist) ?
      _3DPRIM_RECTLIST : ilo_gpe_gen6_translate_pipe_prim(info->mode);
   /* indexed draws fetch vertices at random; sequential otherwise */
   const int vb_access = (info->indexed) ?
      GEN4_3DPRIM_VERTEXBUFFER_ACCESS_RANDOM :
      GEN4_3DPRIM_VERTEXBUFFER_ACCESS_SEQUENTIAL;
   /* for indexed draws, bias the start by the index buffer draw offset */
   const uint32_t vb_start = info->start +
      ((info->indexed) ? ib->draw_start_offset : 0);
   ILO_GPE_VALID_GEN(dev, 6, 6);
   ilo_cp_begin(cp, cmd_len);
   ilo_cp_write(cp, cmd | (cmd_len - 2) |
                prim << GEN4_3DPRIM_TOPOLOGY_TYPE_SHIFT |
                vb_access);
   ilo_cp_write(cp, info->count);           /* vertex/index count per instance */
   ilo_cp_write(cp, vb_start);              /* start vertex/index */
   ilo_cp_write(cp, info->instance_count);
   ilo_cp_write(cp, info->start_instance);
   ilo_cp_write(cp, info->index_bias);      /* base vertex */
   ilo_cp_end(cp);
}
/**
 * Write INTERFACE_DESCRIPTOR_DATA for the media pipeline: one 8-dword
 * descriptor per compute kernel in \p cs.
 *
 * \param cs                   array of num_ids shader states
 * \param sampler_state        per-kernel SAMPLER_STATE offsets
 * \param num_samplers         per-kernel sampler counts
 * \param binding_table_state  per-kernel BINDING_TABLE_STATE offsets
 * \param num_surfaces         per-kernel surface counts
 * \return offset of the descriptor array in the state buffer
 */
static uint32_t
gen6_emit_INTERFACE_DESCRIPTOR_DATA(const struct ilo_dev_info *dev,
                                    const struct ilo_shader_state **cs,
                                    uint32_t *sampler_state,
                                    int *num_samplers,
                                    uint32_t *binding_table_state,
                                    int *num_surfaces,
                                    int num_ids,
                                    struct ilo_cp *cp)
{
   /*
    * From the Sandy Bridge PRM, volume 2 part 2, page 34:
    *
    *     "(Interface Descriptor Total Length) This field must have the same
    *      alignment as the Interface Descriptor Data Start Address.
    *
    *      It must be DQWord (32-byte) aligned..."
    *
    * From the Sandy Bridge PRM, volume 2 part 2, page 35:
    *
    *     "(Interface Descriptor Data Start Address) Specifies the 32-byte
    *      aligned address of the Interface Descriptor data."
    */
   const int state_align = 32 / 4;
   const int state_len = (32 / 4) * num_ids;  /* 8 dwords per descriptor */
   uint32_t state_offset, *dw;
   int i;
   ILO_GPE_VALID_GEN(dev, 6, 6);
   dw = ilo_cp_steal_ptr(cp, "INTERFACE_DESCRIPTOR_DATA",
                         state_len, state_align, &state_offset);
   for (i = 0; i < num_ids; i++) {
      dw[0] = ilo_shader_get_kernel_offset(cs[i]);  /* kernel start pointer */
      dw[1] = 1 << 18; /* SPF */
      /* sampler count is expressed in groups of 4, rounded up */
      dw[2] = sampler_state[i] |
              (num_samplers[i] + 3) / 4 << 2;
      dw[3] = binding_table_state[i] |
              num_surfaces[i];
      dw[4] = 0 << 16 |  /* CURBE Read Length */
              0;         /* CURBE Read Offset */
      dw[5] = 0; /* Barrier ID */
      dw[6] = 0;
      dw[7] = 0;
      dw += 8;
   }
   return state_offset;
}
static void |
viewport_get_guardband(const struct ilo_dev_info *dev, |
int center_x, int center_y, |
int *min_gbx, int *max_gbx, |
int *min_gby, int *max_gby) |
{ |
/* |
* From the Sandy Bridge PRM, volume 2 part 1, page 234: |
* |
* "Per-Device Guardband Extents |
* |
* - Supported X,Y ScreenSpace "Guardband" Extent: [-16K,16K-1] |
* - Maximum Post-Clamp Delta (X or Y): 16K" |
* |
* "In addition, in order to be correctly rendered, objects must have a |
* screenspace bounding box not exceeding 8K in the X or Y direction. |
* This additional restriction must also be comprehended by software, |
* i.e., enforced by use of clipping." |
* |
* From the Ivy Bridge PRM, volume 2 part 1, page 248: |
* |
* "Per-Device Guardband Extents |
* |
* - Supported X,Y ScreenSpace "Guardband" Extent: [-32K,32K-1] |
* - Maximum Post-Clamp Delta (X or Y): N/A" |
* |
* "In addition, in order to be correctly rendered, objects must have a |
* screenspace bounding box not exceeding 8K in the X or Y direction. |
* This additional restriction must also be comprehended by software, |
* i.e., enforced by use of clipping." |
* |
* Combined, the bounding box of any object can not exceed 8K in both |
* width and height. |
* |
* Below we set the guardband as a squre of length 8K, centered at where |
* the viewport is. This makes sure all objects passing the GB test are |
* valid to the renderer, and those failing the XY clipping have a |
* better chance of passing the GB test. |
*/ |
const int max_extent = (dev->gen >= ILO_GEN(7)) ? 32768 : 16384; |
const int half_len = 8192 / 2; |
/* make sure the guardband is within the valid range */ |
if (center_x - half_len < -max_extent) |
center_x = -max_extent + half_len; |
else if (center_x + half_len > max_extent - 1) |
center_x = max_extent - half_len; |
if (center_y - half_len < -max_extent) |
center_y = -max_extent + half_len; |
else if (center_y + half_len > max_extent - 1) |
center_y = max_extent - half_len; |
*min_gbx = (float) (center_x - half_len); |
*max_gbx = (float) (center_x + half_len); |
*min_gby = (float) (center_y - half_len); |
*max_gby = (float) (center_y + half_len); |
} |
void |
ilo_gpe_set_viewport_cso(const struct ilo_dev_info *dev, |
const struct pipe_viewport_state *state, |
struct ilo_viewport_cso *vp) |
{ |
const float scale_x = fabs(state->scale[0]); |
const float scale_y = fabs(state->scale[1]); |
const float scale_z = fabs(state->scale[2]); |
int min_gbx, max_gbx, min_gby, max_gby; |
ILO_GPE_VALID_GEN(dev, 6, 7); |
viewport_get_guardband(dev, |
(int) state->translate[0], |
(int) state->translate[1], |
&min_gbx, &max_gbx, &min_gby, &max_gby); |
/* matrix form */ |
vp->m00 = state->scale[0]; |
vp->m11 = state->scale[1]; |
vp->m22 = state->scale[2]; |
vp->m30 = state->translate[0]; |
vp->m31 = state->translate[1]; |
vp->m32 = state->translate[2]; |
/* guardband in NDC space */ |
vp->min_gbx = ((float) min_gbx - state->translate[0]) / scale_x; |
vp->max_gbx = ((float) max_gbx - state->translate[0]) / scale_x; |
vp->min_gby = ((float) min_gby - state->translate[1]) / scale_y; |
vp->max_gby = ((float) max_gby - state->translate[1]) / scale_y; |
/* viewport in screen space */ |
vp->min_x = scale_x * -1.0f + state->translate[0]; |
vp->max_x = scale_x * 1.0f + state->translate[0]; |
vp->min_y = scale_y * -1.0f + state->translate[1]; |
vp->max_y = scale_y * 1.0f + state->translate[1]; |
vp->min_z = scale_z * -1.0f + state->translate[2]; |
vp->max_z = scale_z * 1.0f + state->translate[2]; |
} |
static uint32_t |
gen6_emit_SF_VIEWPORT(const struct ilo_dev_info *dev, |
const struct ilo_viewport_cso *viewports, |
unsigned num_viewports, |
struct ilo_cp *cp) |
{ |
const int state_align = 32 / 4; |
const int state_len = 8 * num_viewports; |
uint32_t state_offset, *dw; |
unsigned i; |
ILO_GPE_VALID_GEN(dev, 6, 6); |
/* |
* From the Sandy Bridge PRM, volume 2 part 1, page 262: |
* |
* "The viewport-specific state used by the SF unit (SF_VIEWPORT) is |
* stored as an array of up to 16 elements..." |
*/ |
assert(num_viewports && num_viewports <= 16); |
dw = ilo_cp_steal_ptr(cp, "SF_VIEWPORT", |
state_len, state_align, &state_offset); |
for (i = 0; i < num_viewports; i++) { |
const struct ilo_viewport_cso *vp = &viewports[i]; |
dw[0] = fui(vp->m00); |
dw[1] = fui(vp->m11); |
dw[2] = fui(vp->m22); |
dw[3] = fui(vp->m30); |
dw[4] = fui(vp->m31); |
dw[5] = fui(vp->m32); |
dw[6] = 0; |
dw[7] = 0; |
dw += 8; |
} |
return state_offset; |
} |
static uint32_t |
gen6_emit_CLIP_VIEWPORT(const struct ilo_dev_info *dev, |
const struct ilo_viewport_cso *viewports, |
unsigned num_viewports, |
struct ilo_cp *cp) |
{ |
const int state_align = 32 / 4; |
const int state_len = 4 * num_viewports; |
uint32_t state_offset, *dw; |
unsigned i; |
ILO_GPE_VALID_GEN(dev, 6, 6); |
/* |
* From the Sandy Bridge PRM, volume 2 part 1, page 193: |
* |
* "The viewport-related state is stored as an array of up to 16 |
* elements..." |
*/ |
assert(num_viewports && num_viewports <= 16); |
dw = ilo_cp_steal_ptr(cp, "CLIP_VIEWPORT", |
state_len, state_align, &state_offset); |
for (i = 0; i < num_viewports; i++) { |
const struct ilo_viewport_cso *vp = &viewports[i]; |
dw[0] = fui(vp->min_gbx); |
dw[1] = fui(vp->max_gbx); |
dw[2] = fui(vp->min_gby); |
dw[3] = fui(vp->max_gby); |
dw += 4; |
} |
return state_offset; |
} |
static uint32_t |
gen6_emit_CC_VIEWPORT(const struct ilo_dev_info *dev, |
const struct ilo_viewport_cso *viewports, |
unsigned num_viewports, |
struct ilo_cp *cp) |
{ |
const int state_align = 32 / 4; |
const int state_len = 2 * num_viewports; |
uint32_t state_offset, *dw; |
unsigned i; |
ILO_GPE_VALID_GEN(dev, 6, 7); |
/* |
* From the Sandy Bridge PRM, volume 2 part 1, page 385: |
* |
* "The viewport state is stored as an array of up to 16 elements..." |
*/ |
assert(num_viewports && num_viewports <= 16); |
dw = ilo_cp_steal_ptr(cp, "CC_VIEWPORT", |
state_len, state_align, &state_offset); |
for (i = 0; i < num_viewports; i++) { |
const struct ilo_viewport_cso *vp = &viewports[i]; |
dw[0] = fui(vp->min_z); |
dw[1] = fui(vp->max_z); |
dw += 2; |
} |
return state_offset; |
} |
static uint32_t |
gen6_emit_COLOR_CALC_STATE(const struct ilo_dev_info *dev, |
const struct pipe_stencil_ref *stencil_ref, |
float alpha_ref, |
const struct pipe_blend_color *blend_color, |
struct ilo_cp *cp) |
{ |
const int state_align = 64 / 4; |
const int state_len = 6; |
uint32_t state_offset, *dw; |
ILO_GPE_VALID_GEN(dev, 6, 7); |
dw = ilo_cp_steal_ptr(cp, "COLOR_CALC_STATE", |
state_len, state_align, &state_offset); |
dw[0] = stencil_ref->ref_value[0] << 24 | |
stencil_ref->ref_value[1] << 16 | |
BRW_ALPHATEST_FORMAT_UNORM8; |
dw[1] = float_to_ubyte(alpha_ref); |
dw[2] = fui(blend_color->color[0]); |
dw[3] = fui(blend_color->color[1]); |
dw[4] = fui(blend_color->color[2]); |
dw[5] = fui(blend_color->color[3]); |
return state_offset; |
} |
static int |
gen6_blend_factor_dst_alpha_forced_one(int factor) |
{ |
switch (factor) { |
case BRW_BLENDFACTOR_DST_ALPHA: |
return BRW_BLENDFACTOR_ONE; |
case BRW_BLENDFACTOR_INV_DST_ALPHA: |
case BRW_BLENDFACTOR_SRC_ALPHA_SATURATE: |
return BRW_BLENDFACTOR_ZERO; |
default: |
return factor; |
} |
} |
static uint32_t |
blend_get_rt_blend_enable(const struct ilo_dev_info *dev, |
const struct pipe_rt_blend_state *rt, |
bool dst_alpha_forced_one) |
{ |
int rgb_src, rgb_dst, a_src, a_dst; |
uint32_t dw; |
if (!rt->blend_enable) |
return 0; |
rgb_src = gen6_translate_pipe_blendfactor(rt->rgb_src_factor); |
rgb_dst = gen6_translate_pipe_blendfactor(rt->rgb_dst_factor); |
a_src = gen6_translate_pipe_blendfactor(rt->alpha_src_factor); |
a_dst = gen6_translate_pipe_blendfactor(rt->alpha_dst_factor); |
if (dst_alpha_forced_one) { |
rgb_src = gen6_blend_factor_dst_alpha_forced_one(rgb_src); |
rgb_dst = gen6_blend_factor_dst_alpha_forced_one(rgb_dst); |
a_src = gen6_blend_factor_dst_alpha_forced_one(a_src); |
a_dst = gen6_blend_factor_dst_alpha_forced_one(a_dst); |
} |
dw = 1 << 31 | |
gen6_translate_pipe_blend(rt->alpha_func) << 26 | |
a_src << 20 | |
a_dst << 15 | |
gen6_translate_pipe_blend(rt->rgb_func) << 11 | |
rgb_src << 5 | |
rgb_dst; |
if (rt->rgb_func != rt->alpha_func || |
rgb_src != a_src || rgb_dst != a_dst) |
dw |= 1 << 30; |
return dw; |
} |
/**
 * Translate a gallium blend state into an ilo_blend_state, precomputing as
 * many BLEND_STATE dwords as possible.  The format-dependent parts (logicop
 * eligibility, forced dst alpha) are resolved later in
 * gen6_emit_BLEND_STATE().
 */
void
ilo_gpe_init_blend(const struct ilo_dev_info *dev,
                   const struct pipe_blend_state *state,
                   struct ilo_blend_state *blend)
{
   unsigned num_cso, i;
   ILO_GPE_VALID_GEN(dev, 6, 7);
   /* with independent blend, each render target has its own cso */
   if (state->independent_blend_enable) {
      num_cso = Elements(blend->cso);
   }
   else {
      memset(blend->cso, 0, sizeof(blend->cso));
      num_cso = 1;
   }
   blend->independent_blend_enable = state->independent_blend_enable;
   blend->alpha_to_coverage = state->alpha_to_coverage;
   blend->dual_blend = false;
   for (i = 0; i < num_cso; i++) {
      const struct pipe_rt_blend_state *rt = &state->rt[i];
      struct ilo_blend_cso *cso = &blend->cso[i];
      bool dual_blend;
      cso->payload[0] = 0;
      cso->payload[1] = BRW_RENDERTARGET_CLAMPRANGE_FORMAT << 2 |
                        0x3;  /* pre/post blend color clamp enable */
      /* bits 27..24: per-channel write disables, from the colormask */
      if (!(rt->colormask & PIPE_MASK_A))
         cso->payload[1] |= 1 << 27;
      if (!(rt->colormask & PIPE_MASK_R))
         cso->payload[1] |= 1 << 26;
      if (!(rt->colormask & PIPE_MASK_G))
         cso->payload[1] |= 1 << 25;
      if (!(rt->colormask & PIPE_MASK_B))
         cso->payload[1] |= 1 << 24;
      if (state->dither)
         cso->payload[1] |= 1 << 12;  /* color dither enable */
      /*
       * From the Sandy Bridge PRM, volume 2 part 1, page 365:
       *
       *     "Color Buffer Blending and Logic Ops must not be enabled
       *      simultaneously, or behavior is UNDEFINED."
       *
       * Since state->logicop_enable takes precedence over rt->blend_enable,
       * no special care is needed.
       */
      if (state->logicop_enable) {
         cso->dw_logicop = 1 << 22 |
            gen6_translate_pipe_logicop(state->logicop_func) << 18;
         cso->dw_blend = 0;
         cso->dw_blend_dst_alpha_forced_one = 0;
         dual_blend = false;
      }
      else {
         cso->dw_logicop = 0;
         /* precompute both variants; the RT format picks one at emit time */
         cso->dw_blend = blend_get_rt_blend_enable(dev, rt, false);
         cso->dw_blend_dst_alpha_forced_one =
            blend_get_rt_blend_enable(dev, rt, true);
         dual_blend = (rt->blend_enable &&
                       util_blend_state_is_dual(state, i));
      }
      cso->dw_alpha_mod = 0;
      if (state->alpha_to_coverage) {
         cso->dw_alpha_mod |= 1 << 31;  /* alpha-to-coverage enable */
         if (dev->gen >= ILO_GEN(7))
            cso->dw_alpha_mod |= 1 << 29;  /* alpha-to-coverage dither */
      }
      /*
       * From the Sandy Bridge PRM, volume 2 part 1, page 378:
       *
       *     "If Dual Source Blending is enabled, this bit (AlphaToOne Enable)
       *      must be disabled."
       */
      if (state->alpha_to_one && !dual_blend)
         cso->dw_alpha_mod |= 1 << 30;  /* alpha-to-one enable */
      if (dual_blend)
         blend->dual_blend = true;
   }
}
/**
 * Write the BLEND_STATE array (2 dwords per render target) into the state
 * buffer, resolving the format-dependent parts that ilo_gpe_init_blend()
 * had to defer (logicop eligibility, forced dst alpha, pure-integer RTs).
 *
 * \return offset of the array, or 0 when there is nothing to emit
 */
static uint32_t
gen6_emit_BLEND_STATE(const struct ilo_dev_info *dev,
                      const struct ilo_blend_state *blend,
                      const struct ilo_fb_state *fb,
                      const struct pipe_alpha_state *alpha,
                      struct ilo_cp *cp)
{
   const int state_align = 64 / 4;
   int state_len;
   uint32_t state_offset, *dw;
   unsigned num_targets, i;
   ILO_GPE_VALID_GEN(dev, 6, 7);
   /*
    * From the Sandy Bridge PRM, volume 2 part 1, page 376:
    *
    *     "The blend state is stored as an array of up to 8 elements..."
    */
   num_targets = fb->state.nr_cbufs;
   assert(num_targets <= 8);
   if (!num_targets) {
      if (!alpha->enabled)
         return 0;
      /* to be able to reference alpha func */
      num_targets = 1;
   }
   state_len = 2 * num_targets;
   dw = ilo_cp_steal_ptr(cp, "BLEND_STATE",
                         state_len, state_align, &state_offset);
   for (i = 0; i < num_targets; i++) {
      /* without independent blend, every target reuses cso[0] */
      const unsigned idx = (blend->independent_blend_enable) ? i : 0;
      const struct ilo_blend_cso *cso = &blend->cso[idx];
      const int num_samples = fb->num_samples;
      const struct util_format_description *format_desc =
         (idx < fb->state.nr_cbufs) ?
         util_format_description(fb->state.cbufs[idx]->format) : NULL;
      bool rt_is_unorm, rt_is_pure_integer, rt_dst_alpha_forced_one;
      /* classify the render target format */
      rt_is_unorm = true;
      rt_is_pure_integer = false;
      rt_dst_alpha_forced_one = false;
      if (format_desc) {
         int ch;
         switch (format_desc->format) {
         case PIPE_FORMAT_B8G8R8X8_UNORM:
            /* force alpha to one when the HW format has alpha */
            assert(ilo_translate_render_format(PIPE_FORMAT_B8G8R8X8_UNORM)
                  == BRW_SURFACEFORMAT_B8G8R8A8_UNORM);
            rt_dst_alpha_forced_one = true;
            break;
         default:
            break;
         }
         for (ch = 0; ch < 4; ch++) {
            if (format_desc->channel[ch].type == UTIL_FORMAT_TYPE_VOID)
               continue;
            if (format_desc->channel[ch].pure_integer) {
               rt_is_unorm = false;
               rt_is_pure_integer = true;
               break;
            }
            if (!format_desc->channel[ch].normalized ||
                format_desc->channel[ch].type != UTIL_FORMAT_TYPE_UNSIGNED)
               rt_is_unorm = false;
         }
      }
      dw[0] = cso->payload[0];
      dw[1] = cso->payload[1];
      /* blending is not applicable to pure-integer render targets */
      if (!rt_is_pure_integer) {
         if (rt_dst_alpha_forced_one)
            dw[0] |= cso->dw_blend_dst_alpha_forced_one;
         else
            dw[0] |= cso->dw_blend;
      }
      /*
       * From the Sandy Bridge PRM, volume 2 part 1, page 365:
       *
       *     "Logic Ops are only supported on *_UNORM surfaces (excluding
       *      _SRGB variants), otherwise Logic Ops must be DISABLED."
       *
       * Since logicop is ignored for non-UNORM color buffers, no special care
       * is needed.
       */
      if (rt_is_unorm)
         dw[1] |= cso->dw_logicop;
      /*
       * From the Sandy Bridge PRM, volume 2 part 1, page 356:
       *
       *     "When NumSamples = 1, AlphaToCoverage and AlphaToCoverage
       *      Dither both must be disabled."
       *
       * There is no such limitation on GEN7, or for AlphaToOne.  But GL
       * requires that anyway.
       */
      if (num_samples > 1)
         dw[1] |= cso->dw_alpha_mod;
      /*
       * From the Sandy Bridge PRM, volume 2 part 1, page 382:
       *
       *     "Alpha Test can only be enabled if Pixel Shader outputs a float
       *      alpha value."
       */
      if (alpha->enabled && !rt_is_pure_integer) {
         dw[1] |= 1 << 16 |
            gen6_translate_dsa_func(alpha->func) << 13;
      }
      dw += 2;
   }
   return state_offset;
}
/**
 * Translate a gallium depth/stencil/alpha state into an ilo_dsa_state,
 * precomputing the three DEPTH_STENCIL_STATE dwords in dsa->payload.
 * The alpha state is copied aside for gen6_emit_BLEND_STATE().
 */
void
ilo_gpe_init_dsa(const struct ilo_dev_info *dev,
                 const struct pipe_depth_stencil_alpha_state *state,
                 struct ilo_dsa_state *dsa)
{
   const struct pipe_depth_state *depth = &state->depth;
   const struct pipe_stencil_state *stencil0 = &state->stencil[0];
   const struct pipe_stencil_state *stencil1 = &state->stencil[1];
   uint32_t *dw;
   ILO_GPE_VALID_GEN(dev, 6, 7);
   /* copy alpha state for later use */
   dsa->alpha = state->alpha;
   STATIC_ASSERT(Elements(dsa->payload) >= 3);
   dw = dsa->payload;
   /*
    * From the Sandy Bridge PRM, volume 2 part 1, page 359:
    *
    *     "If the Depth Buffer is either undefined or does not have a surface
    *      format of D32_FLOAT_S8X24_UINT or D24_UNORM_S8_UINT and separate
    *      stencil buffer is disabled, Stencil Test Enable must be DISABLED"
    *
    * From the Sandy Bridge PRM, volume 2 part 1, page 370:
    *
    *     "This field (Stencil Test Enable) cannot be enabled if
    *      Surface Format in 3DSTATE_DEPTH_BUFFER is set to D16_UNORM."
    *
    * TODO We do not check these yet.
    */
   if (stencil0->enabled) {
      /* dw0: front-face stencil test enable, func, and the three ops */
      dw[0] = 1 << 31 |
              gen6_translate_dsa_func(stencil0->func) << 28 |
              gen6_translate_pipe_stencil_op(stencil0->fail_op) << 25 |
              gen6_translate_pipe_stencil_op(stencil0->zfail_op) << 22 |
              gen6_translate_pipe_stencil_op(stencil0->zpass_op) << 19;
      /* stencil buffer write enable (bit 18) when any bit may be written */
      if (stencil0->writemask)
         dw[0] |= 1 << 18;
      /* dw1: front-face test mask and write mask */
      dw[1] = stencil0->valuemask << 24 |
              stencil0->writemask << 16;
      if (stencil1->enabled) {
         /* two-sided stencil: back-face enable, func, and ops */
         dw[0] |= 1 << 15 |
                  gen6_translate_dsa_func(stencil1->func) << 12 |
                  gen6_translate_pipe_stencil_op(stencil1->fail_op) << 9 |
                  gen6_translate_pipe_stencil_op(stencil1->zfail_op) << 6 |
                  gen6_translate_pipe_stencil_op(stencil1->zpass_op) << 3;
         if (stencil1->writemask)
            dw[0] |= 1 << 18;
         /* back-face test mask and write mask share dw1 */
         dw[1] |= stencil1->valuemask << 8 |
                  stencil1->writemask;
      }
   }
   else {
      dw[0] = 0;
      dw[1] = 0;
   }
   /*
    * From the Sandy Bridge PRM, volume 2 part 1, page 360:
    *
    *     "Enabling the Depth Test function without defining a Depth Buffer is
    *      UNDEFINED."
    *
    * From the Sandy Bridge PRM, volume 2 part 1, page 375:
    *
    *     "A Depth Buffer must be defined before enabling writes to it, or
    *      operation is UNDEFINED."
    *
    * TODO We do not check these yet.
    */
   /* dw2: depth test enable and depth write enable */
   dw[2] = depth->enabled << 31 |
           depth->writemask << 26;
   /* with the test disabled, the hardware still needs a pass-always func */
   if (depth->enabled)
      dw[2] |= gen6_translate_dsa_func(depth->func) << 27;
   else
      dw[2] |= BRW_COMPAREFUNCTION_ALWAYS << 27;
}
static uint32_t |
gen6_emit_DEPTH_STENCIL_STATE(const struct ilo_dev_info *dev, |
const struct ilo_dsa_state *dsa, |
struct ilo_cp *cp) |
{ |
const int state_align = 64 / 4; |
const int state_len = 3; |
uint32_t state_offset, *dw; |
ILO_GPE_VALID_GEN(dev, 6, 7); |
dw = ilo_cp_steal_ptr(cp, "DEPTH_STENCIL_STATE", |
state_len, state_align, &state_offset); |
dw[0] = dsa->payload[0]; |
dw[1] = dsa->payload[1]; |
dw[2] = dsa->payload[2]; |
return state_offset; |
} |
void |
ilo_gpe_set_scissor(const struct ilo_dev_info *dev, |
unsigned start_slot, |
unsigned num_states, |
const struct pipe_scissor_state *states, |
struct ilo_scissor_state *scissor) |
{ |
unsigned i; |
ILO_GPE_VALID_GEN(dev, 6, 7); |
for (i = 0; i < num_states; i++) { |
uint16_t min_x, min_y, max_x, max_y; |
/* both max and min are inclusive in SCISSOR_RECT */ |
if (states[i].minx < states[i].maxx && |
states[i].miny < states[i].maxy) { |
min_x = states[i].minx; |
min_y = states[i].miny; |
max_x = states[i].maxx - 1; |
max_y = states[i].maxy - 1; |
} |
else { |
/* we have to make min greater than max */ |
min_x = 1; |
min_y = 1; |
max_x = 0; |
max_y = 0; |
} |
scissor->payload[(start_slot + i) * 2 + 0] = min_y << 16 | min_x; |
scissor->payload[(start_slot + i) * 2 + 1] = max_y << 16 | max_x; |
} |
if (!start_slot && num_states) |
scissor->scissor0 = states[0]; |
} |
void |
ilo_gpe_set_scissor_null(const struct ilo_dev_info *dev, |
struct ilo_scissor_state *scissor) |
{ |
unsigned i; |
for (i = 0; i < Elements(scissor->payload); i += 2) { |
scissor->payload[i + 0] = 1 << 16 | 1; |
scissor->payload[i + 1] = 0; |
} |
} |
static uint32_t |
gen6_emit_SCISSOR_RECT(const struct ilo_dev_info *dev, |
const struct ilo_scissor_state *scissor, |
unsigned num_viewports, |
struct ilo_cp *cp) |
{ |
const int state_align = 32 / 4; |
const int state_len = 2 * num_viewports; |
uint32_t state_offset, *dw; |
ILO_GPE_VALID_GEN(dev, 6, 7); |
/* |
* From the Sandy Bridge PRM, volume 2 part 1, page 263: |
* |
* "The viewport-specific state used by the SF unit (SCISSOR_RECT) is |
* stored as an array of up to 16 elements..." |
*/ |
assert(num_viewports && num_viewports <= 16); |
dw = ilo_cp_steal_ptr(cp, "SCISSOR_RECT", |
state_len, state_align, &state_offset); |
memcpy(dw, scissor->payload, state_len * 4); |
return state_offset; |
} |
/**
 * Write a binding table (an array of SURFACE_STATE offsets) into the state
 * buffer.  Returns the table offset, or 0 when the table is empty.
 */
static uint32_t
gen6_emit_BINDING_TABLE_STATE(const struct ilo_dev_info *dev,
                              uint32_t *surface_states,
                              int num_surface_states,
                              struct ilo_cp *cp)
{
   const int align = 32 / 4;
   uint32_t offset, *body;

   ILO_GPE_VALID_GEN(dev, 6, 7);

   /*
    * From the Sandy Bridge PRM, volume 4 part 1, page 69:
    *
    *     "It is stored as an array of up to 256 elements..."
    */
   assert(num_surface_states <= 256);

   /* nothing bound */
   if (!num_surface_states)
      return 0;

   body = ilo_cp_steal_ptr(cp, "BINDING_TABLE_STATE",
                           num_surface_states, align, &offset);
   memcpy(body, surface_states,
          num_surface_states * sizeof(surface_states[0]));

   return offset;
}
/**
 * Initialize a null SURFACE_STATE payload (GEN6 layout).  Width/height/
 * depth/LOD are taken as parameters because, for render targets, they must
 * match the depth buffer's state (see the PRM quote below).
 */
void
ilo_gpe_init_view_surface_null_gen6(const struct ilo_dev_info *dev,
                                    unsigned width, unsigned height,
                                    unsigned depth, unsigned level,
                                    struct ilo_view_surface *surf)
{
   uint32_t *dw;
   ILO_GPE_VALID_GEN(dev, 6, 6);
   /*
    * From the Sandy Bridge PRM, volume 4 part 1, page 71:
    *
    *     "A null surface will be used in instances where an actual surface is
    *      not bound.  When a write message is generated to a null surface, no
    *      actual surface is written to.  When a read message (including any
    *      sampling engine message) is generated to a null surface, the result
    *      is all zeros.  Note that a null surface type is allowed to be used
    *      with all messages, even if it is not specificially indicated as
    *      supported.  All of the remaining fields in surface state are ignored
    *      for null surfaces, with the following exceptions:
    *
    *        * [DevSNB+]: Width, Height, Depth, and LOD fields must match the
    *          depth buffer's corresponding state for all render target
    *          surfaces, including null.
    *        * Surface Format must be R8G8B8A8_UNORM."
    *
    * From the Sandy Bridge PRM, volume 4 part 1, page 82:
    *
    *     "If Surface Type is SURFTYPE_NULL, this field (Tiled Surface) must be
    *      true"
    */
   STATIC_ASSERT(Elements(surf->payload) >= 6);
   dw = surf->payload;
   /*
    * NOTE(review): the PRM quote above demands R8G8B8A8_UNORM but the code
    * programs B8G8R8A8_UNORM -- presumably harmless for a null surface since
    * reads return zeros anyway, but worth confirming against the PRM.
    */
   dw[0] = BRW_SURFACE_NULL << BRW_SURFACE_TYPE_SHIFT |
           BRW_SURFACEFORMAT_B8G8R8A8_UNORM << BRW_SURFACE_FORMAT_SHIFT;
   dw[1] = 0;
   dw[2] = (height - 1) << BRW_SURFACE_HEIGHT_SHIFT |
           (width - 1) << BRW_SURFACE_WIDTH_SHIFT |
           level << BRW_SURFACE_LOD_SHIFT;
   dw[3] = (depth - 1) << BRW_SURFACE_DEPTH_SHIFT |
           BRW_SURFACE_TILED;
   dw[4] = 0;
   dw[5] = 0;
   surf->bo = NULL;
}
/**
 * Initialize a SURFACE_STATE payload (GEN6 layout) for a buffer resource,
 * viewed as an array of structures of \p struct_size bytes whose accessed
 * element has format \p elem_format.
 *
 * \param offset          byte offset of the first structure in the buffer
 * \param size            byte size of the accessible range
 * \param is_rt           true when the surface is a render target
 * \param render_cache_rw allow render cache reads and writes
 */
void
ilo_gpe_init_view_surface_for_buffer_gen6(const struct ilo_dev_info *dev,
                                          const struct ilo_buffer *buf,
                                          unsigned offset, unsigned size,
                                          unsigned struct_size,
                                          enum pipe_format elem_format,
                                          bool is_rt, bool render_cache_rw,
                                          struct ilo_view_surface *surf)
{
   const int elem_size = util_format_get_blocksize(elem_format);
   int width, height, depth, pitch;
   int surface_format, num_entries;
   uint32_t *dw;
   ILO_GPE_VALID_GEN(dev, 6, 6);
   /*
    * For SURFTYPE_BUFFER, a SURFACE_STATE specifies an element of a
    * structure in a buffer.
    */
   surface_format = ilo_translate_color_format(elem_format);
   num_entries = size / struct_size;
   /* see if there is enough space to fit another element */
   if (size % struct_size >= elem_size)
      num_entries++;
   /*
    * From the Sandy Bridge PRM, volume 4 part 1, page 76:
    *
    *     "For SURFTYPE_BUFFER render targets, this field (Surface Base
    *      Address) specifies the base address of first element of the
    *      surface.  The surface is interpreted as a simple array of that
    *      single element type.  The address must be naturally-aligned to the
    *      element size (e.g., a buffer containing R32G32B32A32_FLOAT elements
    *      must be 16-byte aligned).
    *
    *      For SURFTYPE_BUFFER non-rendertarget surfaces, this field specifies
    *      the base address of the first element of the surface, computed in
    *      software by adding the surface base address to the byte offset of
    *      the element in the buffer."
    */
   if (is_rt)
      assert(offset % elem_size == 0);
   /*
    * From the Sandy Bridge PRM, volume 4 part 1, page 77:
    *
    *     "For buffer surfaces, the number of entries in the buffer ranges
    *      from 1 to 2^27."
    */
   assert(num_entries >= 1 && num_entries <= 1 << 27);
   /*
    * From the Sandy Bridge PRM, volume 4 part 1, page 81:
    *
    *     "For surfaces of type SURFTYPE_BUFFER, this field (Surface Pitch)
    *      indicates the size of the structure."
    */
   pitch = struct_size;
   /* the hardware fields store value-minus-one */
   pitch--;
   num_entries--;
   /* the entry count is split across the width/height/depth fields */
   /* bits [6:0] */
   width = (num_entries & 0x0000007f);
   /* bits [19:7] */
   height = (num_entries & 0x000fff80) >> 7;
   /* bits [26:20] */
   depth = (num_entries & 0x07f00000) >> 20;
   STATIC_ASSERT(Elements(surf->payload) >= 6);
   dw = surf->payload;
   dw[0] = BRW_SURFACE_BUFFER << BRW_SURFACE_TYPE_SHIFT |
           surface_format << BRW_SURFACE_FORMAT_SHIFT;
   if (render_cache_rw)
      dw[0] |= BRW_SURFACE_RC_READ_WRITE;
   dw[1] = offset;
   dw[2] = height << BRW_SURFACE_HEIGHT_SHIFT |
           width << BRW_SURFACE_WIDTH_SHIFT;
   dw[3] = depth << BRW_SURFACE_DEPTH_SHIFT |
           pitch << BRW_SURFACE_PITCH_SHIFT;
   dw[4] = 0;
   dw[5] = 0;
   /* do not increment reference count */
   surf->bo = buf->bo;
}
/**
 * Initialize a SURFACE_STATE payload (GEN6 layout) for sampling from, or
 * rendering to, \p tex.
 *
 * \param format          format the texture is viewed as
 * \param first_level     first mipmap level accessible through the view
 * \param num_levels      number of accessible mipmap levels
 * \param first_layer     first array layer accessible through the view
 * \param num_layers      number of accessible array layers
 * \param is_rt           true when the surface is a render target
 * \param render_cache_rw allow render cache reads and writes
 */
void
ilo_gpe_init_view_surface_for_texture_gen6(const struct ilo_dev_info *dev,
                                           const struct ilo_texture *tex,
                                           enum pipe_format format,
                                           unsigned first_level,
                                           unsigned num_levels,
                                           unsigned first_layer,
                                           unsigned num_layers,
                                           bool is_rt, bool render_cache_rw,
                                           struct ilo_view_surface *surf)
{
   int surface_type, surface_format;
   int width, height, depth, pitch, lod;
   unsigned layer_offset, x_offset, y_offset;
   uint32_t *dw;
   ILO_GPE_VALID_GEN(dev, 6, 6);
   surface_type = ilo_gpe_gen6_translate_texture(tex->base.target);
   assert(surface_type != BRW_SURFACE_BUFFER);
   /* when stencil lives in a separate buffer, view only the depth part */
   if (format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT && tex->separate_s8)
      format = PIPE_FORMAT_Z32_FLOAT;
   if (is_rt)
      surface_format = ilo_translate_render_format(format);
   else
      surface_format = ilo_translate_texture_format(format);
   assert(surface_format >= 0);
   width = tex->base.width0;
   height = tex->base.height0;
   depth = (tex->base.target == PIPE_TEXTURE_3D) ?
      tex->base.depth0 : num_layers;
   pitch = tex->bo_stride;
   if (surface_type == BRW_SURFACE_CUBE) {
      /*
       * From the Sandy Bridge PRM, volume 4 part 1, page 81:
       *
       *     "For SURFTYPE_CUBE: [DevSNB+]: for Sampling Engine Surfaces, the
       *      range of this field (Depth) is [0,84], indicating the number of
       *      cube array elements (equal to the number of underlying 2D array
       *      elements divided by 6).  For other surfaces, this field must be
       *      zero."
       *
       * When is_rt is true, we treat the texture as a 2D one to avoid the
       * restriction.
       */
      if (is_rt) {
         surface_type = BRW_SURFACE_2D;
      }
      else {
         assert(num_layers % 6 == 0);
         depth = num_layers / 6;
      }
   }
   /* sanity check the size */
   assert(width >= 1 && height >= 1 && depth >= 1 && pitch >= 1);
   /* per-type dimension limits, from the GEN6 SURFACE_STATE field ranges */
   switch (surface_type) {
   case BRW_SURFACE_1D:
      assert(width <= 8192 && height == 1 && depth <= 512);
      assert(first_layer < 512 && num_layers <= 512);
      break;
   case BRW_SURFACE_2D:
      assert(width <= 8192 && height <= 8192 && depth <= 512);
      assert(first_layer < 512 && num_layers <= 512);
      break;
   case BRW_SURFACE_3D:
      assert(width <= 2048 && height <= 2048 && depth <= 2048);
      assert(first_layer < 2048 && num_layers <= 512);
      if (!is_rt)
         assert(first_layer == 0);
      break;
   case BRW_SURFACE_CUBE:
      assert(width <= 8192 && height <= 8192 && depth <= 85);
      assert(width == height);
      assert(first_layer < 512 && num_layers <= 512);
      if (is_rt)
         assert(first_layer == 0);
      break;
   default:
      assert(!"unexpected surface type");
      break;
   }
   /* non-full array spacing is supported only on GEN7+ */
   assert(tex->array_spacing_full);
   /* non-interleaved samples are supported only on GEN7+ */
   if (tex->base.nr_samples > 1)
      assert(tex->interleaved);
   if (is_rt) {
      /*
       * Compute the offset to the layer manually.
       *
       * For rendering, the hardware requires LOD to be the same for all
       * render targets and the depth buffer.  We need to compute the offset
       * to the layer manually and always set LOD to 0.
       */
      /*
       * NOTE(review): the else branch of this `if (true)` is dead code,
       * apparently kept from an older code path.
       */
      if (true) {
         /* we lose the capability for layered rendering */
         assert(num_layers == 1);
         layer_offset = ilo_texture_get_slice_offset(tex,
               first_level, first_layer, &x_offset, &y_offset);
         /* X/Y offsets are programmed in units of 4x2 pixels */
         assert(x_offset % 4 == 0);
         assert(y_offset % 2 == 0);
         x_offset /= 4;
         y_offset /= 2;
         /* derive the size for the LOD */
         width = u_minify(width, first_level);
         height = u_minify(height, first_level);
         if (surface_type == BRW_SURFACE_3D)
            depth = u_minify(depth, first_level);
         else
            depth = 1;
         first_level = 0;
         first_layer = 0;
         lod = 0;
      }
      else {
         layer_offset = 0;
         x_offset = 0;
         y_offset = 0;
      }
      assert(num_levels == 1);
      /* first_level was reset to 0 above, so lod stays 0 here */
      lod = first_level;
   }
   else {
      layer_offset = 0;
      x_offset = 0;
      y_offset = 0;
      /* for sampling, LOD is the number of accessible levels minus one */
      lod = num_levels - 1;
   }
   /*
    * From the Sandy Bridge PRM, volume 4 part 1, page 76:
    *
    *     "Linear render target surface base addresses must be element-size
    *      aligned, for non-YUV surface formats, or a multiple of 2
    *      element-sizes for YUV surface formats.  Other linear surfaces have
    *      no alignment requirements (byte alignment is sufficient.)"
    *
    * From the Sandy Bridge PRM, volume 4 part 1, page 81:
    *
    *     "For linear render target surfaces, the pitch must be a multiple
    *      of the element size for non-YUV surface formats.  Pitch must be a
    *      multiple of 2 * element size for YUV surface formats."
    *
    * From the Sandy Bridge PRM, volume 4 part 1, page 86:
    *
    *     "For linear surfaces, this field (X Offset) must be zero"
    */
   if (tex->tiling == INTEL_TILING_NONE) {
      if (is_rt) {
         const int elem_size = util_format_get_blocksize(format);
         assert(layer_offset % elem_size == 0);
         assert(pitch % elem_size == 0);
      }
      assert(!x_offset);
   }
   STATIC_ASSERT(Elements(surf->payload) >= 6);
   dw = surf->payload;
   dw[0] = surface_type << BRW_SURFACE_TYPE_SHIFT |
           surface_format << BRW_SURFACE_FORMAT_SHIFT |
           BRW_SURFACE_MIPMAPLAYOUT_BELOW << BRW_SURFACE_MIPLAYOUT_SHIFT;
   if (surface_type == BRW_SURFACE_CUBE && !is_rt) {
      dw[0] |= 1 << 9 |
               BRW_SURFACE_CUBEFACE_ENABLES;
   }
   if (render_cache_rw)
      dw[0] |= BRW_SURFACE_RC_READ_WRITE;
   /* base address; nonzero only for the manual render-target offset above */
   dw[1] = layer_offset;
   dw[2] = (height - 1) << BRW_SURFACE_HEIGHT_SHIFT |
           (width - 1) << BRW_SURFACE_WIDTH_SHIFT |
           lod << BRW_SURFACE_LOD_SHIFT;
   dw[3] = (depth - 1) << BRW_SURFACE_DEPTH_SHIFT |
           (pitch - 1) << BRW_SURFACE_PITCH_SHIFT |
           ilo_gpe_gen6_translate_winsys_tiling(tex->tiling);
   dw[4] = first_level << BRW_SURFACE_MIN_LOD_SHIFT |
           first_layer << 17 |
           (num_layers - 1) << 8 |
           ((tex->base.nr_samples > 1) ? BRW_SURFACE_MULTISAMPLECOUNT_4 :
                                         BRW_SURFACE_MULTISAMPLECOUNT_1);
   dw[5] = x_offset << BRW_SURFACE_X_OFFSET_SHIFT |
           y_offset << BRW_SURFACE_Y_OFFSET_SHIFT;
   if (tex->valign_4)
      dw[5] |= BRW_SURFACE_VERTICAL_ALIGN_ENABLE;
   /* do not increment reference count */
   surf->bo = tex->bo;
}
static uint32_t |
gen6_emit_SURFACE_STATE(const struct ilo_dev_info *dev, |
const struct ilo_view_surface *surf, |
bool for_render, |
struct ilo_cp *cp) |
{ |
const int state_align = 32 / 4; |
const int state_len = (dev->gen >= ILO_GEN(7)) ? 8 : 6; |
uint32_t state_offset; |
uint32_t read_domains, write_domain; |
ILO_GPE_VALID_GEN(dev, 6, 7); |
if (for_render) { |
read_domains = INTEL_DOMAIN_RENDER; |
write_domain = INTEL_DOMAIN_RENDER; |
} |
else { |
read_domains = INTEL_DOMAIN_SAMPLER; |
write_domain = 0; |
} |
ilo_cp_steal(cp, "SURFACE_STATE", state_len, state_align, &state_offset); |
STATIC_ASSERT(Elements(surf->payload) >= 8); |
ilo_cp_write(cp, surf->payload[0]); |
ilo_cp_write_bo(cp, surf->payload[1], |
surf->bo, read_domains, write_domain); |
ilo_cp_write(cp, surf->payload[2]); |
ilo_cp_write(cp, surf->payload[3]); |
ilo_cp_write(cp, surf->payload[4]); |
ilo_cp_write(cp, surf->payload[5]); |
if (dev->gen >= ILO_GEN(7)) { |
ilo_cp_write(cp, surf->payload[6]); |
ilo_cp_write(cp, surf->payload[7]); |
} |
ilo_cp_end(cp); |
return state_offset; |
} |
static uint32_t |
gen6_emit_so_SURFACE_STATE(const struct ilo_dev_info *dev, |
const struct pipe_stream_output_target *so, |
const struct pipe_stream_output_info *so_info, |
int so_index, |
struct ilo_cp *cp) |
{ |
struct ilo_buffer *buf = ilo_buffer(so->buffer); |
unsigned bo_offset, struct_size; |
enum pipe_format elem_format; |
struct ilo_view_surface surf; |
ILO_GPE_VALID_GEN(dev, 6, 6); |
bo_offset = so->buffer_offset + so_info->output[so_index].dst_offset * 4; |
struct_size = so_info->stride[so_info->output[so_index].output_buffer] * 4; |
switch (so_info->output[so_index].num_components) { |
case 1: |
elem_format = PIPE_FORMAT_R32_FLOAT; |
break; |
case 2: |
elem_format = PIPE_FORMAT_R32G32_FLOAT; |
break; |
case 3: |
elem_format = PIPE_FORMAT_R32G32B32_FLOAT; |
break; |
case 4: |
elem_format = PIPE_FORMAT_R32G32B32A32_FLOAT; |
break; |
default: |
assert(!"unexpected SO components length"); |
elem_format = PIPE_FORMAT_R32_FLOAT; |
break; |
} |
ilo_gpe_init_view_surface_for_buffer_gen6(dev, buf, bo_offset, so->buffer_size, |
struct_size, elem_format, false, true, &surf); |
return gen6_emit_SURFACE_STATE(dev, &surf, false, cp); |
} |
static void |
sampler_init_border_color_gen6(const struct ilo_dev_info *dev, |
const union pipe_color_union *color, |
uint32_t *dw, int num_dwords) |
{ |
float rgba[4] = { |
color->f[0], color->f[1], color->f[2], color->f[3], |
}; |
ILO_GPE_VALID_GEN(dev, 6, 6); |
assert(num_dwords >= 12); |
/* |
* This state is not documented in the Sandy Bridge PRM, but in the |
* Ironlake PRM. SNORM8 seems to be in DW11 instead of DW1. |
*/ |
/* IEEE_FP */ |
dw[1] = fui(rgba[0]); |
dw[2] = fui(rgba[1]); |
dw[3] = fui(rgba[2]); |
dw[4] = fui(rgba[3]); |
/* FLOAT_16 */ |
dw[5] = util_float_to_half(rgba[0]) | |
util_float_to_half(rgba[1]) << 16; |
dw[6] = util_float_to_half(rgba[2]) | |
util_float_to_half(rgba[3]) << 16; |
/* clamp to [-1.0f, 1.0f] */ |
rgba[0] = CLAMP(rgba[0], -1.0f, 1.0f); |
rgba[1] = CLAMP(rgba[1], -1.0f, 1.0f); |
rgba[2] = CLAMP(rgba[2], -1.0f, 1.0f); |
rgba[3] = CLAMP(rgba[3], -1.0f, 1.0f); |
/* SNORM16 */ |
dw[9] = (int16_t) util_iround(rgba[0] * 32767.0f) | |
(int16_t) util_iround(rgba[1] * 32767.0f) << 16; |
dw[10] = (int16_t) util_iround(rgba[2] * 32767.0f) | |
(int16_t) util_iround(rgba[3] * 32767.0f) << 16; |
/* SNORM8 */ |
dw[11] = (int8_t) util_iround(rgba[0] * 127.0f) | |
(int8_t) util_iround(rgba[1] * 127.0f) << 8 | |
(int8_t) util_iround(rgba[2] * 127.0f) << 16 | |
(int8_t) util_iround(rgba[3] * 127.0f) << 24; |
/* clamp to [0.0f, 1.0f] */ |
rgba[0] = CLAMP(rgba[0], 0.0f, 1.0f); |
rgba[1] = CLAMP(rgba[1], 0.0f, 1.0f); |
rgba[2] = CLAMP(rgba[2], 0.0f, 1.0f); |
rgba[3] = CLAMP(rgba[3], 0.0f, 1.0f); |
/* UNORM8 */ |
dw[0] = (uint8_t) util_iround(rgba[0] * 255.0f) | |
(uint8_t) util_iround(rgba[1] * 255.0f) << 8 | |
(uint8_t) util_iround(rgba[2] * 255.0f) << 16 | |
(uint8_t) util_iround(rgba[3] * 255.0f) << 24; |
/* UNORM16 */ |
dw[7] = (uint16_t) util_iround(rgba[0] * 65535.0f) | |
(uint16_t) util_iround(rgba[1] * 65535.0f) << 16; |
dw[8] = (uint16_t) util_iround(rgba[2] * 65535.0f) | |
(uint16_t) util_iround(rgba[3] * 65535.0f) << 16; |
} |
/**
 * Translate a Gallium pipe_sampler_state into the CSO used by the GEN6/GEN7
 * SAMPLER_STATE emitters.
 *
 * Pre-packs as much of SAMPLER_STATE as possible into sampler->payload and
 * the dw_filter/dw_wrap variants; the per-view pieces (filter choice, wrap
 * mode, border color offset) are ORed in later by gen6_emit_SAMPLER_STATE().
 */
void
ilo_gpe_init_sampler_cso(const struct ilo_dev_info *dev,
                         const struct pipe_sampler_state *state,
                         struct ilo_sampler_cso *sampler)
{
   int mip_filter, min_filter, mag_filter, max_aniso;
   int lod_bias, max_lod, min_lod;
   int wrap_s, wrap_t, wrap_r, wrap_cube;
   bool clamp_is_to_edge;
   uint32_t dw0, dw1, dw3;

   ILO_GPE_VALID_GEN(dev, 6, 7);

   memset(sampler, 0, sizeof(*sampler));

   mip_filter = gen6_translate_tex_mipfilter(state->min_mip_filter);
   min_filter = gen6_translate_tex_filter(state->min_img_filter);
   mag_filter = gen6_translate_tex_filter(state->mag_img_filter);

   sampler->anisotropic = state->max_anisotropy;

   /* hardware encodes the aniso ratio as (ratio / 2) - 1, capped at 16:1 */
   if (state->max_anisotropy >= 2 && state->max_anisotropy <= 16)
      max_aniso = state->max_anisotropy / 2 - 1;
   else if (state->max_anisotropy > 16)
      max_aniso = BRW_ANISORATIO_16;
   else
      max_aniso = BRW_ANISORATIO_2;

   /*
    * Here is how the hardware calculates per-pixel LOD, from my reading of
    * the PRMs:
    *
    *  1) LOD is set to log2(ratio of texels to pixels) if not specified in
    *     other ways.  The number of texels is measured using level
    *     SurfMinLod.
    *  2) Bias is added to LOD.
    *  3) LOD is clamped to [MinLod, MaxLod], and the clamped value is
    *     compared with Base to determine whether magnification or
    *     minification is needed.  (if preclamp is disabled, LOD is compared
    *     with Base before clamping)
    *  4) If magnification is needed, or no mipmapping is requested, LOD is
    *     set to floor(MinLod).
    *  5) LOD is clamped to [0, MIPCnt], and SurfMinLod is added to LOD.
    *
    * With Gallium interface, Base is always zero and
    * pipe_sampler_view::u.tex.first_level specifies SurfMinLod.
    */
   if (dev->gen >= ILO_GEN(7)) {
      const float scale = 256.0f;

      /* [-16.0, 16.0) in S4.8 */
      lod_bias = (int)
         (CLAMP(state->lod_bias, -16.0f, 15.9f) * scale);
      lod_bias &= 0x1fff;

      /* [0.0, 14.0] in U4.8 */
      max_lod = (int) (CLAMP(state->max_lod, 0.0f, 14.0f) * scale);
      min_lod = (int) (CLAMP(state->min_lod, 0.0f, 14.0f) * scale);
   }
   else {
      /* GEN6 uses a narrower fixed-point format for the LOD fields */
      const float scale = 64.0f;

      /* [-16.0, 16.0) in S4.6 */
      lod_bias = (int)
         (CLAMP(state->lod_bias, -16.0f, 15.9f) * scale);
      lod_bias &= 0x7ff;

      /* [0.0, 13.0] in U4.6 */
      max_lod = (int) (CLAMP(state->max_lod, 0.0f, 13.0f) * scale);
      min_lod = (int) (CLAMP(state->min_lod, 0.0f, 13.0f) * scale);
   }

   /*
    * We want LOD to be clamped to determine magnification/minification, and
    * get set to zero when it is magnification or when mipmapping is disabled.
    * The hardware would set LOD to floor(MinLod) and that is a problem when
    * MinLod is greater than or equal to 1.0f.
    *
    * With Base being zero, it is always minification when MinLod is non-zero.
    * To achieve our goal, we just need to set MinLod to zero and set
    * MagFilter to MinFilter when mipmapping is disabled.
    */
   if (state->min_mip_filter == PIPE_TEX_MIPFILTER_NONE && min_lod) {
      min_lod = 0;
      mag_filter = min_filter;
   }

   /*
    * For nearest filtering, PIPE_TEX_WRAP_CLAMP means
    * PIPE_TEX_WRAP_CLAMP_TO_EDGE; for linear filtering, PIPE_TEX_WRAP_CLAMP
    * means PIPE_TEX_WRAP_CLAMP_TO_BORDER while additionally clamping the
    * texture coordinates to [0.0, 1.0].
    *
    * The clamping will be taken care of in the shaders.  There are two
    * filters here, but let the minification one has a say.
    */
   clamp_is_to_edge = (state->min_img_filter == PIPE_TEX_FILTER_NEAREST);
   if (!clamp_is_to_edge) {
      /* tell the shader which coordinates need the extra [0, 1] clamp */
      sampler->saturate_s = (state->wrap_s == PIPE_TEX_WRAP_CLAMP);
      sampler->saturate_t = (state->wrap_t == PIPE_TEX_WRAP_CLAMP);
      sampler->saturate_r = (state->wrap_r == PIPE_TEX_WRAP_CLAMP);
   }

   /* determine wrap s/t/r */
   wrap_s = gen6_translate_tex_wrap(state->wrap_s, clamp_is_to_edge);
   wrap_t = gen6_translate_tex_wrap(state->wrap_t, clamp_is_to_edge);
   wrap_r = gen6_translate_tex_wrap(state->wrap_r, clamp_is_to_edge);

   /*
    * From the Sandy Bridge PRM, volume 4 part 1, page 107:
    *
    *     "When using cube map texture coordinates, only TEXCOORDMODE_CLAMP
    *      and TEXCOORDMODE_CUBE settings are valid, and each TC component
    *      must have the same Address Control mode."
    *
    * From the Ivy Bridge PRM, volume 4 part 1, page 96:
    *
    *     "This field (Cube Surface Control Mode) must be set to
    *      CUBECTRLMODE_PROGRAMMED"
    *
    * Therefore, we cannot use "Cube Surface Control Mode" for seamless cube
    * map filtering.
    */
   if (state->seamless_cube_map &&
       (state->min_img_filter != PIPE_TEX_FILTER_NEAREST ||
        state->mag_img_filter != PIPE_TEX_FILTER_NEAREST)) {
      wrap_cube = BRW_TEXCOORDMODE_CUBE;
   }
   else {
      wrap_cube = BRW_TEXCOORDMODE_CLAMP;
   }

   if (!state->normalized_coords) {
      /*
       * From the Ivy Bridge PRM, volume 4 part 1, page 98:
       *
       *     "The following state must be set as indicated if this field
       *      (Non-normalized Coordinate Enable) is enabled:
       *
       *      - TCX/Y/Z Address Control Mode must be TEXCOORDMODE_CLAMP,
       *        TEXCOORDMODE_HALF_BORDER, or TEXCOORDMODE_CLAMP_BORDER.
       *      - Surface Type must be SURFTYPE_2D or SURFTYPE_3D.
       *      - Mag Mode Filter must be MAPFILTER_NEAREST or
       *        MAPFILTER_LINEAR.
       *      - Min Mode Filter must be MAPFILTER_NEAREST or
       *        MAPFILTER_LINEAR.
       *      - Mip Mode Filter must be MIPFILTER_NONE.
       *      - Min LOD must be 0.
       *      - Max LOD must be 0.
       *      - MIP Count must be 0.
       *      - Surface Min LOD must be 0.
       *      - Texture LOD Bias must be 0."
       */
      assert(wrap_s == BRW_TEXCOORDMODE_CLAMP ||
             wrap_s == BRW_TEXCOORDMODE_CLAMP_BORDER);
      assert(wrap_t == BRW_TEXCOORDMODE_CLAMP ||
             wrap_t == BRW_TEXCOORDMODE_CLAMP_BORDER);
      assert(wrap_r == BRW_TEXCOORDMODE_CLAMP ||
             wrap_r == BRW_TEXCOORDMODE_CLAMP_BORDER);

      assert(mag_filter == BRW_MAPFILTER_NEAREST ||
             mag_filter == BRW_MAPFILTER_LINEAR);
      assert(min_filter == BRW_MAPFILTER_NEAREST ||
             min_filter == BRW_MAPFILTER_LINEAR);

      /* work around a bug in util_blitter: force MIPFILTER_NONE */
      mip_filter = BRW_MIPFILTER_NONE;

      assert(mip_filter == BRW_MIPFILTER_NONE);
   }

   if (dev->gen >= ILO_GEN(7)) {
      /* GEN7 SAMPLER_STATE field layout */
      dw0 = 1 << 28 |
            mip_filter << 20 |
            lod_bias << 1;

      sampler->dw_filter = mag_filter << 17 |
                           min_filter << 14;

      sampler->dw_filter_aniso = BRW_MAPFILTER_ANISOTROPIC << 17 |
                                 BRW_MAPFILTER_ANISOTROPIC << 14 |
                                 1;

      dw1 = min_lod << 20 |
            max_lod << 8;

      if (state->compare_mode != PIPE_TEX_COMPARE_NONE)
         dw1 |= gen6_translate_shadow_func(state->compare_func) << 1;

      dw3 = max_aniso << 19;

      /* round the coordinates for linear filtering */
      if (min_filter != BRW_MAPFILTER_NEAREST) {
         dw3 |= (BRW_ADDRESS_ROUNDING_ENABLE_U_MIN |
                 BRW_ADDRESS_ROUNDING_ENABLE_V_MIN |
                 BRW_ADDRESS_ROUNDING_ENABLE_R_MIN) << 13;
      }
      if (mag_filter != BRW_MAPFILTER_NEAREST) {
         dw3 |= (BRW_ADDRESS_ROUNDING_ENABLE_U_MAG |
                 BRW_ADDRESS_ROUNDING_ENABLE_V_MAG |
                 BRW_ADDRESS_ROUNDING_ENABLE_R_MAG) << 13;
      }

      if (!state->normalized_coords)
         dw3 |= 1 << 10;

      sampler->dw_wrap = wrap_s << 6 |
                         wrap_t << 3 |
                         wrap_r;

      /*
       * As noted in the classic i965 driver, the HW may still reference
       * wrap_t and wrap_r for 1D textures.  We need to set them to a safe
       * mode
       */
      sampler->dw_wrap_1d = wrap_s << 6 |
                            BRW_TEXCOORDMODE_WRAP << 3 |
                            BRW_TEXCOORDMODE_WRAP;

      sampler->dw_wrap_cube = wrap_cube << 6 |
                              wrap_cube << 3 |
                              wrap_cube;

      STATIC_ASSERT(Elements(sampler->payload) >= 7);

      sampler->payload[0] = dw0;
      sampler->payload[1] = dw1;
      sampler->payload[2] = dw3;

      /* GEN7 border color is the raw 4-dword color */
      memcpy(&sampler->payload[3],
            state->border_color.ui, sizeof(state->border_color.ui));
   }
   else {
      /* GEN6 SAMPLER_STATE field layout */
      dw0 = 1 << 28 |
            mip_filter << 20 |
            lod_bias << 3;

      if (state->compare_mode != PIPE_TEX_COMPARE_NONE)
         dw0 |= gen6_translate_shadow_func(state->compare_func);

      sampler->dw_filter = (min_filter != mag_filter) << 27 |
                           mag_filter << 17 |
                           min_filter << 14;

      sampler->dw_filter_aniso = BRW_MAPFILTER_ANISOTROPIC << 17 |
                                 BRW_MAPFILTER_ANISOTROPIC << 14;

      dw1 = min_lod << 22 |
            max_lod << 12;

      sampler->dw_wrap = wrap_s << 6 |
                         wrap_t << 3 |
                         wrap_r;

      /* see the GEN7 comment: keep t/r safe for 1D textures */
      sampler->dw_wrap_1d = wrap_s << 6 |
                            BRW_TEXCOORDMODE_WRAP << 3 |
                            BRW_TEXCOORDMODE_WRAP;

      sampler->dw_wrap_cube = wrap_cube << 6 |
                              wrap_cube << 3 |
                              wrap_cube;

      dw3 = max_aniso << 19;

      /* round the coordinates for linear filtering */
      if (min_filter != BRW_MAPFILTER_NEAREST) {
         dw3 |= (BRW_ADDRESS_ROUNDING_ENABLE_U_MIN |
                 BRW_ADDRESS_ROUNDING_ENABLE_V_MIN |
                 BRW_ADDRESS_ROUNDING_ENABLE_R_MIN) << 13;
      }
      if (mag_filter != BRW_MAPFILTER_NEAREST) {
         dw3 |= (BRW_ADDRESS_ROUNDING_ENABLE_U_MAG |
                 BRW_ADDRESS_ROUNDING_ENABLE_V_MAG |
                 BRW_ADDRESS_ROUNDING_ENABLE_R_MAG) << 13;
      }

      if (!state->normalized_coords)
         dw3 |= 1;

      STATIC_ASSERT(Elements(sampler->payload) >= 15);

      sampler->payload[0] = dw0;
      sampler->payload[1] = dw1;
      sampler->payload[2] = dw3;

      /* GEN6 border color is pre-packed in many formats; 12 dwords */
      sampler_init_border_color_gen6(dev,
            &state->border_color, &sampler->payload[3], 12);
   }
}
static uint32_t |
gen6_emit_SAMPLER_STATE(const struct ilo_dev_info *dev, |
const struct ilo_sampler_cso * const *samplers, |
const struct pipe_sampler_view * const *views, |
const uint32_t *sampler_border_colors, |
int num_samplers, |
struct ilo_cp *cp) |
{ |
const int state_align = 32 / 4; |
const int state_len = 4 * num_samplers; |
uint32_t state_offset, *dw; |
int i; |
ILO_GPE_VALID_GEN(dev, 6, 7); |
/* |
* From the Sandy Bridge PRM, volume 4 part 1, page 101: |
* |
* "The sampler state is stored as an array of up to 16 elements..." |
*/ |
assert(num_samplers <= 16); |
if (!num_samplers) |
return 0; |
dw = ilo_cp_steal_ptr(cp, "SAMPLER_STATE", |
state_len, state_align, &state_offset); |
for (i = 0; i < num_samplers; i++) { |
const struct ilo_sampler_cso *sampler = samplers[i]; |
const struct pipe_sampler_view *view = views[i]; |
const uint32_t border_color = sampler_border_colors[i]; |
uint32_t dw_filter, dw_wrap; |
/* there may be holes */ |
if (!sampler || !view) { |
/* disabled sampler */ |
dw[0] = 1 << 31; |
dw[1] = 0; |
dw[2] = 0; |
dw[3] = 0; |
dw += 4; |
continue; |
} |
/* determine filter and wrap modes */ |
switch (view->texture->target) { |
case PIPE_TEXTURE_1D: |
dw_filter = (sampler->anisotropic) ? |
sampler->dw_filter_aniso : sampler->dw_filter; |
dw_wrap = sampler->dw_wrap_1d; |
break; |
case PIPE_TEXTURE_3D: |
/* |
* From the Sandy Bridge PRM, volume 4 part 1, page 103: |
* |
* "Only MAPFILTER_NEAREST and MAPFILTER_LINEAR are supported for |
* surfaces of type SURFTYPE_3D." |
*/ |
dw_filter = sampler->dw_filter; |
dw_wrap = sampler->dw_wrap; |
break; |
case PIPE_TEXTURE_CUBE: |
dw_filter = (sampler->anisotropic) ? |
sampler->dw_filter_aniso : sampler->dw_filter; |
dw_wrap = sampler->dw_wrap_cube; |
break; |
default: |
dw_filter = (sampler->anisotropic) ? |
sampler->dw_filter_aniso : sampler->dw_filter; |
dw_wrap = sampler->dw_wrap; |
break; |
} |
dw[0] = sampler->payload[0]; |
dw[1] = sampler->payload[1]; |
assert(!(border_color & 0x1f)); |
dw[2] = border_color; |
dw[3] = sampler->payload[2]; |
dw[0] |= dw_filter; |
if (dev->gen >= ILO_GEN(7)) { |
dw[3] |= dw_wrap; |
} |
else { |
/* |
* From the Sandy Bridge PRM, volume 4 part 1, page 21: |
* |
* "[DevSNB] Errata: Incorrect behavior is observed in cases |
* where the min and mag mode filters are different and |
* SurfMinLOD is nonzero. The determination of MagMode uses the |
* following equation instead of the one in the above |
* pseudocode: MagMode = (LOD + SurfMinLOD - Base <= 0)" |
* |
* As a way to work around that, we set Base to |
* view->u.tex.first_level. |
*/ |
dw[0] |= view->u.tex.first_level << 22; |
dw[1] |= dw_wrap; |
} |
dw += 4; |
} |
return state_offset; |
} |
static uint32_t |
gen6_emit_SAMPLER_BORDER_COLOR_STATE(const struct ilo_dev_info *dev, |
const struct ilo_sampler_cso *sampler, |
struct ilo_cp *cp) |
{ |
const int state_align = 32 / 4; |
const int state_len = (dev->gen >= ILO_GEN(7)) ? 4 : 12; |
uint32_t state_offset, *dw; |
ILO_GPE_VALID_GEN(dev, 6, 7); |
dw = ilo_cp_steal_ptr(cp, "SAMPLER_BORDER_COLOR_STATE", |
state_len, state_align, &state_offset); |
/* see ilo_gpe_init_sampler_cso() */ |
memcpy(dw, &sampler->payload[3], state_len * 4); |
return state_offset; |
} |
/**
 * Reserve a push constant buffer of at least \p size bytes and return its
 * offset.  On return, *pcb (if non-NULL) points at the writable area; the
 * alignment padding past \p size is zeroed.
 */
static uint32_t
gen6_emit_push_constant_buffer(const struct ilo_dev_info *dev,
                               int size, void **pcb,
                               struct ilo_cp *cp)
{
   /*
    * For all VS, GS, FS, and CS push constant buffers, they must be aligned
    * to 32 bytes, and their sizes are specified in 256-bit units.
    */
   const int state_align = 32 / 4;
   const int state_len = align(size, 32) / 4;
   const int padded_size = state_len * 4;
   uint32_t state_offset;
   char *area;

   ILO_GPE_VALID_GEN(dev, 6, 7);

   area = ilo_cp_steal_ptr(cp, "PUSH_CONSTANT_BUFFER",
         state_len, state_align, &state_offset);

   /* zero out the unused range */
   if (padded_size > size)
      memset(&area[size], 0, padded_size - size);

   if (pcb)
      *pcb = area;

   return state_offset;
}
static int |
gen6_estimate_command_size(const struct ilo_dev_info *dev, |
enum ilo_gpe_gen6_command cmd, |
int arg) |
{ |
static const struct { |
int header; |
int body; |
} gen6_command_size_table[ILO_GPE_GEN6_COMMAND_COUNT] = { |
[ILO_GPE_GEN6_STATE_BASE_ADDRESS] = { 0, 10 }, |
[ILO_GPE_GEN6_STATE_SIP] = { 0, 2 }, |
[ILO_GPE_GEN6_3DSTATE_VF_STATISTICS] = { 0, 1 }, |
[ILO_GPE_GEN6_PIPELINE_SELECT] = { 0, 1 }, |
[ILO_GPE_GEN6_MEDIA_VFE_STATE] = { 0, 8 }, |
[ILO_GPE_GEN6_MEDIA_CURBE_LOAD] = { 0, 4 }, |
[ILO_GPE_GEN6_MEDIA_INTERFACE_DESCRIPTOR_LOAD] = { 0, 4 }, |
[ILO_GPE_GEN6_MEDIA_GATEWAY_STATE] = { 0, 2 }, |
[ILO_GPE_GEN6_MEDIA_STATE_FLUSH] = { 0, 2 }, |
[ILO_GPE_GEN6_MEDIA_OBJECT_WALKER] = { 17, 1 }, |
[ILO_GPE_GEN6_3DSTATE_BINDING_TABLE_POINTERS] = { 0, 4 }, |
[ILO_GPE_GEN6_3DSTATE_SAMPLER_STATE_POINTERS] = { 0, 4 }, |
[ILO_GPE_GEN6_3DSTATE_URB] = { 0, 3 }, |
[ILO_GPE_GEN6_3DSTATE_VERTEX_BUFFERS] = { 1, 4 }, |
[ILO_GPE_GEN6_3DSTATE_VERTEX_ELEMENTS] = { 1, 2 }, |
[ILO_GPE_GEN6_3DSTATE_INDEX_BUFFER] = { 0, 3 }, |
[ILO_GPE_GEN6_3DSTATE_VIEWPORT_STATE_POINTERS] = { 0, 4 }, |
[ILO_GPE_GEN6_3DSTATE_CC_STATE_POINTERS] = { 0, 4 }, |
[ILO_GPE_GEN6_3DSTATE_SCISSOR_STATE_POINTERS] = { 0, 2 }, |
[ILO_GPE_GEN6_3DSTATE_VS] = { 0, 6 }, |
[ILO_GPE_GEN6_3DSTATE_GS] = { 0, 7 }, |
[ILO_GPE_GEN6_3DSTATE_CLIP] = { 0, 4 }, |
[ILO_GPE_GEN6_3DSTATE_SF] = { 0, 20 }, |
[ILO_GPE_GEN6_3DSTATE_WM] = { 0, 9 }, |
[ILO_GPE_GEN6_3DSTATE_CONSTANT_VS] = { 0, 5 }, |
[ILO_GPE_GEN6_3DSTATE_CONSTANT_GS] = { 0, 5 }, |
[ILO_GPE_GEN6_3DSTATE_CONSTANT_PS] = { 0, 5 }, |
[ILO_GPE_GEN6_3DSTATE_SAMPLE_MASK] = { 0, 2 }, |
[ILO_GPE_GEN6_3DSTATE_DRAWING_RECTANGLE] = { 0, 4 }, |
[ILO_GPE_GEN6_3DSTATE_DEPTH_BUFFER] = { 0, 7 }, |
[ILO_GPE_GEN6_3DSTATE_POLY_STIPPLE_OFFSET] = { 0, 2 }, |
[ILO_GPE_GEN6_3DSTATE_POLY_STIPPLE_PATTERN] = { 0, 33 }, |
[ILO_GPE_GEN6_3DSTATE_LINE_STIPPLE] = { 0, 3 }, |
[ILO_GPE_GEN6_3DSTATE_AA_LINE_PARAMETERS] = { 0, 3 }, |
[ILO_GPE_GEN6_3DSTATE_GS_SVB_INDEX] = { 0, 4 }, |
[ILO_GPE_GEN6_3DSTATE_MULTISAMPLE] = { 0, 3 }, |
[ILO_GPE_GEN6_3DSTATE_STENCIL_BUFFER] = { 0, 3 }, |
[ILO_GPE_GEN6_3DSTATE_HIER_DEPTH_BUFFER] = { 0, 3 }, |
[ILO_GPE_GEN6_3DSTATE_CLEAR_PARAMS] = { 0, 2 }, |
[ILO_GPE_GEN6_PIPE_CONTROL] = { 0, 5 }, |
[ILO_GPE_GEN6_3DPRIMITIVE] = { 0, 6 }, |
}; |
const int header = gen6_command_size_table[cmd].header; |
const int body = gen6_command_size_table[arg].body; |
const int count = arg; |
ILO_GPE_VALID_GEN(dev, 6, 6); |
assert(cmd < ILO_GPE_GEN6_COMMAND_COUNT); |
return (likely(count)) ? header + body * count : 0; |
} |
static int |
gen6_estimate_state_size(const struct ilo_dev_info *dev, |
enum ilo_gpe_gen6_state state, |
int arg) |
{ |
static const struct { |
int alignment; |
int body; |
bool is_array; |
} gen6_state_size_table[ILO_GPE_GEN6_STATE_COUNT] = { |
[ILO_GPE_GEN6_INTERFACE_DESCRIPTOR_DATA] = { 8, 8, true }, |
[ILO_GPE_GEN6_SF_VIEWPORT] = { 8, 8, true }, |
[ILO_GPE_GEN6_CLIP_VIEWPORT] = { 8, 4, true }, |
[ILO_GPE_GEN6_CC_VIEWPORT] = { 8, 2, true }, |
[ILO_GPE_GEN6_COLOR_CALC_STATE] = { 16, 6, false }, |
[ILO_GPE_GEN6_BLEND_STATE] = { 16, 2, true }, |
[ILO_GPE_GEN6_DEPTH_STENCIL_STATE] = { 16, 3, false }, |
[ILO_GPE_GEN6_SCISSOR_RECT] = { 8, 2, true }, |
[ILO_GPE_GEN6_BINDING_TABLE_STATE] = { 8, 1, true }, |
[ILO_GPE_GEN6_SURFACE_STATE] = { 8, 6, false }, |
[ILO_GPE_GEN6_SAMPLER_STATE] = { 8, 4, true }, |
[ILO_GPE_GEN6_SAMPLER_BORDER_COLOR_STATE] = { 8, 12, false }, |
[ILO_GPE_GEN6_PUSH_CONSTANT_BUFFER] = { 8, 1, true }, |
}; |
const int alignment = gen6_state_size_table[state].alignment; |
const int body = gen6_state_size_table[state].body; |
const bool is_array = gen6_state_size_table[state].is_array; |
const int count = arg; |
int estimate; |
ILO_GPE_VALID_GEN(dev, 6, 6); |
assert(state < ILO_GPE_GEN6_STATE_COUNT); |
if (likely(count)) { |
if (is_array) { |
estimate = (alignment - 1) + body * count; |
} |
else { |
estimate = (alignment - 1) + body; |
/* all states are aligned */ |
if (count > 1) |
estimate += util_align_npot(body, alignment) * (count - 1); |
} |
} |
else { |
estimate = 0; |
} |
return estimate; |
} |
/*
 * The GEN6 GPE dispatch table: size estimators plus one emitter per command
 * and indirect state, all hooked up to the gen6_emit_*() functions above.
 */
static const struct ilo_gpe_gen6 gen6_gpe = {
   .estimate_command_size = gen6_estimate_command_size,
   .estimate_state_size = gen6_estimate_state_size,

   /* map .emit_<name> to gen6_emit_<name> */
#define GEN6_SET(name) .emit_ ## name = gen6_emit_ ## name
   GEN6_SET(STATE_BASE_ADDRESS),
   GEN6_SET(STATE_SIP),
   GEN6_SET(3DSTATE_VF_STATISTICS),
   GEN6_SET(PIPELINE_SELECT),
   GEN6_SET(MEDIA_VFE_STATE),
   GEN6_SET(MEDIA_CURBE_LOAD),
   GEN6_SET(MEDIA_INTERFACE_DESCRIPTOR_LOAD),
   GEN6_SET(MEDIA_GATEWAY_STATE),
   GEN6_SET(MEDIA_STATE_FLUSH),
   GEN6_SET(MEDIA_OBJECT_WALKER),
   GEN6_SET(3DSTATE_BINDING_TABLE_POINTERS),
   GEN6_SET(3DSTATE_SAMPLER_STATE_POINTERS),
   GEN6_SET(3DSTATE_URB),
   GEN6_SET(3DSTATE_VERTEX_BUFFERS),
   GEN6_SET(3DSTATE_VERTEX_ELEMENTS),
   GEN6_SET(3DSTATE_INDEX_BUFFER),
   GEN6_SET(3DSTATE_VIEWPORT_STATE_POINTERS),
   GEN6_SET(3DSTATE_CC_STATE_POINTERS),
   GEN6_SET(3DSTATE_SCISSOR_STATE_POINTERS),
   GEN6_SET(3DSTATE_VS),
   GEN6_SET(3DSTATE_GS),
   GEN6_SET(3DSTATE_CLIP),
   GEN6_SET(3DSTATE_SF),
   GEN6_SET(3DSTATE_WM),
   GEN6_SET(3DSTATE_CONSTANT_VS),
   GEN6_SET(3DSTATE_CONSTANT_GS),
   GEN6_SET(3DSTATE_CONSTANT_PS),
   GEN6_SET(3DSTATE_SAMPLE_MASK),
   GEN6_SET(3DSTATE_DRAWING_RECTANGLE),
   GEN6_SET(3DSTATE_DEPTH_BUFFER),
   GEN6_SET(3DSTATE_POLY_STIPPLE_OFFSET),
   GEN6_SET(3DSTATE_POLY_STIPPLE_PATTERN),
   GEN6_SET(3DSTATE_LINE_STIPPLE),
   GEN6_SET(3DSTATE_AA_LINE_PARAMETERS),
   GEN6_SET(3DSTATE_GS_SVB_INDEX),
   GEN6_SET(3DSTATE_MULTISAMPLE),
   GEN6_SET(3DSTATE_STENCIL_BUFFER),
   GEN6_SET(3DSTATE_HIER_DEPTH_BUFFER),
   GEN6_SET(3DSTATE_CLEAR_PARAMS),
   GEN6_SET(PIPE_CONTROL),
   GEN6_SET(3DPRIMITIVE),
   GEN6_SET(INTERFACE_DESCRIPTOR_DATA),
   GEN6_SET(SF_VIEWPORT),
   GEN6_SET(CLIP_VIEWPORT),
   GEN6_SET(CC_VIEWPORT),
   GEN6_SET(COLOR_CALC_STATE),
   GEN6_SET(BLEND_STATE),
   GEN6_SET(DEPTH_STENCIL_STATE),
   GEN6_SET(SCISSOR_RECT),
   GEN6_SET(BINDING_TABLE_STATE),
   GEN6_SET(SURFACE_STATE),
   GEN6_SET(so_SURFACE_STATE),
   GEN6_SET(SAMPLER_STATE),
   GEN6_SET(SAMPLER_BORDER_COLOR_STATE),
   GEN6_SET(push_constant_buffer),
#undef GEN6_SET
};
/**
 * Return the GEN6 GPE dispatch table (a shared, immutable singleton).
 */
const struct ilo_gpe_gen6 *
ilo_gpe_gen6_get(void)
{
   return &gen6_gpe;
}
/contrib/sdk/sources/Mesa/src/gallium/drivers/ilo/ilo_gpe_gen6.h |
---|
0,0 → 1,560 |
/* |
* Mesa 3-D graphics library |
* |
* Copyright (C) 2012-2013 LunarG, Inc. |
* |
* Permission is hereby granted, free of charge, to any person obtaining a |
* copy of this software and associated documentation files (the "Software"), |
* to deal in the Software without restriction, including without limitation |
* the rights to use, copy, modify, merge, publish, distribute, sublicense, |
* and/or sell copies of the Software, and to permit persons to whom the |
* Software is furnished to do so, subject to the following conditions: |
* |
* The above copyright notice and this permission notice shall be included |
* in all copies or substantial portions of the Software. |
* |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING |
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER |
* DEALINGS IN THE SOFTWARE. |
* |
* Authors: |
* Chia-I Wu <olv@lunarg.com> |
*/ |
#ifndef ILO_GPE_GEN6_H |
#define ILO_GPE_GEN6_H |
#include "ilo_common.h" |
#include "ilo_gpe.h" |
/* assert (debug builds only) that dev->gen falls within [min_gen, max_gen] */
#define ILO_GPE_VALID_GEN(dev, min_gen, max_gen) \
   assert((dev)->gen >= ILO_GEN(min_gen) && (dev)->gen <= ILO_GEN(max_gen))

/* build a command header dword: type 3 plus pipeline/opcode/sub-opcode */
#define ILO_GPE_CMD(pipeline, op, subop) \
   (0x3 << 29 | (pipeline) << 27 | (op) << 24 | (subop) << 16)
/** |
* Commands that GEN6 GPE could emit. |
*/ |
enum ilo_gpe_gen6_command {
   /*
    * Each trailing comment is the (pipeline, opcode, sub-opcode) triple for
    * ILO_GPE_CMD().  These values are also used as indices into the command
    * size table in gen6_estimate_command_size(); keep the two in sync.
    */
   ILO_GPE_GEN6_STATE_BASE_ADDRESS,                  /* (0x0, 0x1, 0x01) */
   ILO_GPE_GEN6_STATE_SIP,                           /* (0x0, 0x1, 0x02) */
   ILO_GPE_GEN6_3DSTATE_VF_STATISTICS,               /* (0x1, 0x0, 0x0b) */
   ILO_GPE_GEN6_PIPELINE_SELECT,                     /* (0x1, 0x1, 0x04) */
   ILO_GPE_GEN6_MEDIA_VFE_STATE,                     /* (0x2, 0x0, 0x00) */
   ILO_GPE_GEN6_MEDIA_CURBE_LOAD,                    /* (0x2, 0x0, 0x01) */
   ILO_GPE_GEN6_MEDIA_INTERFACE_DESCRIPTOR_LOAD,     /* (0x2, 0x0, 0x02) */
   ILO_GPE_GEN6_MEDIA_GATEWAY_STATE,                 /* (0x2, 0x0, 0x03) */
   ILO_GPE_GEN6_MEDIA_STATE_FLUSH,                   /* (0x2, 0x0, 0x04) */
   ILO_GPE_GEN6_MEDIA_OBJECT_WALKER,                 /* (0x2, 0x1, 0x03) */
   ILO_GPE_GEN6_3DSTATE_BINDING_TABLE_POINTERS,      /* (0x3, 0x0, 0x01) */
   ILO_GPE_GEN6_3DSTATE_SAMPLER_STATE_POINTERS,      /* (0x3, 0x0, 0x02) */
   ILO_GPE_GEN6_3DSTATE_URB,                         /* (0x3, 0x0, 0x05) */
   ILO_GPE_GEN6_3DSTATE_VERTEX_BUFFERS,              /* (0x3, 0x0, 0x08) */
   ILO_GPE_GEN6_3DSTATE_VERTEX_ELEMENTS,             /* (0x3, 0x0, 0x09) */
   ILO_GPE_GEN6_3DSTATE_INDEX_BUFFER,                /* (0x3, 0x0, 0x0a) */
   ILO_GPE_GEN6_3DSTATE_VIEWPORT_STATE_POINTERS,     /* (0x3, 0x0, 0x0d) */
   ILO_GPE_GEN6_3DSTATE_CC_STATE_POINTERS,           /* (0x3, 0x0, 0x0e) */
   ILO_GPE_GEN6_3DSTATE_SCISSOR_STATE_POINTERS,      /* (0x3, 0x0, 0x0f) */
   ILO_GPE_GEN6_3DSTATE_VS,                          /* (0x3, 0x0, 0x10) */
   ILO_GPE_GEN6_3DSTATE_GS,                          /* (0x3, 0x0, 0x11) */
   ILO_GPE_GEN6_3DSTATE_CLIP,                        /* (0x3, 0x0, 0x12) */
   ILO_GPE_GEN6_3DSTATE_SF,                          /* (0x3, 0x0, 0x13) */
   ILO_GPE_GEN6_3DSTATE_WM,                          /* (0x3, 0x0, 0x14) */
   ILO_GPE_GEN6_3DSTATE_CONSTANT_VS,                 /* (0x3, 0x0, 0x15) */
   ILO_GPE_GEN6_3DSTATE_CONSTANT_GS,                 /* (0x3, 0x0, 0x16) */
   ILO_GPE_GEN6_3DSTATE_CONSTANT_PS,                 /* (0x3, 0x0, 0x17) */
   ILO_GPE_GEN6_3DSTATE_SAMPLE_MASK,                 /* (0x3, 0x0, 0x18) */
   ILO_GPE_GEN6_3DSTATE_DRAWING_RECTANGLE,           /* (0x3, 0x1, 0x00) */
   ILO_GPE_GEN6_3DSTATE_DEPTH_BUFFER,                /* (0x3, 0x1, 0x05) */
   ILO_GPE_GEN6_3DSTATE_POLY_STIPPLE_OFFSET,         /* (0x3, 0x1, 0x06) */
   ILO_GPE_GEN6_3DSTATE_POLY_STIPPLE_PATTERN,        /* (0x3, 0x1, 0x07) */
   ILO_GPE_GEN6_3DSTATE_LINE_STIPPLE,                /* (0x3, 0x1, 0x08) */
   ILO_GPE_GEN6_3DSTATE_AA_LINE_PARAMETERS,          /* (0x3, 0x1, 0x0a) */
   ILO_GPE_GEN6_3DSTATE_GS_SVB_INDEX,                /* (0x3, 0x1, 0x0b) */
   ILO_GPE_GEN6_3DSTATE_MULTISAMPLE,                 /* (0x3, 0x1, 0x0d) */
   ILO_GPE_GEN6_3DSTATE_STENCIL_BUFFER,              /* (0x3, 0x1, 0x0e) */
   ILO_GPE_GEN6_3DSTATE_HIER_DEPTH_BUFFER,           /* (0x3, 0x1, 0x0f) */
   ILO_GPE_GEN6_3DSTATE_CLEAR_PARAMS,                /* (0x3, 0x1, 0x10) */
   ILO_GPE_GEN6_PIPE_CONTROL,                        /* (0x3, 0x2, 0x00) */
   ILO_GPE_GEN6_3DPRIMITIVE,                         /* (0x3, 0x3, 0x00) */

   ILO_GPE_GEN6_COMMAND_COUNT,
};
/** |
* Indirect states that GEN6 GPE could emit. |
*/ |
enum ilo_gpe_gen6_state {
   /*
    * These values are also used as indices into the state size table in
    * gen6_estimate_state_size(); keep the two in sync.
    */
   ILO_GPE_GEN6_INTERFACE_DESCRIPTOR_DATA,
   ILO_GPE_GEN6_SF_VIEWPORT,
   ILO_GPE_GEN6_CLIP_VIEWPORT,
   ILO_GPE_GEN6_CC_VIEWPORT,
   ILO_GPE_GEN6_COLOR_CALC_STATE,
   ILO_GPE_GEN6_BLEND_STATE,
   ILO_GPE_GEN6_DEPTH_STENCIL_STATE,
   ILO_GPE_GEN6_SCISSOR_RECT,
   ILO_GPE_GEN6_BINDING_TABLE_STATE,
   ILO_GPE_GEN6_SURFACE_STATE,
   ILO_GPE_GEN6_SAMPLER_STATE,
   ILO_GPE_GEN6_SAMPLER_BORDER_COLOR_STATE,
   ILO_GPE_GEN6_PUSH_CONSTANT_BUFFER,

   ILO_GPE_GEN6_STATE_COUNT,
};
/* forward declarations; a pointer is enough for these in this header */
enum intel_tiling_mode;
struct intel_bo;
struct ilo_cp;
struct ilo_texture;
struct ilo_shader;

/*
 * Function-pointer types for the GEN6 GPE emitters.  Each type matches the
 * signature of the corresponding gen6_emit_<NAME>() function and is used as
 * a member of struct ilo_gpe_gen6 so that other GENs can provide their own
 * implementations.
 */
typedef void
(*ilo_gpe_gen6_STATE_BASE_ADDRESS)(const struct ilo_dev_info *dev,
                                   struct intel_bo *general_state_bo,
                                   struct intel_bo *surface_state_bo,
                                   struct intel_bo *dynamic_state_bo,
                                   struct intel_bo *indirect_object_bo,
                                   struct intel_bo *instruction_bo,
                                   uint32_t general_state_size,
                                   uint32_t dynamic_state_size,
                                   uint32_t indirect_object_size,
                                   uint32_t instruction_size,
                                   struct ilo_cp *cp);

typedef void
(*ilo_gpe_gen6_STATE_SIP)(const struct ilo_dev_info *dev,
                          uint32_t sip,
                          struct ilo_cp *cp);

typedef void
(*ilo_gpe_gen6_3DSTATE_VF_STATISTICS)(const struct ilo_dev_info *dev,
                                      bool enable,
                                      struct ilo_cp *cp);

typedef void
(*ilo_gpe_gen6_PIPELINE_SELECT)(const struct ilo_dev_info *dev,
                                int pipeline,
                                struct ilo_cp *cp);

typedef void
(*ilo_gpe_gen6_MEDIA_VFE_STATE)(const struct ilo_dev_info *dev,
                                int max_threads, int num_urb_entries,
                                int urb_entry_size,
                                struct ilo_cp *cp);

typedef void
(*ilo_gpe_gen6_MEDIA_CURBE_LOAD)(const struct ilo_dev_info *dev,
                                 uint32_t buf, int size,
                                 struct ilo_cp *cp);

typedef void
(*ilo_gpe_gen6_MEDIA_INTERFACE_DESCRIPTOR_LOAD)(const struct ilo_dev_info *dev,
                                                uint32_t offset, int num_ids,
                                                struct ilo_cp *cp);

typedef void
(*ilo_gpe_gen6_MEDIA_GATEWAY_STATE)(const struct ilo_dev_info *dev,
                                    int id, int byte, int thread_count,
                                    struct ilo_cp *cp);

typedef void
(*ilo_gpe_gen6_MEDIA_STATE_FLUSH)(const struct ilo_dev_info *dev,
                                  int thread_count_water_mark,
                                  int barrier_mask,
                                  struct ilo_cp *cp);

typedef void
(*ilo_gpe_gen6_MEDIA_OBJECT_WALKER)(const struct ilo_dev_info *dev,
                                    struct ilo_cp *cp);

typedef void
(*ilo_gpe_gen6_3DSTATE_BINDING_TABLE_POINTERS)(const struct ilo_dev_info *dev,
                                               uint32_t vs_binding_table,
                                               uint32_t gs_binding_table,
                                               uint32_t ps_binding_table,
                                               struct ilo_cp *cp);

typedef void
(*ilo_gpe_gen6_3DSTATE_SAMPLER_STATE_POINTERS)(const struct ilo_dev_info *dev,
                                               uint32_t vs_sampler_state,
                                               uint32_t gs_sampler_state,
                                               uint32_t ps_sampler_state,
                                               struct ilo_cp *cp);

typedef void
(*ilo_gpe_gen6_3DSTATE_URB)(const struct ilo_dev_info *dev,
                            int vs_total_size, int gs_total_size,
                            int vs_entry_size, int gs_entry_size,
                            struct ilo_cp *cp);

typedef void
(*ilo_gpe_gen6_3DSTATE_VERTEX_BUFFERS)(const struct ilo_dev_info *dev,
                                       const struct pipe_vertex_buffer *vbuffers,
                                       uint64_t vbuffer_mask,
                                       const struct ilo_ve_state *ve,
                                       struct ilo_cp *cp);

typedef void
(*ilo_gpe_gen6_3DSTATE_VERTEX_ELEMENTS)(const struct ilo_dev_info *dev,
                                        const struct ilo_ve_state *ve,
                                        bool last_velement_edgeflag,
                                        bool prepend_generated_ids,
                                        struct ilo_cp *cp);

typedef void
(*ilo_gpe_gen6_3DSTATE_INDEX_BUFFER)(const struct ilo_dev_info *dev,
                                     const struct ilo_ib_state *ib,
                                     bool enable_cut_index,
                                     struct ilo_cp *cp);

typedef void
(*ilo_gpe_gen6_3DSTATE_VIEWPORT_STATE_POINTERS)(const struct ilo_dev_info *dev,
                                                uint32_t clip_viewport,
                                                uint32_t sf_viewport,
                                                uint32_t cc_viewport,
                                                struct ilo_cp *cp);

typedef void
(*ilo_gpe_gen6_3DSTATE_CC_STATE_POINTERS)(const struct ilo_dev_info *dev,
                                          uint32_t blend_state,
                                          uint32_t depth_stencil_state,
                                          uint32_t color_calc_state,
                                          struct ilo_cp *cp);

typedef void
(*ilo_gpe_gen6_3DSTATE_SCISSOR_STATE_POINTERS)(const struct ilo_dev_info *dev,
                                               uint32_t scissor_rect,
                                               struct ilo_cp *cp);

typedef void
(*ilo_gpe_gen6_3DSTATE_VS)(const struct ilo_dev_info *dev,
                           const struct ilo_shader_state *vs,
                           int num_samplers,
                           struct ilo_cp *cp);

typedef void
(*ilo_gpe_gen6_3DSTATE_GS)(const struct ilo_dev_info *dev,
                           const struct ilo_shader_state *gs,
                           const struct ilo_shader_state *vs,
                           int verts_per_prim,
                           struct ilo_cp *cp);

typedef void
(*ilo_gpe_gen6_3DSTATE_CLIP)(const struct ilo_dev_info *dev,
                             const struct ilo_rasterizer_state *rasterizer,
                             const struct ilo_shader_state *fs,
                             bool enable_guardband,
                             int num_viewports,
                             struct ilo_cp *cp);

typedef void
(*ilo_gpe_gen6_3DSTATE_SF)(const struct ilo_dev_info *dev,
                           const struct ilo_rasterizer_state *rasterizer,
                           const struct ilo_shader_state *fs,
                           const struct ilo_shader_state *last_sh,
                           struct ilo_cp *cp);

typedef void
(*ilo_gpe_gen6_3DSTATE_WM)(const struct ilo_dev_info *dev,
                           const struct ilo_shader_state *fs,
                           int num_samplers,
                           const struct ilo_rasterizer_state *rasterizer,
                           bool dual_blend, bool cc_may_kill,
                           struct ilo_cp *cp);

typedef void
(*ilo_gpe_gen6_3DSTATE_CONSTANT_VS)(const struct ilo_dev_info *dev,
                                    const uint32_t *bufs, const int *sizes,
                                    int num_bufs,
                                    struct ilo_cp *cp);

typedef void
(*ilo_gpe_gen6_3DSTATE_CONSTANT_GS)(const struct ilo_dev_info *dev,
                                    const uint32_t *bufs, const int *sizes,
                                    int num_bufs,
                                    struct ilo_cp *cp);
typedef void |
(*ilo_gpe_gen6_3DSTATE_CONSTANT_PS)(const struct ilo_dev_info *dev, |
const uint32_t *bufs, const int *sizes, |
int num_bufs, |
struct ilo_cp *cp); |
typedef void |
(*ilo_gpe_gen6_3DSTATE_SAMPLE_MASK)(const struct ilo_dev_info *dev, |
unsigned sample_mask, |
struct ilo_cp *cp); |
typedef void |
(*ilo_gpe_gen6_3DSTATE_DRAWING_RECTANGLE)(const struct ilo_dev_info *dev, |
unsigned x, unsigned y, |
unsigned width, unsigned height, |
struct ilo_cp *cp); |
typedef void |
(*ilo_gpe_gen6_3DSTATE_DEPTH_BUFFER)(const struct ilo_dev_info *dev, |
const struct ilo_zs_surface *zs, |
struct ilo_cp *cp); |
typedef void |
(*ilo_gpe_gen6_3DSTATE_POLY_STIPPLE_OFFSET)(const struct ilo_dev_info *dev, |
int x_offset, int y_offset, |
struct ilo_cp *cp); |
typedef void |
(*ilo_gpe_gen6_3DSTATE_POLY_STIPPLE_PATTERN)(const struct ilo_dev_info *dev, |
const struct pipe_poly_stipple *pattern, |
struct ilo_cp *cp); |
typedef void |
(*ilo_gpe_gen6_3DSTATE_LINE_STIPPLE)(const struct ilo_dev_info *dev, |
unsigned pattern, unsigned factor, |
struct ilo_cp *cp); |
typedef void |
(*ilo_gpe_gen6_3DSTATE_AA_LINE_PARAMETERS)(const struct ilo_dev_info *dev, |
struct ilo_cp *cp); |
typedef void |
(*ilo_gpe_gen6_3DSTATE_GS_SVB_INDEX)(const struct ilo_dev_info *dev, |
int index, unsigned svbi, |
unsigned max_svbi, |
bool load_vertex_count, |
struct ilo_cp *cp); |
typedef void |
(*ilo_gpe_gen6_3DSTATE_MULTISAMPLE)(const struct ilo_dev_info *dev, |
int num_samples, |
const uint32_t *packed_sample_pos, |
bool pixel_location_center, |
struct ilo_cp *cp); |
typedef void |
(*ilo_gpe_gen6_3DSTATE_STENCIL_BUFFER)(const struct ilo_dev_info *dev, |
const struct ilo_zs_surface *zs, |
struct ilo_cp *cp); |
typedef void |
(*ilo_gpe_gen6_3DSTATE_HIER_DEPTH_BUFFER)(const struct ilo_dev_info *dev, |
const struct ilo_zs_surface *zs, |
struct ilo_cp *cp); |
typedef void |
(*ilo_gpe_gen6_3DSTATE_CLEAR_PARAMS)(const struct ilo_dev_info *dev, |
uint32_t clear_val, |
struct ilo_cp *cp); |
typedef void |
(*ilo_gpe_gen6_PIPE_CONTROL)(const struct ilo_dev_info *dev, |
uint32_t dw1, |
struct intel_bo *bo, uint32_t bo_offset, |
bool write_qword, |
struct ilo_cp *cp); |
typedef void |
(*ilo_gpe_gen6_3DPRIMITIVE)(const struct ilo_dev_info *dev, |
const struct pipe_draw_info *info, |
const struct ilo_ib_state *ib, |
bool rectlist, |
struct ilo_cp *cp); |
typedef uint32_t |
(*ilo_gpe_gen6_INTERFACE_DESCRIPTOR_DATA)(const struct ilo_dev_info *dev, |
const struct ilo_shader_state **cs, |
uint32_t *sampler_state, |
int *num_samplers, |
uint32_t *binding_table_state, |
int *num_surfaces, |
int num_ids, |
struct ilo_cp *cp); |
typedef uint32_t |
(*ilo_gpe_gen6_SF_VIEWPORT)(const struct ilo_dev_info *dev, |
const struct ilo_viewport_cso *viewports, |
unsigned num_viewports, |
struct ilo_cp *cp); |
typedef uint32_t |
(*ilo_gpe_gen6_CLIP_VIEWPORT)(const struct ilo_dev_info *dev, |
const struct ilo_viewport_cso *viewports, |
unsigned num_viewports, |
struct ilo_cp *cp); |
typedef uint32_t |
(*ilo_gpe_gen6_CC_VIEWPORT)(const struct ilo_dev_info *dev, |
const struct ilo_viewport_cso *viewports, |
unsigned num_viewports, |
struct ilo_cp *cp); |
typedef uint32_t |
(*ilo_gpe_gen6_COLOR_CALC_STATE)(const struct ilo_dev_info *dev, |
const struct pipe_stencil_ref *stencil_ref, |
float alpha_ref, |
const struct pipe_blend_color *blend_color, |
struct ilo_cp *cp); |
typedef uint32_t |
(*ilo_gpe_gen6_BLEND_STATE)(const struct ilo_dev_info *dev, |
const struct ilo_blend_state *blend, |
const struct ilo_fb_state *fb, |
const struct pipe_alpha_state *alpha, |
struct ilo_cp *cp); |
typedef uint32_t |
(*ilo_gpe_gen6_DEPTH_STENCIL_STATE)(const struct ilo_dev_info *dev, |
const struct ilo_dsa_state *dsa, |
struct ilo_cp *cp); |
typedef uint32_t |
(*ilo_gpe_gen6_SCISSOR_RECT)(const struct ilo_dev_info *dev, |
const struct ilo_scissor_state *scissor, |
unsigned num_viewports, |
struct ilo_cp *cp); |
typedef uint32_t |
(*ilo_gpe_gen6_BINDING_TABLE_STATE)(const struct ilo_dev_info *dev, |
uint32_t *surface_states, |
int num_surface_states, |
struct ilo_cp *cp); |
typedef uint32_t |
(*ilo_gpe_gen6_SURFACE_STATE)(const struct ilo_dev_info *dev, |
const struct ilo_view_surface *surface, |
bool for_render, |
struct ilo_cp *cp); |
typedef uint32_t |
(*ilo_gpe_gen6_so_SURFACE_STATE)(const struct ilo_dev_info *dev, |
const struct pipe_stream_output_target *so, |
const struct pipe_stream_output_info *so_info, |
int so_index, |
struct ilo_cp *cp); |
typedef uint32_t |
(*ilo_gpe_gen6_SAMPLER_STATE)(const struct ilo_dev_info *dev, |
const struct ilo_sampler_cso * const *samplers, |
const struct pipe_sampler_view * const *views, |
const uint32_t *sampler_border_colors, |
int num_samplers, |
struct ilo_cp *cp); |
typedef uint32_t |
(*ilo_gpe_gen6_SAMPLER_BORDER_COLOR_STATE)(const struct ilo_dev_info *dev, |
const struct ilo_sampler_cso *sampler, |
struct ilo_cp *cp); |
typedef uint32_t |
(*ilo_gpe_gen6_push_constant_buffer)(const struct ilo_dev_info *dev, |
int size, void **pcb, |
struct ilo_cp *cp); |
/**
 * GEN6 graphics processing engine
 *
 * This is a low-level interface.  It does not handle the interdependencies
 * between states.  Each emit_* member emits exactly one command or state
 * object; the typedefs above define the member signatures.
 */
struct ilo_gpe_gen6 {
   /* return the size, in dwords, a command will occupy in the batch */
   int (*estimate_command_size)(const struct ilo_dev_info *dev,
                                enum ilo_gpe_gen6_command cmd,
                                int arg);

   /* return the size, in dwords, a state object will occupy */
   int (*estimate_state_size)(const struct ilo_dev_info *dev,
                              enum ilo_gpe_gen6_state state,
                              int arg);

   /* declare one member "ilo_gpe_gen6_<name> emit_<name>" per command/state */
#define GEN6_EMIT(name) ilo_gpe_gen6_ ## name emit_ ## name
   GEN6_EMIT(STATE_BASE_ADDRESS);
   GEN6_EMIT(STATE_SIP);
   GEN6_EMIT(3DSTATE_VF_STATISTICS);
   GEN6_EMIT(PIPELINE_SELECT);
   GEN6_EMIT(MEDIA_VFE_STATE);
   GEN6_EMIT(MEDIA_CURBE_LOAD);
   GEN6_EMIT(MEDIA_INTERFACE_DESCRIPTOR_LOAD);
   GEN6_EMIT(MEDIA_GATEWAY_STATE);
   GEN6_EMIT(MEDIA_STATE_FLUSH);
   GEN6_EMIT(MEDIA_OBJECT_WALKER);
   GEN6_EMIT(3DSTATE_BINDING_TABLE_POINTERS);
   GEN6_EMIT(3DSTATE_SAMPLER_STATE_POINTERS);
   GEN6_EMIT(3DSTATE_URB);
   GEN6_EMIT(3DSTATE_VERTEX_BUFFERS);
   GEN6_EMIT(3DSTATE_VERTEX_ELEMENTS);
   GEN6_EMIT(3DSTATE_INDEX_BUFFER);
   GEN6_EMIT(3DSTATE_VIEWPORT_STATE_POINTERS);
   GEN6_EMIT(3DSTATE_CC_STATE_POINTERS);
   GEN6_EMIT(3DSTATE_SCISSOR_STATE_POINTERS);
   GEN6_EMIT(3DSTATE_VS);
   GEN6_EMIT(3DSTATE_GS);
   GEN6_EMIT(3DSTATE_CLIP);
   GEN6_EMIT(3DSTATE_SF);
   GEN6_EMIT(3DSTATE_WM);
   GEN6_EMIT(3DSTATE_CONSTANT_VS);
   GEN6_EMIT(3DSTATE_CONSTANT_GS);
   GEN6_EMIT(3DSTATE_CONSTANT_PS);
   GEN6_EMIT(3DSTATE_SAMPLE_MASK);
   GEN6_EMIT(3DSTATE_DRAWING_RECTANGLE);
   GEN6_EMIT(3DSTATE_DEPTH_BUFFER);
   GEN6_EMIT(3DSTATE_POLY_STIPPLE_OFFSET);
   GEN6_EMIT(3DSTATE_POLY_STIPPLE_PATTERN);
   GEN6_EMIT(3DSTATE_LINE_STIPPLE);
   GEN6_EMIT(3DSTATE_AA_LINE_PARAMETERS);
   GEN6_EMIT(3DSTATE_GS_SVB_INDEX);
   GEN6_EMIT(3DSTATE_MULTISAMPLE);
   GEN6_EMIT(3DSTATE_STENCIL_BUFFER);
   GEN6_EMIT(3DSTATE_HIER_DEPTH_BUFFER);
   GEN6_EMIT(3DSTATE_CLEAR_PARAMS);
   GEN6_EMIT(PIPE_CONTROL);
   GEN6_EMIT(3DPRIMITIVE);
   GEN6_EMIT(INTERFACE_DESCRIPTOR_DATA);
   GEN6_EMIT(SF_VIEWPORT);
   GEN6_EMIT(CLIP_VIEWPORT);
   GEN6_EMIT(CC_VIEWPORT);
   GEN6_EMIT(COLOR_CALC_STATE);
   GEN6_EMIT(BLEND_STATE);
   GEN6_EMIT(DEPTH_STENCIL_STATE);
   GEN6_EMIT(SCISSOR_RECT);
   GEN6_EMIT(BINDING_TABLE_STATE);
   GEN6_EMIT(SURFACE_STATE);
   GEN6_EMIT(so_SURFACE_STATE);
   GEN6_EMIT(SAMPLER_STATE);
   GEN6_EMIT(SAMPLER_BORDER_COLOR_STATE);
   GEN6_EMIT(push_constant_buffer);
#undef GEN6_EMIT
};
/* return the GEN6 GPE function table */
const struct ilo_gpe_gen6 *
ilo_gpe_gen6_get(void);

/* Below are helpers for other GENs */

/* map an intel_tiling_mode to the value used in hardware state */
int
ilo_gpe_gen6_translate_winsys_tiling(enum intel_tiling_mode tiling);

/* map a pipe primitive type to the hardware topology value */
int
ilo_gpe_gen6_translate_pipe_prim(unsigned prim);

/* map a pipe texture target to the hardware surface type value */
int
ilo_gpe_gen6_translate_texture(enum pipe_texture_target target);

/* fill the rasterization dwords shared by GEN6 3DSTATE_SF and GEN7 variants */
void
ilo_gpe_gen6_fill_3dstate_sf_raster(const struct ilo_dev_info *dev,
                                    const struct ilo_rasterizer_state *rasterizer,
                                    int num_samples,
                                    enum pipe_format depth_format,
                                    uint32_t *payload, unsigned payload_len);

/* fill the setup-backend dwords shared by GEN6 3DSTATE_SF and GEN7 3DSTATE_SBE */
void
ilo_gpe_gen6_fill_3dstate_sf_sbe(const struct ilo_dev_info *dev,
                                 const struct ilo_rasterizer_state *rasterizer,
                                 const struct ilo_shader_state *fs,
                                 const struct ilo_shader_state *last_sh,
                                 uint32_t *dw, int num_dwords);
#endif /* ILO_GPE_GEN6_H */ |
/contrib/sdk/sources/Mesa/src/gallium/drivers/ilo/ilo_gpe_gen7.c |
---|
0,0 → 1,1939 |
/* |
* Mesa 3-D graphics library |
* |
* Copyright (C) 2013 LunarG, Inc. |
* |
* Permission is hereby granted, free of charge, to any person obtaining a |
* copy of this software and associated documentation files (the "Software"), |
* to deal in the Software without restriction, including without limitation |
* the rights to use, copy, modify, merge, publish, distribute, sublicense, |
* and/or sell copies of the Software, and to permit persons to whom the |
* Software is furnished to do so, subject to the following conditions: |
* |
* The above copyright notice and this permission notice shall be included |
* in all copies or substantial portions of the Software. |
* |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING |
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER |
* DEALINGS IN THE SOFTWARE. |
* |
* Authors: |
* Chia-I Wu <olv@lunarg.com> |
*/ |
#include "util/u_resource.h" |
#include "brw_defines.h" |
#include "intel_reg.h" |
#include "ilo_cp.h" |
#include "ilo_format.h" |
#include "ilo_resource.h" |
#include "ilo_shader.h" |
#include "ilo_gpe_gen7.h" |
/* GPGPU_WALKER is not implemented by this driver; reaching here is a bug */
static void
gen7_emit_GPGPU_WALKER(const struct ilo_dev_info *dev,
                       struct ilo_cp *cp)
{
   assert(!"GPGPU_WALKER unsupported");
}
/* emit 3DSTATE_CLEAR_PARAMS (GEN7 encoding), setting the depth clear value */
static void
gen7_emit_3DSTATE_CLEAR_PARAMS(const struct ilo_dev_info *dev,
                               uint32_t clear_val,
                               struct ilo_cp *cp)
{
   const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x04);
   const uint8_t cmd_len = 3;

   ILO_GPE_VALID_GEN(dev, 7, 7);

   ilo_cp_begin(cp, cmd_len);
   ilo_cp_write(cp, cmd | (cmd_len - 2));
   ilo_cp_write(cp, clear_val);
   ilo_cp_write(cp, 1); /* presumably "clear value valid" -- confirm in PRM */
   ilo_cp_end(cp);
}
/*
 * Emit a generic two-dword 3DSTATE_*_POINTERS command: dword 0 is the
 * command header for the given subopcode, dword 1 is the state offset.
 * Shared by all the *_POINTERS wrappers below.
 */
static void
gen7_emit_3dstate_pointer(const struct ilo_dev_info *dev,
                          int subop, uint32_t pointer,
                          struct ilo_cp *cp)
{
   const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, subop);
   const uint8_t cmd_len = 2;

   ILO_GPE_VALID_GEN(dev, 7, 7);

   ilo_cp_begin(cp, cmd_len);
   ilo_cp_write(cp, cmd | (cmd_len - 2));
   ilo_cp_write(cp, pointer);
   ilo_cp_end(cp);
}
/* emit 3DSTATE_CC_STATE_POINTERS (subop 0x0e) */
static void
gen7_emit_3DSTATE_CC_STATE_POINTERS(const struct ilo_dev_info *dev,
                                    uint32_t color_calc_state,
                                    struct ilo_cp *cp)
{
   gen7_emit_3dstate_pointer(dev, 0x0e, color_calc_state, cp);
}
void |
ilo_gpe_init_gs_cso_gen7(const struct ilo_dev_info *dev, |
const struct ilo_shader_state *gs, |
struct ilo_shader_cso *cso) |
{ |
int start_grf, vue_read_len, max_threads; |
uint32_t dw2, dw4, dw5; |
ILO_GPE_VALID_GEN(dev, 7, 7); |
start_grf = ilo_shader_get_kernel_param(gs, ILO_KERNEL_URB_DATA_START_REG); |
vue_read_len = ilo_shader_get_kernel_param(gs, ILO_KERNEL_INPUT_COUNT); |
/* in pairs */ |
vue_read_len = (vue_read_len + 1) / 2; |
switch (dev->gen) { |
case ILO_GEN(7): |
max_threads = (dev->gt == 2) ? 128 : 36; |
break; |
default: |
max_threads = 1; |
break; |
} |
dw2 = (true) ? 0 : GEN6_GS_FLOATING_POINT_MODE_ALT; |
dw4 = vue_read_len << GEN6_GS_URB_READ_LENGTH_SHIFT | |
GEN7_GS_INCLUDE_VERTEX_HANDLES | |
0 << GEN6_GS_URB_ENTRY_READ_OFFSET_SHIFT | |
start_grf << GEN6_GS_DISPATCH_START_GRF_SHIFT; |
dw5 = (max_threads - 1) << GEN6_GS_MAX_THREADS_SHIFT | |
GEN6_GS_STATISTICS_ENABLE | |
GEN6_GS_ENABLE; |
STATIC_ASSERT(Elements(cso->payload) >= 3); |
cso->payload[0] = dw2; |
cso->payload[1] = dw4; |
cso->payload[2] = dw5; |
} |
/*
 * Emit 3DSTATE_GS.  With a NULL gs the stage is disabled (all-zero body
 * except GS statistics); otherwise the precomputed CSO payload is used.
 */
static void
gen7_emit_3DSTATE_GS(const struct ilo_dev_info *dev,
                     const struct ilo_shader_state *gs,
                     int num_samplers,
                     struct ilo_cp *cp)
{
   const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x11);
   const uint8_t cmd_len = 7;
   const struct ilo_shader_cso *cso;
   uint32_t dw2, dw4, dw5;

   ILO_GPE_VALID_GEN(dev, 7, 7);

   if (!gs) {
      /* GS disabled: zero everything but keep statistics enabled */
      ilo_cp_begin(cp, cmd_len);
      ilo_cp_write(cp, cmd | (cmd_len - 2));
      ilo_cp_write(cp, 0);
      ilo_cp_write(cp, 0);
      ilo_cp_write(cp, 0);
      ilo_cp_write(cp, 0);
      ilo_cp_write(cp, GEN6_GS_STATISTICS_ENABLE);
      ilo_cp_write(cp, 0);
      ilo_cp_end(cp);
      return;
   }

   /* see ilo_gpe_init_gs_cso_gen7() for the payload layout */
   cso = ilo_shader_get_kernel_cso(gs);
   dw2 = cso->payload[0];
   dw4 = cso->payload[1];
   dw5 = cso->payload[2];

   /* sampler count is programmed in groups of four */
   dw2 |= ((num_samplers + 3) / 4) << GEN6_GS_SAMPLER_COUNT_SHIFT;

   ilo_cp_begin(cp, cmd_len);
   ilo_cp_write(cp, cmd | (cmd_len - 2));
   ilo_cp_write(cp, ilo_shader_get_kernel_offset(gs));
   ilo_cp_write(cp, dw2);
   ilo_cp_write(cp, 0); /* scratch */
   ilo_cp_write(cp, dw4);
   ilo_cp_write(cp, dw5);
   ilo_cp_write(cp, 0);
   ilo_cp_end(cp);
}
/*
 * Emit 3DSTATE_SF.  The six body dwords are filled by the shared GEN6
 * raster helper; only the depth format of zs_surf feeds into them here.
 */
static void
gen7_emit_3DSTATE_SF(const struct ilo_dev_info *dev,
                     const struct ilo_rasterizer_state *rasterizer,
                     const struct pipe_surface *zs_surf,
                     struct ilo_cp *cp)
{
   const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x13);
   const uint8_t cmd_len = 7;
   const int num_samples = 1;
   uint32_t payload[6];

   ILO_GPE_VALID_GEN(dev, 7, 7);

   ilo_gpe_gen6_fill_3dstate_sf_raster(dev,
         rasterizer, num_samples,
         (zs_surf) ? zs_surf->format : PIPE_FORMAT_NONE,
         payload, Elements(payload));

   ilo_cp_begin(cp, cmd_len);
   ilo_cp_write(cp, cmd | (cmd_len - 2));
   ilo_cp_write_multi(cp, payload, 6);
   ilo_cp_end(cp);
}
void |
ilo_gpe_init_rasterizer_wm_gen7(const struct ilo_dev_info *dev, |
const struct pipe_rasterizer_state *state, |
struct ilo_rasterizer_wm *wm) |
{ |
uint32_t dw1, dw2; |
ILO_GPE_VALID_GEN(dev, 7, 7); |
dw1 = GEN7_WM_POSITION_ZW_PIXEL | |
GEN7_WM_LINE_AA_WIDTH_2_0 | |
GEN7_WM_MSRAST_OFF_PIXEL; |
/* same value as in 3DSTATE_SF */ |
if (state->line_smooth) |
dw1 |= GEN7_WM_LINE_END_CAP_AA_WIDTH_1_0; |
if (state->poly_stipple_enable) |
dw1 |= GEN7_WM_POLYGON_STIPPLE_ENABLE; |
if (state->line_stipple_enable) |
dw1 |= GEN7_WM_LINE_STIPPLE_ENABLE; |
if (state->bottom_edge_rule) |
dw1 |= GEN7_WM_POINT_RASTRULE_UPPER_RIGHT; |
dw2 = GEN7_WM_MSDISPMODE_PERSAMPLE; |
/* |
* assertion that makes sure |
* |
* dw1 |= wm->dw_msaa_rast; |
* dw2 |= wm->dw_msaa_disp; |
* |
* is valid |
*/ |
STATIC_ASSERT(GEN7_WM_MSRAST_OFF_PIXEL == 0 && |
GEN7_WM_MSDISPMODE_PERSAMPLE == 0); |
wm->dw_msaa_rast = |
(state->multisample) ? GEN7_WM_MSRAST_ON_PATTERN : 0; |
wm->dw_msaa_disp = GEN7_WM_MSDISPMODE_PERPIXEL; |
STATIC_ASSERT(Elements(wm->payload) >= 2); |
wm->payload[0] = dw1; |
wm->payload[1] = dw2; |
} |
/*
 * Precompute the FS-dependent dwords for GEN7 3DSTATE_PS and 3DSTATE_WM.
 * payload[0..2] hold PS DW2/DW4/DW5; payload[3] holds the WM DW1 bits the
 * shader contributes (dispatch/kill/depth/barycentric).
 */
void
ilo_gpe_init_fs_cso_gen7(const struct ilo_dev_info *dev,
                         const struct ilo_shader_state *fs,
                         struct ilo_shader_cso *cso)
{
   int start_grf, max_threads;
   uint32_t dw2, dw4, dw5;
   uint32_t wm_interps, wm_dw1;

   ILO_GPE_VALID_GEN(dev, 7, 7);

   start_grf = ilo_shader_get_kernel_param(fs, ILO_KERNEL_URB_DATA_START_REG);

   /* see brwCreateContext() */
   max_threads = (dev->gt == 2) ? 172 : 48;

   /* IEEE 754 floating-point mode; ALT mode is never selected */
   dw2 = (true) ? 0 : GEN7_PS_FLOATING_POINT_MODE_ALT;

   dw4 = (max_threads - 1) << IVB_PS_MAX_THREADS_SHIFT |
         GEN7_PS_POSOFFSET_NONE;

   /* push constants are not used yet */
   if (false)
      dw4 |= GEN7_PS_PUSH_CONSTANT_ENABLE;

   if (ilo_shader_get_kernel_param(fs, ILO_KERNEL_INPUT_COUNT))
      dw4 |= GEN7_PS_ATTRIBUTE_ENABLE;

   /* only the SIMD8 kernel is generated; no 16-wide dispatch */
   assert(!ilo_shader_get_kernel_param(fs, ILO_KERNEL_FS_DISPATCH_16_OFFSET));
   dw4 |= GEN7_PS_8_DISPATCH_ENABLE;

   dw5 = start_grf << GEN7_PS_DISPATCH_START_GRF_SHIFT_0 |
         0 << GEN7_PS_DISPATCH_START_GRF_SHIFT_1 |
         0 << GEN7_PS_DISPATCH_START_GRF_SHIFT_2;

   /* FS affects 3DSTATE_WM too */
   wm_dw1 = 0;

   /*
    * TODO set this bit only when
    *
    *  a) fs writes colors and color is not masked, or
    *  b) fs writes depth, or
    *  c) fs or cc kills
    */
   wm_dw1 |= GEN7_WM_DISPATCH_ENABLE;

   /*
    * From the Ivy Bridge PRM, volume 2 part 1, page 278:
    *
    *     "This bit (Pixel Shader Kill Pixel), if ENABLED, indicates that
    *      the PS kernel or color calculator has the ability to kill
    *      (discard) pixels or samples, other than due to depth or stencil
    *      testing. This bit is required to be ENABLED in the following
    *      situations:
    *
    *      - The API pixel shader program contains "killpix" or "discard"
    *        instructions, or other code in the pixel shader kernel that
    *        can cause the final pixel mask to differ from the pixel mask
    *        received on dispatch.
    *
    *      - A sampler with chroma key enabled with kill pixel mode is used
    *        by the pixel shader.
    *
    *      - Any render target has Alpha Test Enable or AlphaToCoverage
    *        Enable enabled.
    *
    *      - The pixel shader kernel generates and outputs oMask.
    *
    *      Note: As ClipDistance clipping is fully supported in hardware
    *      and therefore not via PS instructions, there should be no need
    *      to ENABLE this bit due to ClipDistance clipping."
    */
   if (ilo_shader_get_kernel_param(fs, ILO_KERNEL_FS_USE_KILL))
      wm_dw1 |= GEN7_WM_KILL_ENABLE;

   if (ilo_shader_get_kernel_param(fs, ILO_KERNEL_FS_OUTPUT_Z))
      wm_dw1 |= GEN7_WM_PSCDEPTH_ON;

   if (ilo_shader_get_kernel_param(fs, ILO_KERNEL_FS_INPUT_Z))
      wm_dw1 |= GEN7_WM_USES_SOURCE_DEPTH;

   if (ilo_shader_get_kernel_param(fs, ILO_KERNEL_FS_INPUT_W))
      wm_dw1 |= GEN7_WM_USES_SOURCE_W;

   wm_interps = ilo_shader_get_kernel_param(fs,
         ILO_KERNEL_FS_BARYCENTRIC_INTERPOLATIONS);

   wm_dw1 |= wm_interps << GEN7_WM_BARYCENTRIC_INTERPOLATION_MODE_SHIFT;

   STATIC_ASSERT(Elements(cso->payload) >= 4);
   cso->payload[0] = dw2;
   cso->payload[1] = dw4;
   cso->payload[2] = dw5;
   cso->payload[3] = wm_dw1;
}
static void |
gen7_emit_3DSTATE_WM(const struct ilo_dev_info *dev, |
const struct ilo_shader_state *fs, |
const struct ilo_rasterizer_state *rasterizer, |
bool cc_may_kill, |
struct ilo_cp *cp) |
{ |
const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x14); |
const uint8_t cmd_len = 3; |
const int num_samples = 1; |
uint32_t dw1, dw2; |
ILO_GPE_VALID_GEN(dev, 7, 7); |
/* see ilo_gpe_init_rasterizer_wm() */ |
dw1 = rasterizer->wm.payload[0]; |
dw2 = rasterizer->wm.payload[1]; |
dw1 |= GEN7_WM_STATISTICS_ENABLE; |
if (false) { |
dw1 |= GEN7_WM_DEPTH_CLEAR; |
dw1 |= GEN7_WM_DEPTH_RESOLVE; |
dw1 |= GEN7_WM_HIERARCHICAL_DEPTH_RESOLVE; |
} |
if (fs) { |
const struct ilo_shader_cso *fs_cso = ilo_shader_get_kernel_cso(fs); |
dw1 |= fs_cso->payload[3]; |
} |
if (cc_may_kill) { |
dw1 |= GEN7_WM_DISPATCH_ENABLE | |
GEN7_WM_KILL_ENABLE; |
} |
if (num_samples > 1) { |
dw1 |= rasterizer->wm.dw_msaa_rast; |
dw2 |= rasterizer->wm.dw_msaa_disp; |
} |
ilo_cp_begin(cp, cmd_len); |
ilo_cp_write(cp, cmd | (cmd_len - 2)); |
ilo_cp_write(cp, dw1); |
ilo_cp_write(cp, dw2); |
ilo_cp_end(cp); |
} |
/*
 * Emit a GEN7 3DSTATE_CONSTANT_* command for the stage selected by subop.
 * DW1/DW2 pack four 16-bit read lengths (256-bit units); DW3..DW6 hold
 * the four buffer addresses.  Buffers must be enabled contiguously from
 * slot 0 and may total at most 64 read-length units.
 */
static void
gen7_emit_3dstate_constant(const struct ilo_dev_info *dev,
                           int subop,
                           const uint32_t *bufs, const int *sizes,
                           int num_bufs,
                           struct ilo_cp *cp)
{
   const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, subop);
   const uint8_t cmd_len = 7;
   uint32_t dw[6];
   int total_read_length, i;

   ILO_GPE_VALID_GEN(dev, 7, 7);

   /* VS, HS, DS, GS, and PS variants */
   assert(subop >= 0x15 && subop <= 0x1a && subop != 0x18);

   assert(num_bufs <= 4);

   dw[0] = 0;
   dw[1] = 0;

   total_read_length = 0;
   for (i = 0; i < 4; i++) {
      int read_len;

      /*
       * From the Ivy Bridge PRM, volume 2 part 1, page 112:
       *
       *     "Constant buffers must be enabled in order from Constant Buffer 0
       *      to Constant Buffer 3 within this command.  For example, it is
       *      not allowed to enable Constant Buffer 1 by programming a
       *      non-zero value in the VS Constant Buffer 1 Read Length without a
       *      non-zero value in VS Constant Buffer 0 Read Length."
       */
      if (i >= num_bufs || !sizes[i]) {
         /* zero out the remaining slots; none may be enabled after a gap */
         for (; i < 4; i++) {
            assert(i >= num_bufs || !sizes[i]);
            dw[2 + i] = 0;
         }
         break;
      }

      /* read lengths are in 256-bit units */
      read_len = (sizes[i] + 31) / 32;
      /* the lower 5 bits are used for memory object control state */
      assert(bufs[i] % 32 == 0);

      /* two 16-bit read-length fields per dword */
      dw[i / 2] |= read_len << ((i % 2) ? 16 : 0);
      dw[2 + i] = bufs[i];

      total_read_length += read_len;
   }

   /*
    * From the Ivy Bridge PRM, volume 2 part 1, page 113:
    *
    *     "The sum of all four read length fields must be less than or equal
    *      to the size of 64"
    */
   assert(total_read_length <= 64);

   ilo_cp_begin(cp, cmd_len);
   ilo_cp_write(cp, cmd | (cmd_len - 2));
   ilo_cp_write_multi(cp, dw, 6);
   ilo_cp_end(cp);
}
/* emit 3DSTATE_CONSTANT_VS (subop 0x15) */
static void
gen7_emit_3DSTATE_CONSTANT_VS(const struct ilo_dev_info *dev,
                              const uint32_t *bufs, const int *sizes,
                              int num_bufs,
                              struct ilo_cp *cp)
{
   gen7_emit_3dstate_constant(dev, 0x15, bufs, sizes, num_bufs, cp);
}
/* emit 3DSTATE_CONSTANT_GS (subop 0x16) */
static void
gen7_emit_3DSTATE_CONSTANT_GS(const struct ilo_dev_info *dev,
                              const uint32_t *bufs, const int *sizes,
                              int num_bufs,
                              struct ilo_cp *cp)
{
   gen7_emit_3dstate_constant(dev, 0x16, bufs, sizes, num_bufs, cp);
}
/* emit 3DSTATE_CONSTANT_PS (subop 0x17) */
static void
gen7_emit_3DSTATE_CONSTANT_PS(const struct ilo_dev_info *dev,
                              const uint32_t *bufs, const int *sizes,
                              int num_bufs,
                              struct ilo_cp *cp)
{
   gen7_emit_3dstate_constant(dev, 0x17, bufs, sizes, num_bufs, cp);
}
/*
 * Emit 3DSTATE_SAMPLE_MASK, masking off the bits that are invalid for the
 * current sample count (bit 0 always stays writable).
 */
static void
gen7_emit_3DSTATE_SAMPLE_MASK(const struct ilo_dev_info *dev,
                              unsigned sample_mask,
                              int num_samples,
                              struct ilo_cp *cp)
{
   const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x18);
   const uint8_t cmd_len = 2;
   const unsigned valid_mask = ((1 << num_samples) - 1) | 0x1;

   ILO_GPE_VALID_GEN(dev, 7, 7);

   /*
    * From the Ivy Bridge PRM, volume 2 part 1, page 294:
    *
    *     "If Number of Multisamples is NUMSAMPLES_1, bits 7:1 of this field
    *      (Sample Mask) must be zero.
    *
    *      If Number of Multisamples is NUMSAMPLES_4, bits 7:4 of this field
    *      must be zero."
    */
   sample_mask &= valid_mask;

   ilo_cp_begin(cp, cmd_len);
   ilo_cp_write(cp, cmd | (cmd_len - 2));
   ilo_cp_write(cp, sample_mask);
   ilo_cp_end(cp);
}
/* emit 3DSTATE_CONSTANT_HS (subop 0x19) */
static void
gen7_emit_3DSTATE_CONSTANT_HS(const struct ilo_dev_info *dev,
                              const uint32_t *bufs, const int *sizes,
                              int num_bufs,
                              struct ilo_cp *cp)
{
   gen7_emit_3dstate_constant(dev, 0x19, bufs, sizes, num_bufs, cp);
}
/* emit 3DSTATE_CONSTANT_DS (subop 0x1a) */
static void
gen7_emit_3DSTATE_CONSTANT_DS(const struct ilo_dev_info *dev,
                              const uint32_t *bufs, const int *sizes,
                              int num_bufs,
                              struct ilo_cp *cp)
{
   gen7_emit_3dstate_constant(dev, 0x1a, bufs, sizes, num_bufs, cp);
}
/* emit 3DSTATE_HS; tessellation is unsupported, so hs must be NULL */
static void
gen7_emit_3DSTATE_HS(const struct ilo_dev_info *dev,
                     const struct ilo_shader_state *hs,
                     int num_samplers,
                     struct ilo_cp *cp)
{
   const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x1b);
   const uint8_t cmd_len = 7;

   ILO_GPE_VALID_GEN(dev, 7, 7);

   assert(!hs);

   /* stage disabled: all-zero body */
   ilo_cp_begin(cp, cmd_len);
   ilo_cp_write(cp, cmd | (cmd_len - 2));
   ilo_cp_write(cp, 0);
   ilo_cp_write(cp, 0);
   ilo_cp_write(cp, 0);
   ilo_cp_write(cp, 0);
   ilo_cp_write(cp, 0);
   ilo_cp_write(cp, 0);
   ilo_cp_end(cp);
}
/* emit 3DSTATE_TE with an all-zero body (tessellation disabled) */
static void
gen7_emit_3DSTATE_TE(const struct ilo_dev_info *dev,
                     struct ilo_cp *cp)
{
   const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x1c);
   const uint8_t cmd_len = 4;

   ILO_GPE_VALID_GEN(dev, 7, 7);

   ilo_cp_begin(cp, cmd_len);
   ilo_cp_write(cp, cmd | (cmd_len - 2));
   ilo_cp_write(cp, 0);
   ilo_cp_write(cp, 0);
   ilo_cp_write(cp, 0);
   ilo_cp_end(cp);
}
/* emit 3DSTATE_DS; tessellation is unsupported, so ds must be NULL */
static void
gen7_emit_3DSTATE_DS(const struct ilo_dev_info *dev,
                     const struct ilo_shader_state *ds,
                     int num_samplers,
                     struct ilo_cp *cp)
{
   const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x1d);
   const uint8_t cmd_len = 6;

   ILO_GPE_VALID_GEN(dev, 7, 7);

   assert(!ds);

   /* stage disabled: all-zero body */
   ilo_cp_begin(cp, cmd_len);
   ilo_cp_write(cp, cmd | (cmd_len - 2));
   ilo_cp_write(cp, 0);
   ilo_cp_write(cp, 0);
   ilo_cp_write(cp, 0);
   ilo_cp_write(cp, 0);
   ilo_cp_write(cp, 0);
   ilo_cp_end(cp);
}
static void |
gen7_emit_3DSTATE_STREAMOUT(const struct ilo_dev_info *dev, |
unsigned buffer_mask, |
int vertex_attrib_count, |
bool rasterizer_discard, |
struct ilo_cp *cp) |
{ |
const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x1e); |
const uint8_t cmd_len = 3; |
const bool enable = (buffer_mask != 0); |
uint32_t dw1, dw2; |
int read_len; |
ILO_GPE_VALID_GEN(dev, 7, 7); |
if (!enable) { |
dw1 = 0 << SO_RENDER_STREAM_SELECT_SHIFT; |
if (rasterizer_discard) |
dw1 |= SO_RENDERING_DISABLE; |
dw2 = 0; |
ilo_cp_begin(cp, cmd_len); |
ilo_cp_write(cp, cmd | (cmd_len - 2)); |
ilo_cp_write(cp, dw1); |
ilo_cp_write(cp, dw2); |
ilo_cp_end(cp); |
return; |
} |
read_len = (vertex_attrib_count + 1) / 2; |
if (!read_len) |
read_len = 1; |
dw1 = SO_FUNCTION_ENABLE | |
0 << SO_RENDER_STREAM_SELECT_SHIFT | |
SO_STATISTICS_ENABLE | |
buffer_mask << 8; |
if (rasterizer_discard) |
dw1 |= SO_RENDERING_DISABLE; |
/* API_OPENGL */ |
if (true) |
dw1 |= SO_REORDER_TRAILING; |
dw2 = 0 << SO_STREAM_3_VERTEX_READ_OFFSET_SHIFT | |
0 << SO_STREAM_3_VERTEX_READ_LENGTH_SHIFT | |
0 << SO_STREAM_2_VERTEX_READ_OFFSET_SHIFT | |
0 << SO_STREAM_2_VERTEX_READ_LENGTH_SHIFT | |
0 << SO_STREAM_1_VERTEX_READ_OFFSET_SHIFT | |
0 << SO_STREAM_1_VERTEX_READ_LENGTH_SHIFT | |
0 << SO_STREAM_0_VERTEX_READ_OFFSET_SHIFT | |
(read_len - 1) << SO_STREAM_0_VERTEX_READ_LENGTH_SHIFT; |
ilo_cp_begin(cp, cmd_len); |
ilo_cp_write(cp, cmd | (cmd_len - 2)); |
ilo_cp_write(cp, dw1); |
ilo_cp_write(cp, dw2); |
ilo_cp_end(cp); |
} |
/*
 * Emit 3DSTATE_SBE.  The 13 body dwords are filled by the shared GEN6
 * SF/SBE helper, which maps FS inputs to the attributes written by the
 * last geometry stage.
 */
static void
gen7_emit_3DSTATE_SBE(const struct ilo_dev_info *dev,
                      const struct ilo_rasterizer_state *rasterizer,
                      const struct ilo_shader_state *fs,
                      const struct ilo_shader_state *last_sh,
                      struct ilo_cp *cp)
{
   const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x1f);
   const uint8_t cmd_len = 14;
   uint32_t dw[13];

   ILO_GPE_VALID_GEN(dev, 7, 7);

   ilo_gpe_gen6_fill_3dstate_sf_sbe(dev, rasterizer,
         fs, last_sh, dw, Elements(dw));

   ilo_cp_begin(cp, cmd_len);
   ilo_cp_write(cp, cmd | (cmd_len - 2));
   ilo_cp_write_multi(cp, dw, 13);
   ilo_cp_end(cp);
}
/*
 * Emit 3DSTATE_PS.  With a NULL fs the stage is "disabled", but a dispatch
 * enable bit must still be set to keep the GPU from hanging.
 */
static void
gen7_emit_3DSTATE_PS(const struct ilo_dev_info *dev,
                     const struct ilo_shader_state *fs,
                     int num_samplers, bool dual_blend,
                     struct ilo_cp *cp)
{
   const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x20);
   const uint8_t cmd_len = 8;
   const struct ilo_shader_cso *cso;
   uint32_t dw2, dw4, dw5;

   ILO_GPE_VALID_GEN(dev, 7, 7);

   if (!fs) {
      /* see brwCreateContext() */
      const int max_threads = (dev->gt == 2) ? 172 : 48;

      ilo_cp_begin(cp, cmd_len);
      ilo_cp_write(cp, cmd | (cmd_len - 2));
      ilo_cp_write(cp, 0);
      ilo_cp_write(cp, 0);
      ilo_cp_write(cp, 0);
      /* GPU hangs if none of the dispatch enable bits is set */
      ilo_cp_write(cp, (max_threads - 1) << IVB_PS_MAX_THREADS_SHIFT |
                       GEN7_PS_8_DISPATCH_ENABLE);
      ilo_cp_write(cp, 0);
      ilo_cp_write(cp, 0);
      ilo_cp_write(cp, 0);
      ilo_cp_end(cp);

      return;
   }

   /* see ilo_gpe_init_fs_cso_gen7() for the payload layout */
   cso = ilo_shader_get_kernel_cso(fs);
   dw2 = cso->payload[0];
   dw4 = cso->payload[1];
   dw5 = cso->payload[2];

   /* sampler count is programmed in groups of four */
   dw2 |= (num_samplers + 3) / 4 << GEN7_PS_SAMPLER_COUNT_SHIFT;

   if (dual_blend)
      dw4 |= GEN7_PS_DUAL_SOURCE_BLEND_ENABLE;

   ilo_cp_begin(cp, cmd_len);
   ilo_cp_write(cp, cmd | (cmd_len - 2));
   ilo_cp_write(cp, ilo_shader_get_kernel_offset(fs));
   ilo_cp_write(cp, dw2);
   ilo_cp_write(cp, 0); /* scratch */
   ilo_cp_write(cp, dw4);
   ilo_cp_write(cp, dw5);
   ilo_cp_write(cp, 0); /* kernel 1 */
   ilo_cp_write(cp, 0); /* kernel 2 */
   ilo_cp_end(cp);
}
/* emit 3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP (subop 0x21) */
static void
gen7_emit_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP(const struct ilo_dev_info *dev,
                                                  uint32_t sf_clip_viewport,
                                                  struct ilo_cp *cp)
{
   gen7_emit_3dstate_pointer(dev, 0x21, sf_clip_viewport, cp);
}
/* emit 3DSTATE_VIEWPORT_STATE_POINTERS_CC (subop 0x23) */
static void
gen7_emit_3DSTATE_VIEWPORT_STATE_POINTERS_CC(const struct ilo_dev_info *dev,
                                             uint32_t cc_viewport,
                                             struct ilo_cp *cp)
{
   gen7_emit_3dstate_pointer(dev, 0x23, cc_viewport, cp);
}
/* emit 3DSTATE_BLEND_STATE_POINTERS (subop 0x24) */
static void
gen7_emit_3DSTATE_BLEND_STATE_POINTERS(const struct ilo_dev_info *dev,
                                       uint32_t blend_state,
                                       struct ilo_cp *cp)
{
   gen7_emit_3dstate_pointer(dev, 0x24, blend_state, cp);
}
/* emit 3DSTATE_DEPTH_STENCIL_STATE_POINTERS (subop 0x25) */
static void
gen7_emit_3DSTATE_DEPTH_STENCIL_STATE_POINTERS(const struct ilo_dev_info *dev,
                                               uint32_t depth_stencil_state,
                                               struct ilo_cp *cp)
{
   gen7_emit_3dstate_pointer(dev, 0x25, depth_stencil_state, cp);
}
/* emit 3DSTATE_BINDING_TABLE_POINTERS_VS (subop 0x26) */
static void
gen7_emit_3DSTATE_BINDING_TABLE_POINTERS_VS(const struct ilo_dev_info *dev,
                                            uint32_t binding_table,
                                            struct ilo_cp *cp)
{
   gen7_emit_3dstate_pointer(dev, 0x26, binding_table, cp);
}
/* emit 3DSTATE_BINDING_TABLE_POINTERS_HS (subop 0x27) */
static void
gen7_emit_3DSTATE_BINDING_TABLE_POINTERS_HS(const struct ilo_dev_info *dev,
                                            uint32_t binding_table,
                                            struct ilo_cp *cp)
{
   gen7_emit_3dstate_pointer(dev, 0x27, binding_table, cp);
}
/* emit 3DSTATE_BINDING_TABLE_POINTERS_DS (subop 0x28) */
static void
gen7_emit_3DSTATE_BINDING_TABLE_POINTERS_DS(const struct ilo_dev_info *dev,
                                            uint32_t binding_table,
                                            struct ilo_cp *cp)
{
   gen7_emit_3dstate_pointer(dev, 0x28, binding_table, cp);
}
/* emit 3DSTATE_BINDING_TABLE_POINTERS_GS (subop 0x29) */
static void
gen7_emit_3DSTATE_BINDING_TABLE_POINTERS_GS(const struct ilo_dev_info *dev,
                                            uint32_t binding_table,
                                            struct ilo_cp *cp)
{
   gen7_emit_3dstate_pointer(dev, 0x29, binding_table, cp);
}
/* emit 3DSTATE_BINDING_TABLE_POINTERS_PS (subop 0x2a) */
static void
gen7_emit_3DSTATE_BINDING_TABLE_POINTERS_PS(const struct ilo_dev_info *dev,
                                            uint32_t binding_table,
                                            struct ilo_cp *cp)
{
   gen7_emit_3dstate_pointer(dev, 0x2a, binding_table, cp);
}
/* emit 3DSTATE_SAMPLER_STATE_POINTERS_VS (subop 0x2b) */
static void
gen7_emit_3DSTATE_SAMPLER_STATE_POINTERS_VS(const struct ilo_dev_info *dev,
                                            uint32_t sampler_state,
                                            struct ilo_cp *cp)
{
   gen7_emit_3dstate_pointer(dev, 0x2b, sampler_state, cp);
}
/* emit 3DSTATE_SAMPLER_STATE_POINTERS_HS (subop 0x2c) */
static void
gen7_emit_3DSTATE_SAMPLER_STATE_POINTERS_HS(const struct ilo_dev_info *dev,
                                            uint32_t sampler_state,
                                            struct ilo_cp *cp)
{
   gen7_emit_3dstate_pointer(dev, 0x2c, sampler_state, cp);
}
/* emit 3DSTATE_SAMPLER_STATE_POINTERS_DS (subop 0x2d) */
static void
gen7_emit_3DSTATE_SAMPLER_STATE_POINTERS_DS(const struct ilo_dev_info *dev,
                                            uint32_t sampler_state,
                                            struct ilo_cp *cp)
{
   gen7_emit_3dstate_pointer(dev, 0x2d, sampler_state, cp);
}
/* emit 3DSTATE_SAMPLER_STATE_POINTERS_GS (subop 0x2e) */
static void
gen7_emit_3DSTATE_SAMPLER_STATE_POINTERS_GS(const struct ilo_dev_info *dev,
                                            uint32_t sampler_state,
                                            struct ilo_cp *cp)
{
   gen7_emit_3dstate_pointer(dev, 0x2e, sampler_state, cp);
}
/* emit 3DSTATE_SAMPLER_STATE_POINTERS_PS (subop 0x2f) */
static void
gen7_emit_3DSTATE_SAMPLER_STATE_POINTERS_PS(const struct ilo_dev_info *dev,
                                            uint32_t sampler_state,
                                            struct ilo_cp *cp)
{
   gen7_emit_3dstate_pointer(dev, 0x2f, sampler_state, cp);
}
/*
 * Emit a 3DSTATE_URB_{VS,HS,DS,GS} command.  offset/size are in bytes of
 * URB space; entry_size is in bytes per entry.  The allocation size is
 * rounded up to 512-bit rows and the entry count is clamped to the
 * per-stage, per-GT hardware maximums.
 */
static void
gen7_emit_3dstate_urb(const struct ilo_dev_info *dev,
                      int subop, int offset, int size,
                      int entry_size,
                      struct ilo_cp *cp)
{
   const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, subop);
   const uint8_t cmd_len = 2;
   const int row_size = 64; /* 512 bits */
   int alloc_size, num_entries, min_entries, max_entries;

   ILO_GPE_VALID_GEN(dev, 7, 7);

   /* VS, HS, DS, and GS variants */
   assert(subop >= 0x30 && subop <= 0x33);

   /* in multiples of 8KB */
   assert(offset % 8192 == 0);
   offset /= 8192;

   /* in multiple of 512-bit rows */
   alloc_size = (entry_size + row_size - 1) / row_size;
   if (!alloc_size)
      alloc_size = 1;

   /*
    * From the Ivy Bridge PRM, volume 2 part 1, page 34:
    *
    *     "VS URB Entry Allocation Size equal to 4(5 512-bit URB rows) may
    *      cause performance to decrease due to banking in the URB. Element
    *      sizes of 16 to 20 should be programmed with six 512-bit URB rows."
    */
   if (subop == 0x30 && alloc_size == 5)
      alloc_size = 6;

   /* in multiples of 8 */
   num_entries = (size / row_size / alloc_size) & ~7;

   switch (subop) {
   case 0x30: /* 3DSTATE_URB_VS */
      min_entries = 32;
      max_entries = (dev->gt == 2) ? 704 : 512;

      assert(num_entries >= min_entries);
      if (num_entries > max_entries)
         num_entries = max_entries;
      break;
   case 0x31: /* 3DSTATE_URB_HS */
      max_entries = (dev->gt == 2) ? 64 : 32;
      if (num_entries > max_entries)
         num_entries = max_entries;
      break;
   case 0x32: /* 3DSTATE_URB_DS */
      if (num_entries)
         assert(num_entries >= 138);
      break;
   case 0x33: /* 3DSTATE_URB_GS */
      max_entries = (dev->gt == 2) ? 320 : 192;
      if (num_entries > max_entries)
         num_entries = max_entries;
      break;
   default:
      break;
   }

   ilo_cp_begin(cp, cmd_len);
   ilo_cp_write(cp, cmd | (cmd_len - 2));
   ilo_cp_write(cp, offset << GEN7_URB_STARTING_ADDRESS_SHIFT |
                    (alloc_size - 1) << GEN7_URB_ENTRY_SIZE_SHIFT |
                    num_entries);
   ilo_cp_end(cp);
}
/* Emit 3DSTATE_URB_VS (subopcode 0x30). */
static void
gen7_emit_3DSTATE_URB_VS(const struct ilo_dev_info *dev,
                         int offset, int size, int entry_size,
                         struct ilo_cp *cp)
{
   const int subop = 0x30;

   gen7_emit_3dstate_urb(dev, subop, offset, size, entry_size, cp);
}
/* Emit 3DSTATE_URB_HS (subopcode 0x31). */
static void
gen7_emit_3DSTATE_URB_HS(const struct ilo_dev_info *dev,
                         int offset, int size, int entry_size,
                         struct ilo_cp *cp)
{
   const int subop = 0x31;

   gen7_emit_3dstate_urb(dev, subop, offset, size, entry_size, cp);
}
/* Emit 3DSTATE_URB_DS (subopcode 0x32). */
static void
gen7_emit_3DSTATE_URB_DS(const struct ilo_dev_info *dev,
                         int offset, int size, int entry_size,
                         struct ilo_cp *cp)
{
   const int subop = 0x32;

   gen7_emit_3dstate_urb(dev, subop, offset, size, entry_size, cp);
}
/* Emit 3DSTATE_URB_GS (subopcode 0x33). */
static void
gen7_emit_3DSTATE_URB_GS(const struct ilo_dev_info *dev,
                         int offset, int size, int entry_size,
                         struct ilo_cp *cp)
{
   const int subop = 0x33;

   gen7_emit_3dstate_urb(dev, subop, offset, size, entry_size, cp);
}
/*
 * Emit one of the 3DSTATE_PUSH_CONSTANT_ALLOC_{VS,HS,DS,GS,PS} commands
 * (subops 0x12-0x16), carving out the [offset, offset + size) region of the
 * push constant space for a shader stage.
 *
 * offset and size are in bytes; the command encodes them in 1KB units, so
 * they are converted (and sanity-clamped) here.
 */
static void
gen7_emit_3dstate_push_constant_alloc(const struct ilo_dev_info *dev,
                                      int subop, int offset, int size,
                                      struct ilo_cp *cp)
{
   const uint32_t cmd = ILO_GPE_CMD(0x3, 0x1, subop);
   const uint8_t cmd_len = 2;
   int end;
   ILO_GPE_VALID_GEN(dev, 7, 7);
   /* VS, HS, DS, GS, and PS variants */
   assert(subop >= 0x12 && subop <= 0x16);
   /*
    * From the Ivy Bridge PRM, volume 2 part 1, page 68:
    *
    *     "(A table that says the maximum size of each constant buffer is
    *      16KB")
    *
    * From the Ivy Bridge PRM, volume 2 part 1, page 115:
    *
    *     "The sum of the Constant Buffer Offset and the Constant Buffer Size
    *      may not exceed the maximum value of the Constant Buffer Size."
    *
    * Thus, the valid range of buffer end is [0KB, 16KB].
    */
   end = (offset + size) / 1024;
   if (end > 16) {
      assert(!"invalid constant buffer end");
      end = 16;
   }
   /* the valid range of buffer offset is [0KB, 15KB] */
   offset = (offset + 1023) / 1024; /* round the offset up to whole KBs */
   if (offset > 15) {
      assert(!"invalid constant buffer offset");
      offset = 15;
   }
   /* rounding offset up may move it past end when size is zero; collapse */
   if (offset > end) {
      assert(!size);
      offset = end;
   }
   /* the valid range of buffer size is [0KB, 15KB] */
   size = end - offset;
   if (size > 15) {
      assert(!"invalid constant buffer size");
      size = 15;
   }
   ilo_cp_begin(cp, cmd_len);
   ilo_cp_write(cp, cmd | (cmd_len - 2));
   ilo_cp_write(cp, offset << GEN7_PUSH_CONSTANT_BUFFER_OFFSET_SHIFT |
                    size);
   ilo_cp_end(cp);
}
/* Emit 3DSTATE_PUSH_CONSTANT_ALLOC_VS (subopcode 0x12). */
static void
gen7_emit_3DSTATE_PUSH_CONSTANT_ALLOC_VS(const struct ilo_dev_info *dev,
                                         int offset, int size,
                                         struct ilo_cp *cp)
{
   const int subop = 0x12;

   gen7_emit_3dstate_push_constant_alloc(dev, subop, offset, size, cp);
}
/* Emit 3DSTATE_PUSH_CONSTANT_ALLOC_HS (subopcode 0x13). */
static void
gen7_emit_3DSTATE_PUSH_CONSTANT_ALLOC_HS(const struct ilo_dev_info *dev,
                                         int offset, int size,
                                         struct ilo_cp *cp)
{
   const int subop = 0x13;

   gen7_emit_3dstate_push_constant_alloc(dev, subop, offset, size, cp);
}
/* Emit 3DSTATE_PUSH_CONSTANT_ALLOC_DS (subopcode 0x14). */
static void
gen7_emit_3DSTATE_PUSH_CONSTANT_ALLOC_DS(const struct ilo_dev_info *dev,
                                         int offset, int size,
                                         struct ilo_cp *cp)
{
   const int subop = 0x14;

   gen7_emit_3dstate_push_constant_alloc(dev, subop, offset, size, cp);
}
/* Emit 3DSTATE_PUSH_CONSTANT_ALLOC_GS (subopcode 0x15). */
static void
gen7_emit_3DSTATE_PUSH_CONSTANT_ALLOC_GS(const struct ilo_dev_info *dev,
                                         int offset, int size,
                                         struct ilo_cp *cp)
{
   const int subop = 0x15;

   gen7_emit_3dstate_push_constant_alloc(dev, subop, offset, size, cp);
}
/* Emit 3DSTATE_PUSH_CONSTANT_ALLOC_PS (subopcode 0x16). */
static void
gen7_emit_3DSTATE_PUSH_CONSTANT_ALLOC_PS(const struct ilo_dev_info *dev,
                                         int offset, int size,
                                         struct ilo_cp *cp)
{
   const int subop = 0x16;

   gen7_emit_3dstate_push_constant_alloc(dev, subop, offset, size, cp);
}
static void |
gen7_emit_3DSTATE_SO_DECL_LIST(const struct ilo_dev_info *dev, |
const struct pipe_stream_output_info *so_info, |
struct ilo_cp *cp) |
{ |
const uint32_t cmd = ILO_GPE_CMD(0x3, 0x1, 0x17); |
uint16_t cmd_len; |
int buffer_selects, num_entries, i; |
uint16_t so_decls[128]; |
ILO_GPE_VALID_GEN(dev, 7, 7); |
buffer_selects = 0; |
num_entries = 0; |
if (so_info) { |
int buffer_offsets[PIPE_MAX_SO_BUFFERS]; |
memset(buffer_offsets, 0, sizeof(buffer_offsets)); |
for (i = 0; i < so_info->num_outputs; i++) { |
unsigned decl, buf, reg, mask; |
buf = so_info->output[i].output_buffer; |
/* pad with holes */ |
assert(buffer_offsets[buf] <= so_info->output[i].dst_offset); |
while (buffer_offsets[buf] < so_info->output[i].dst_offset) { |
int num_dwords; |
num_dwords = so_info->output[i].dst_offset - buffer_offsets[buf]; |
if (num_dwords > 4) |
num_dwords = 4; |
decl = buf << SO_DECL_OUTPUT_BUFFER_SLOT_SHIFT | |
SO_DECL_HOLE_FLAG | |
((1 << num_dwords) - 1) << SO_DECL_COMPONENT_MASK_SHIFT; |
so_decls[num_entries++] = decl; |
buffer_offsets[buf] += num_dwords; |
} |
reg = so_info->output[i].register_index; |
mask = ((1 << so_info->output[i].num_components) - 1) << |
so_info->output[i].start_component; |
decl = buf << SO_DECL_OUTPUT_BUFFER_SLOT_SHIFT | |
reg << SO_DECL_REGISTER_INDEX_SHIFT | |
mask << SO_DECL_COMPONENT_MASK_SHIFT; |
so_decls[num_entries++] = decl; |
buffer_selects |= 1 << buf; |
buffer_offsets[buf] += so_info->output[i].num_components; |
} |
} |
/* |
* From the Ivy Bridge PRM, volume 2 part 1, page 201: |
* |
* "Errata: All 128 decls for all four streams must be included |
* whenever this command is issued. The "Num Entries [n]" fields still |
* contain the actual numbers of valid decls." |
* |
* Also note that "DWord Length" has 9 bits for this command, and the type |
* of cmd_len is thus uint16_t. |
*/ |
cmd_len = 2 * 128 + 3; |
ilo_cp_begin(cp, cmd_len); |
ilo_cp_write(cp, cmd | (cmd_len - 2)); |
ilo_cp_write(cp, 0 << SO_STREAM_TO_BUFFER_SELECTS_3_SHIFT | |
0 << SO_STREAM_TO_BUFFER_SELECTS_2_SHIFT | |
0 << SO_STREAM_TO_BUFFER_SELECTS_1_SHIFT | |
buffer_selects << SO_STREAM_TO_BUFFER_SELECTS_0_SHIFT); |
ilo_cp_write(cp, 0 << SO_NUM_ENTRIES_3_SHIFT | |
0 << SO_NUM_ENTRIES_2_SHIFT | |
0 << SO_NUM_ENTRIES_1_SHIFT | |
num_entries << SO_NUM_ENTRIES_0_SHIFT); |
for (i = 0; i < num_entries; i++) { |
ilo_cp_write(cp, so_decls[i]); |
ilo_cp_write(cp, 0); |
} |
for (; i < 128; i++) { |
ilo_cp_write(cp, 0); |
ilo_cp_write(cp, 0); |
} |
ilo_cp_end(cp); |
} |
static void |
gen7_emit_3DSTATE_SO_BUFFER(const struct ilo_dev_info *dev, |
int index, int base, int stride, |
const struct pipe_stream_output_target *so_target, |
struct ilo_cp *cp) |
{ |
const uint32_t cmd = ILO_GPE_CMD(0x3, 0x1, 0x18); |
const uint8_t cmd_len = 4; |
struct ilo_buffer *buf; |
int end; |
ILO_GPE_VALID_GEN(dev, 7, 7); |
if (!so_target || !so_target->buffer) { |
ilo_cp_begin(cp, cmd_len); |
ilo_cp_write(cp, cmd | (cmd_len - 2)); |
ilo_cp_write(cp, index << SO_BUFFER_INDEX_SHIFT); |
ilo_cp_write(cp, 0); |
ilo_cp_write(cp, 0); |
ilo_cp_end(cp); |
return; |
} |
buf = ilo_buffer(so_target->buffer); |
/* DWord-aligned */ |
assert(stride % 4 == 0 && base % 4 == 0); |
assert(so_target->buffer_offset % 4 == 0); |
stride &= ~3; |
base = (base + so_target->buffer_offset) & ~3; |
end = (base + so_target->buffer_size) & ~3; |
ilo_cp_begin(cp, cmd_len); |
ilo_cp_write(cp, cmd | (cmd_len - 2)); |
ilo_cp_write(cp, index << SO_BUFFER_INDEX_SHIFT | |
stride); |
ilo_cp_write_bo(cp, base, buf->bo, INTEL_DOMAIN_RENDER, INTEL_DOMAIN_RENDER); |
ilo_cp_write_bo(cp, end, buf->bo, INTEL_DOMAIN_RENDER, INTEL_DOMAIN_RENDER); |
ilo_cp_end(cp); |
} |
static void |
gen7_emit_3DPRIMITIVE(const struct ilo_dev_info *dev, |
const struct pipe_draw_info *info, |
const struct ilo_ib_state *ib, |
bool rectlist, |
struct ilo_cp *cp) |
{ |
const uint32_t cmd = ILO_GPE_CMD(0x3, 0x3, 0x00); |
const uint8_t cmd_len = 7; |
const int prim = (rectlist) ? |
_3DPRIM_RECTLIST : ilo_gpe_gen6_translate_pipe_prim(info->mode); |
const int vb_access = (info->indexed) ? |
GEN7_3DPRIM_VERTEXBUFFER_ACCESS_RANDOM : |
GEN7_3DPRIM_VERTEXBUFFER_ACCESS_SEQUENTIAL; |
const uint32_t vb_start = info->start + |
((info->indexed) ? ib->draw_start_offset : 0); |
ILO_GPE_VALID_GEN(dev, 7, 7); |
ilo_cp_begin(cp, cmd_len); |
ilo_cp_write(cp, cmd | (cmd_len - 2)); |
ilo_cp_write(cp, vb_access | prim); |
ilo_cp_write(cp, info->count); |
ilo_cp_write(cp, vb_start); |
ilo_cp_write(cp, info->instance_count); |
ilo_cp_write(cp, info->start_instance); |
ilo_cp_write(cp, info->index_bias); |
ilo_cp_end(cp); |
} |
static uint32_t |
gen7_emit_SF_CLIP_VIEWPORT(const struct ilo_dev_info *dev, |
const struct ilo_viewport_cso *viewports, |
unsigned num_viewports, |
struct ilo_cp *cp) |
{ |
const int state_align = 64 / 4; |
const int state_len = 16 * num_viewports; |
uint32_t state_offset, *dw; |
unsigned i; |
ILO_GPE_VALID_GEN(dev, 7, 7); |
/* |
* From the Ivy Bridge PRM, volume 2 part 1, page 270: |
* |
* "The viewport-specific state used by both the SF and CL units |
* (SF_CLIP_VIEWPORT) is stored as an array of up to 16 elements, each |
* of which contains the DWords described below. The start of each |
* element is spaced 16 DWords apart. The location of first element of |
* the array, as specified by both Pointer to SF_VIEWPORT and Pointer |
* to CLIP_VIEWPORT, is aligned to a 64-byte boundary." |
*/ |
assert(num_viewports && num_viewports <= 16); |
dw = ilo_cp_steal_ptr(cp, "SF_CLIP_VIEWPORT", |
state_len, state_align, &state_offset); |
for (i = 0; i < num_viewports; i++) { |
const struct ilo_viewport_cso *vp = &viewports[i]; |
dw[0] = fui(vp->m00); |
dw[1] = fui(vp->m11); |
dw[2] = fui(vp->m22); |
dw[3] = fui(vp->m30); |
dw[4] = fui(vp->m31); |
dw[5] = fui(vp->m32); |
dw[6] = 0; |
dw[7] = 0; |
dw[8] = fui(vp->min_gbx); |
dw[9] = fui(vp->max_gbx); |
dw[10] = fui(vp->min_gby); |
dw[11] = fui(vp->max_gby); |
dw[12] = 0; |
dw[13] = 0; |
dw[14] = 0; |
dw[15] = 0; |
dw += 16; |
} |
return state_offset; |
} |
/*
 * Initialize surf as a null SURFACE_STATE.  Width/height/depth/level must
 * match the depth buffer when the null surface is used as a render target
 * (see the PRM quote below); surf->bo is left NULL.
 */
void
ilo_gpe_init_view_surface_null_gen7(const struct ilo_dev_info *dev,
                                    unsigned width, unsigned height,
                                    unsigned depth, unsigned level,
                                    struct ilo_view_surface *surf)
{
   uint32_t *dw;
   ILO_GPE_VALID_GEN(dev, 7, 7);
   /*
    * From the Ivy Bridge PRM, volume 4 part 1, page 62:
    *
    *     "A null surface is used in instances where an actual surface is not
    *      bound. When a write message is generated to a null surface, no
    *      actual surface is written to. When a read message (including any
    *      sampling engine message) is generated to a null surface, the result
    *      is all zeros.  Note that a null surface type is allowed to be used
    *      with all messages, even if it is not specificially indicated as
    *      supported.  All of the remaining fields in surface state are ignored
    *      for null surfaces, with the following exceptions:
    *
    *        * Width, Height, Depth, LOD, and Render Target View Extent fields
    *          must match the depth buffer's corresponding state for all render
    *          target surfaces, including null.
    *        * All sampling engine and data port messages support null surfaces
    *          with the above behavior, even if not mentioned as specifically
    *          supported, except for the following:
    *          * Data Port Media Block Read/Write messages.
    *        * The Surface Type of a surface used as a render target (accessed
    *          via the Data Port's Render Target Write message) must be the same
    *          as the Surface Type of all other render targets and of the depth
    *          buffer (defined in 3DSTATE_DEPTH_BUFFER), unless either the depth
    *          buffer or render targets are SURFTYPE_NULL."
    *
    * From the Ivy Bridge PRM, volume 4 part 1, page 65:
    *
    *     "If Surface Type is SURFTYPE_NULL, this field (Tiled Surface) must be
    *      true"
    */
   STATIC_ASSERT(Elements(surf->payload) >= 8);
   dw = surf->payload;
   /* bit 13 is Tiled Surface, required to be set for SURFTYPE_NULL */
   dw[0] = BRW_SURFACE_NULL << BRW_SURFACE_TYPE_SHIFT |
           BRW_SURFACEFORMAT_B8G8R8A8_UNORM << BRW_SURFACE_FORMAT_SHIFT |
           BRW_SURFACE_TILED << 13;
   dw[1] = 0;
   dw[2] = SET_FIELD(height - 1, GEN7_SURFACE_HEIGHT) |
           SET_FIELD(width - 1, GEN7_SURFACE_WIDTH);
   dw[3] = SET_FIELD(depth - 1, BRW_SURFACE_DEPTH);
   dw[4] = 0;
   dw[5] = level; /* presumably the LOD field of DW5 -- confirm against PRM */
   dw[6] = 0;
   dw[7] = 0;
   surf->bo = NULL;
}
/*
 * Initialize surf as a SURFACE_STATE for a buffer resource.
 *
 * The surface is typed when elem_format is not PIPE_FORMAT_NONE, structured
 * (SURFTYPE_STRBUF) when elem_format is PIPE_FORMAT_NONE and struct_size is
 * greater than 1, and raw otherwise.  offset and size are in bytes.
 * surf->bo aliases buf->bo without taking a reference.
 */
void
ilo_gpe_init_view_surface_for_buffer_gen7(const struct ilo_dev_info *dev,
                                          const struct ilo_buffer *buf,
                                          unsigned offset, unsigned size,
                                          unsigned struct_size,
                                          enum pipe_format elem_format,
                                          bool is_rt, bool render_cache_rw,
                                          struct ilo_view_surface *surf)
{
   const bool typed = (elem_format != PIPE_FORMAT_NONE);
   const bool structured = (!typed && struct_size > 1);
   const int elem_size = (typed) ?
      util_format_get_blocksize(elem_format) : 1;
   int width, height, depth, pitch;
   int surface_type, surface_format, num_entries;
   uint32_t *dw;
   ILO_GPE_VALID_GEN(dev, 7, 7);
   /* 5 is SURFTYPE_STRBUF */
   surface_type = (structured) ? 5 : BRW_SURFACE_BUFFER;
   surface_format = (typed) ?
      ilo_translate_color_format(elem_format) : BRW_SURFACEFORMAT_RAW;
   num_entries = size / struct_size;
   /* see if there is enough space to fit another element */
   if (size % struct_size >= elem_size && !structured)
      num_entries++;
   /*
    * From the Ivy Bridge PRM, volume 4 part 1, page 67:
    *
    *     "For SURFTYPE_BUFFER render targets, this field (Surface Base
    *      Address) specifies the base address of first element of the
    *      surface. The surface is interpreted as a simple array of that
    *      single element type. The address must be naturally-aligned to the
    *      element size (e.g., a buffer containing R32G32B32A32_FLOAT elements
    *      must be 16-byte aligned)
    *
    *      For SURFTYPE_BUFFER non-rendertarget surfaces, this field specifies
    *      the base address of the first element of the surface, computed in
    *      software by adding the surface base address to the byte offset of
    *      the element in the buffer."
    */
   if (is_rt)
      assert(offset % elem_size == 0);
   /*
    * From the Ivy Bridge PRM, volume 4 part 1, page 68:
    *
    *     "For typed buffer and structured buffer surfaces, the number of
    *      entries in the buffer ranges from 1 to 2^27.  For raw buffer
    *      surfaces, the number of entries in the buffer is the number of
    *      bytes which can range from 1 to 2^30."
    */
   assert(num_entries >= 1 &&
          num_entries <= 1 << ((typed || structured) ? 27 : 30));
   /*
    * From the Ivy Bridge PRM, volume 4 part 1, page 69:
    *
    *     "For SURFTYPE_BUFFER: The low two bits of this field (Width) must be
    *      11 if the Surface Format is RAW (the size of the buffer must be a
    *      multiple of 4 bytes)."
    *
    * From the Ivy Bridge PRM, volume 4 part 1, page 70:
    *
    *     "For surfaces of type SURFTYPE_BUFFER and SURFTYPE_STRBUF, this
    *      field (Surface Pitch) indicates the size of the structure."
    *
    *     "For linear surfaces with Surface Type of SURFTYPE_STRBUF, the pitch
    *      must be a multiple of 4 bytes."
    */
   if (structured)
      assert(struct_size % 4 == 0);
   else if (!typed)
      assert(num_entries % 4 == 0);
   pitch = struct_size;
   /* pitch and num_entries are encoded minus one */
   pitch--;
   num_entries--;
   /* the entry count is split across the Width/Height/Depth fields */
   /* bits [6:0] */
   width  = (num_entries & 0x0000007f);
   /* bits [20:7] */
   height = (num_entries & 0x001fff80) >> 7;
   /* bits [30:21] */
   depth  = (num_entries & 0x7fe00000) >> 21;
   /* limit to [26:21] */
   if (typed || structured)
      depth &= 0x3f;
   STATIC_ASSERT(Elements(surf->payload) >= 8);
   dw = surf->payload;
   dw[0] = surface_type << BRW_SURFACE_TYPE_SHIFT |
           surface_format << BRW_SURFACE_FORMAT_SHIFT;
   if (render_cache_rw)
      dw[0] |= BRW_SURFACE_RC_READ_WRITE;
   dw[1] = offset;
   dw[2] = SET_FIELD(height, GEN7_SURFACE_HEIGHT) |
           SET_FIELD(width, GEN7_SURFACE_WIDTH);
   dw[3] = SET_FIELD(depth, BRW_SURFACE_DEPTH) |
           pitch;
   dw[4] = 0;
   dw[5] = 0;
   dw[6] = 0;
   dw[7] = 0;
   /* do not increment reference count */
   surf->bo = buf->bo;
}
void |
ilo_gpe_init_view_surface_for_texture_gen7(const struct ilo_dev_info *dev, |
const struct ilo_texture *tex, |
enum pipe_format format, |
unsigned first_level, |
unsigned num_levels, |
unsigned first_layer, |
unsigned num_layers, |
bool is_rt, bool render_cache_rw, |
struct ilo_view_surface *surf) |
{ |
int surface_type, surface_format; |
int width, height, depth, pitch, lod; |
unsigned layer_offset, x_offset, y_offset; |
uint32_t *dw; |
ILO_GPE_VALID_GEN(dev, 7, 7); |
surface_type = ilo_gpe_gen6_translate_texture(tex->base.target); |
assert(surface_type != BRW_SURFACE_BUFFER); |
if (format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT && tex->separate_s8) |
format = PIPE_FORMAT_Z32_FLOAT; |
if (is_rt) |
surface_format = ilo_translate_render_format(format); |
else |
surface_format = ilo_translate_texture_format(format); |
assert(surface_format >= 0); |
width = tex->base.width0; |
height = tex->base.height0; |
depth = (tex->base.target == PIPE_TEXTURE_3D) ? |
tex->base.depth0 : num_layers; |
pitch = tex->bo_stride; |
if (surface_type == BRW_SURFACE_CUBE) { |
/* |
* From the Ivy Bridge PRM, volume 4 part 1, page 70: |
* |
* "For SURFTYPE_CUBE:For Sampling Engine Surfaces, the range of |
* this field is [0,340], indicating the number of cube array |
* elements (equal to the number of underlying 2D array elements |
* divided by 6). For other surfaces, this field must be zero." |
* |
* When is_rt is true, we treat the texture as a 2D one to avoid the |
* restriction. |
*/ |
if (is_rt) { |
surface_type = BRW_SURFACE_2D; |
} |
else { |
assert(num_layers % 6 == 0); |
depth = num_layers / 6; |
} |
} |
/* sanity check the size */ |
assert(width >= 1 && height >= 1 && depth >= 1 && pitch >= 1); |
assert(first_layer < 2048 && num_layers <= 2048); |
switch (surface_type) { |
case BRW_SURFACE_1D: |
assert(width <= 16384 && height == 1 && depth <= 2048); |
break; |
case BRW_SURFACE_2D: |
assert(width <= 16384 && height <= 16384 && depth <= 2048); |
break; |
case BRW_SURFACE_3D: |
assert(width <= 2048 && height <= 2048 && depth <= 2048); |
if (!is_rt) |
assert(first_layer == 0); |
break; |
case BRW_SURFACE_CUBE: |
assert(width <= 16384 && height <= 16384 && depth <= 86); |
assert(width == height); |
if (is_rt) |
assert(first_layer == 0); |
break; |
default: |
assert(!"unexpected surface type"); |
break; |
} |
if (is_rt) { |
/* |
* Compute the offset to the layer manually. |
* |
* For rendering, the hardware requires LOD to be the same for all |
* render targets and the depth buffer. We need to compute the offset |
* to the layer manually and always set LOD to 0. |
*/ |
if (true) { |
/* we lose the capability for layered rendering */ |
assert(num_layers == 1); |
layer_offset = ilo_texture_get_slice_offset(tex, |
first_level, first_layer, &x_offset, &y_offset); |
assert(x_offset % 4 == 0); |
assert(y_offset % 2 == 0); |
x_offset /= 4; |
y_offset /= 2; |
/* derive the size for the LOD */ |
width = u_minify(width, first_level); |
height = u_minify(height, first_level); |
if (surface_type == BRW_SURFACE_3D) |
depth = u_minify(depth, first_level); |
else |
depth = 1; |
first_level = 0; |
first_layer = 0; |
lod = 0; |
} |
else { |
layer_offset = 0; |
x_offset = 0; |
y_offset = 0; |
} |
assert(num_levels == 1); |
lod = first_level; |
} |
else { |
layer_offset = 0; |
x_offset = 0; |
y_offset = 0; |
lod = num_levels - 1; |
} |
/* |
* From the Ivy Bridge PRM, volume 4 part 1, page 68: |
* |
* "The Base Address for linear render target surfaces and surfaces |
* accessed with the typed surface read/write data port messages must |
* be element-size aligned, for non-YUV surface formats, or a multiple |
* of 2 element-sizes for YUV surface formats. Other linear surfaces |
* have no alignment requirements (byte alignment is sufficient)." |
* |
* From the Ivy Bridge PRM, volume 4 part 1, page 70: |
* |
* "For linear render target surfaces and surfaces accessed with the |
* typed data port messages, the pitch must be a multiple of the |
* element size for non-YUV surface formats. Pitch must be a multiple |
* of 2 * element size for YUV surface formats. For linear surfaces |
* with Surface Type of SURFTYPE_STRBUF, the pitch must be a multiple |
* of 4 bytes.For other linear surfaces, the pitch can be any multiple |
* of bytes." |
* |
* From the Ivy Bridge PRM, volume 4 part 1, page 74: |
* |
* "For linear surfaces, this field (X Offset) must be zero." |
*/ |
if (tex->tiling == INTEL_TILING_NONE) { |
if (is_rt) { |
const int elem_size = util_format_get_blocksize(format); |
assert(layer_offset % elem_size == 0); |
assert(pitch % elem_size == 0); |
} |
assert(!x_offset); |
} |
STATIC_ASSERT(Elements(surf->payload) >= 8); |
dw = surf->payload; |
dw[0] = surface_type << BRW_SURFACE_TYPE_SHIFT | |
surface_format << BRW_SURFACE_FORMAT_SHIFT | |
ilo_gpe_gen6_translate_winsys_tiling(tex->tiling) << 13; |
/* |
* From the Ivy Bridge PRM, volume 4 part 1, page 63: |
* |
* "If this field (Surface Array) is enabled, the Surface Type must be |
* SURFTYPE_1D, SURFTYPE_2D, or SURFTYPE_CUBE. If this field is |
* disabled and Surface Type is SURFTYPE_1D, SURFTYPE_2D, or |
* SURFTYPE_CUBE, the Depth field must be set to zero." |
* |
* For non-3D sampler surfaces, resinfo (the sampler message) always |
* returns zero for the number of layers when this field is not set. |
*/ |
if (surface_type != BRW_SURFACE_3D) { |
if (util_resource_is_array_texture(&tex->base)) |
dw[0] |= GEN7_SURFACE_IS_ARRAY; |
else |
assert(depth == 1); |
} |
if (tex->valign_4) |
dw[0] |= GEN7_SURFACE_VALIGN_4; |
if (tex->halign_8) |
dw[0] |= GEN7_SURFACE_HALIGN_8; |
if (tex->array_spacing_full) |
dw[0] |= GEN7_SURFACE_ARYSPC_FULL; |
else |
dw[0] |= GEN7_SURFACE_ARYSPC_LOD0; |
if (render_cache_rw) |
dw[0] |= BRW_SURFACE_RC_READ_WRITE; |
if (surface_type == BRW_SURFACE_CUBE && !is_rt) |
dw[0] |= BRW_SURFACE_CUBEFACE_ENABLES; |
dw[1] = layer_offset; |
dw[2] = SET_FIELD(height - 1, GEN7_SURFACE_HEIGHT) | |
SET_FIELD(width - 1, GEN7_SURFACE_WIDTH); |
dw[3] = SET_FIELD(depth - 1, BRW_SURFACE_DEPTH) | |
(pitch - 1); |
dw[4] = first_layer << 18 | |
(num_layers - 1) << 7; |
/* |
* MSFMT_MSS means the samples are not interleaved and MSFMT_DEPTH_STENCIL |
* means the samples are interleaved. The layouts are the same when the |
* number of samples is 1. |
*/ |
if (tex->interleaved && tex->base.nr_samples > 1) { |
assert(!is_rt); |
dw[4] |= GEN7_SURFACE_MSFMT_DEPTH_STENCIL; |
} |
else { |
dw[4] |= GEN7_SURFACE_MSFMT_MSS; |
} |
if (tex->base.nr_samples > 4) |
dw[4] |= GEN7_SURFACE_MULTISAMPLECOUNT_8; |
else if (tex->base.nr_samples > 2) |
dw[4] |= GEN7_SURFACE_MULTISAMPLECOUNT_4; |
else |
dw[4] |= GEN7_SURFACE_MULTISAMPLECOUNT_1; |
dw[5] = x_offset << BRW_SURFACE_X_OFFSET_SHIFT | |
y_offset << BRW_SURFACE_Y_OFFSET_SHIFT | |
SET_FIELD(first_level, GEN7_SURFACE_MIN_LOD) | |
lod; |
dw[6] = 0; |
dw[7] = 0; |
/* do not increment reference count */ |
surf->bo = tex->bo; |
} |
static int |
gen7_estimate_command_size(const struct ilo_dev_info *dev, |
enum ilo_gpe_gen7_command cmd, |
int arg) |
{ |
static const struct { |
int header; |
int body; |
} gen7_command_size_table[ILO_GPE_GEN7_COMMAND_COUNT] = { |
[ILO_GPE_GEN7_STATE_BASE_ADDRESS] = { 0, 10 }, |
[ILO_GPE_GEN7_STATE_SIP] = { 0, 2 }, |
[ILO_GPE_GEN7_3DSTATE_VF_STATISTICS] = { 0, 1 }, |
[ILO_GPE_GEN7_PIPELINE_SELECT] = { 0, 1 }, |
[ILO_GPE_GEN7_MEDIA_VFE_STATE] = { 0, 8 }, |
[ILO_GPE_GEN7_MEDIA_CURBE_LOAD] = { 0, 4 }, |
[ILO_GPE_GEN7_MEDIA_INTERFACE_DESCRIPTOR_LOAD] = { 0, 4 }, |
[ILO_GPE_GEN7_MEDIA_STATE_FLUSH] = { 0, 2 }, |
[ILO_GPE_GEN7_GPGPU_WALKER] = { 0, 11 }, |
[ILO_GPE_GEN7_3DSTATE_CLEAR_PARAMS] = { 0, 3 }, |
[ILO_GPE_GEN7_3DSTATE_DEPTH_BUFFER] = { 0, 7 }, |
[ILO_GPE_GEN7_3DSTATE_STENCIL_BUFFER] = { 0, 3 }, |
[ILO_GPE_GEN7_3DSTATE_HIER_DEPTH_BUFFER] = { 0, 3 }, |
[ILO_GPE_GEN7_3DSTATE_VERTEX_BUFFERS] = { 1, 4 }, |
[ILO_GPE_GEN7_3DSTATE_VERTEX_ELEMENTS] = { 1, 2 }, |
[ILO_GPE_GEN7_3DSTATE_INDEX_BUFFER] = { 0, 3 }, |
[ILO_GPE_GEN7_3DSTATE_CC_STATE_POINTERS] = { 0, 2 }, |
[ILO_GPE_GEN7_3DSTATE_SCISSOR_STATE_POINTERS] = { 0, 2 }, |
[ILO_GPE_GEN7_3DSTATE_VS] = { 0, 6 }, |
[ILO_GPE_GEN7_3DSTATE_GS] = { 0, 7 }, |
[ILO_GPE_GEN7_3DSTATE_CLIP] = { 0, 4 }, |
[ILO_GPE_GEN7_3DSTATE_SF] = { 0, 7 }, |
[ILO_GPE_GEN7_3DSTATE_WM] = { 0, 3 }, |
[ILO_GPE_GEN7_3DSTATE_CONSTANT_VS] = { 0, 7 }, |
[ILO_GPE_GEN7_3DSTATE_CONSTANT_GS] = { 0, 7 }, |
[ILO_GPE_GEN7_3DSTATE_CONSTANT_PS] = { 0, 7 }, |
[ILO_GPE_GEN7_3DSTATE_SAMPLE_MASK] = { 0, 2 }, |
[ILO_GPE_GEN7_3DSTATE_CONSTANT_HS] = { 0, 7 }, |
[ILO_GPE_GEN7_3DSTATE_CONSTANT_DS] = { 0, 7 }, |
[ILO_GPE_GEN7_3DSTATE_HS] = { 0, 7 }, |
[ILO_GPE_GEN7_3DSTATE_TE] = { 0, 4 }, |
[ILO_GPE_GEN7_3DSTATE_DS] = { 0, 6 }, |
[ILO_GPE_GEN7_3DSTATE_STREAMOUT] = { 0, 3 }, |
[ILO_GPE_GEN7_3DSTATE_SBE] = { 0, 14 }, |
[ILO_GPE_GEN7_3DSTATE_PS] = { 0, 8 }, |
[ILO_GPE_GEN7_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP] = { 0, 2 }, |
[ILO_GPE_GEN7_3DSTATE_VIEWPORT_STATE_POINTERS_CC] = { 0, 2 }, |
[ILO_GPE_GEN7_3DSTATE_BLEND_STATE_POINTERS] = { 0, 2 }, |
[ILO_GPE_GEN7_3DSTATE_DEPTH_STENCIL_STATE_POINTERS] = { 0, 2 }, |
[ILO_GPE_GEN7_3DSTATE_BINDING_TABLE_POINTERS_VS] = { 0, 2 }, |
[ILO_GPE_GEN7_3DSTATE_BINDING_TABLE_POINTERS_HS] = { 0, 2 }, |
[ILO_GPE_GEN7_3DSTATE_BINDING_TABLE_POINTERS_DS] = { 0, 2 }, |
[ILO_GPE_GEN7_3DSTATE_BINDING_TABLE_POINTERS_GS] = { 0, 2 }, |
[ILO_GPE_GEN7_3DSTATE_BINDING_TABLE_POINTERS_PS] = { 0, 2 }, |
[ILO_GPE_GEN7_3DSTATE_SAMPLER_STATE_POINTERS_VS] = { 0, 2 }, |
[ILO_GPE_GEN7_3DSTATE_SAMPLER_STATE_POINTERS_HS] = { 0, 2 }, |
[ILO_GPE_GEN7_3DSTATE_SAMPLER_STATE_POINTERS_DS] = { 0, 2 }, |
[ILO_GPE_GEN7_3DSTATE_SAMPLER_STATE_POINTERS_GS] = { 0, 2 }, |
[ILO_GPE_GEN7_3DSTATE_SAMPLER_STATE_POINTERS_PS] = { 0, 2 }, |
[ILO_GPE_GEN7_3DSTATE_URB_VS] = { 0, 2 }, |
[ILO_GPE_GEN7_3DSTATE_URB_HS] = { 0, 2 }, |
[ILO_GPE_GEN7_3DSTATE_URB_DS] = { 0, 2 }, |
[ILO_GPE_GEN7_3DSTATE_URB_GS] = { 0, 2 }, |
[ILO_GPE_GEN7_3DSTATE_DRAWING_RECTANGLE] = { 0, 4 }, |
[ILO_GPE_GEN7_3DSTATE_POLY_STIPPLE_OFFSET] = { 0, 2 }, |
[ILO_GPE_GEN7_3DSTATE_POLY_STIPPLE_PATTERN] = { 0, 33, }, |
[ILO_GPE_GEN7_3DSTATE_LINE_STIPPLE] = { 0, 3 }, |
[ILO_GPE_GEN7_3DSTATE_AA_LINE_PARAMETERS] = { 0, 3 }, |
[ILO_GPE_GEN7_3DSTATE_MULTISAMPLE] = { 0, 4 }, |
[ILO_GPE_GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_VS] = { 0, 2 }, |
[ILO_GPE_GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_HS] = { 0, 2 }, |
[ILO_GPE_GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_DS] = { 0, 2 }, |
[ILO_GPE_GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_GS] = { 0, 2 }, |
[ILO_GPE_GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_PS] = { 0, 2 }, |
[ILO_GPE_GEN7_3DSTATE_SO_DECL_LIST] = { 3, 2 }, |
[ILO_GPE_GEN7_3DSTATE_SO_BUFFER] = { 0, 4 }, |
[ILO_GPE_GEN7_PIPE_CONTROL] = { 0, 5 }, |
[ILO_GPE_GEN7_3DPRIMITIVE] = { 0, 7 }, |
}; |
const int header = gen7_command_size_table[cmd].header; |
const int body = gen7_command_size_table[cmd].body; |
const int count = arg; |
ILO_GPE_VALID_GEN(dev, 7, 7); |
assert(cmd < ILO_GPE_GEN7_COMMAND_COUNT); |
return (likely(count)) ? header + body * count : 0; |
} |
static int |
gen7_estimate_state_size(const struct ilo_dev_info *dev, |
enum ilo_gpe_gen7_state state, |
int arg) |
{ |
static const struct { |
int alignment; |
int body; |
bool is_array; |
} gen7_state_size_table[ILO_GPE_GEN7_STATE_COUNT] = { |
[ILO_GPE_GEN7_INTERFACE_DESCRIPTOR_DATA] = { 8, 8, true }, |
[ILO_GPE_GEN7_SF_CLIP_VIEWPORT] = { 16, 16, true }, |
[ILO_GPE_GEN7_CC_VIEWPORT] = { 8, 2, true }, |
[ILO_GPE_GEN7_COLOR_CALC_STATE] = { 16, 6, false }, |
[ILO_GPE_GEN7_BLEND_STATE] = { 16, 2, true }, |
[ILO_GPE_GEN7_DEPTH_STENCIL_STATE] = { 16, 3, false }, |
[ILO_GPE_GEN7_SCISSOR_RECT] = { 8, 2, true }, |
[ILO_GPE_GEN7_BINDING_TABLE_STATE] = { 8, 1, true }, |
[ILO_GPE_GEN7_SURFACE_STATE] = { 8, 8, false }, |
[ILO_GPE_GEN7_SAMPLER_STATE] = { 8, 4, true }, |
[ILO_GPE_GEN7_SAMPLER_BORDER_COLOR_STATE] = { 8, 4, false }, |
[ILO_GPE_GEN7_PUSH_CONSTANT_BUFFER] = { 8, 1, true }, |
}; |
const int alignment = gen7_state_size_table[state].alignment; |
const int body = gen7_state_size_table[state].body; |
const bool is_array = gen7_state_size_table[state].is_array; |
const int count = arg; |
int estimate; |
ILO_GPE_VALID_GEN(dev, 7, 7); |
assert(state < ILO_GPE_GEN7_STATE_COUNT); |
if (likely(count)) { |
if (is_array) { |
estimate = (alignment - 1) + body * count; |
} |
else { |
estimate = (alignment - 1) + body; |
/* all states are aligned */ |
if (count > 1) |
estimate += util_align_npot(body, alignment) * (count - 1); |
} |
} |
else { |
estimate = 0; |
} |
return estimate; |
} |
/*
 * Populate the GEN7 GPE function table.  Commands and states whose emitters
 * are unchanged from GEN6 are inherited from the GEN6 table (GEN7_USE);
 * the rest use the GEN7-specific emitters defined in this file (GEN7_SET).
 */
static void
gen7_init(struct ilo_gpe_gen7 *gen7)
{
   const struct ilo_gpe_gen6 *gen6 = ilo_gpe_gen6_get();
   gen7->estimate_command_size = gen7_estimate_command_size;
   gen7->estimate_state_size = gen7_estimate_state_size;
/* GEN7_USE inherits a GEN6 emitter; GEN7_SET installs a GEN7 emitter */
#define GEN7_USE(gen7, name, from) gen7->emit_ ## name = from->emit_ ## name
#define GEN7_SET(gen7, name) gen7->emit_ ## name = gen7_emit_ ## name
   GEN7_USE(gen7, STATE_BASE_ADDRESS, gen6);
   GEN7_USE(gen7, STATE_SIP, gen6);
   GEN7_USE(gen7, 3DSTATE_VF_STATISTICS, gen6);
   GEN7_USE(gen7, PIPELINE_SELECT, gen6);
   GEN7_USE(gen7, MEDIA_VFE_STATE, gen6);
   GEN7_USE(gen7, MEDIA_CURBE_LOAD, gen6);
   GEN7_USE(gen7, MEDIA_INTERFACE_DESCRIPTOR_LOAD, gen6);
   GEN7_USE(gen7, MEDIA_STATE_FLUSH, gen6);
   GEN7_SET(gen7, GPGPU_WALKER);
   GEN7_SET(gen7, 3DSTATE_CLEAR_PARAMS);
   GEN7_USE(gen7, 3DSTATE_DEPTH_BUFFER, gen6);
   GEN7_USE(gen7, 3DSTATE_STENCIL_BUFFER, gen6);
   GEN7_USE(gen7, 3DSTATE_HIER_DEPTH_BUFFER, gen6);
   GEN7_USE(gen7, 3DSTATE_VERTEX_BUFFERS, gen6);
   GEN7_USE(gen7, 3DSTATE_VERTEX_ELEMENTS, gen6);
   GEN7_USE(gen7, 3DSTATE_INDEX_BUFFER, gen6);
   GEN7_SET(gen7, 3DSTATE_CC_STATE_POINTERS);
   GEN7_USE(gen7, 3DSTATE_SCISSOR_STATE_POINTERS, gen6);
   GEN7_USE(gen7, 3DSTATE_VS, gen6);
   GEN7_SET(gen7, 3DSTATE_GS);
   GEN7_USE(gen7, 3DSTATE_CLIP, gen6);
   GEN7_SET(gen7, 3DSTATE_SF);
   GEN7_SET(gen7, 3DSTATE_WM);
   GEN7_SET(gen7, 3DSTATE_CONSTANT_VS);
   GEN7_SET(gen7, 3DSTATE_CONSTANT_GS);
   GEN7_SET(gen7, 3DSTATE_CONSTANT_PS);
   GEN7_SET(gen7, 3DSTATE_SAMPLE_MASK);
   GEN7_SET(gen7, 3DSTATE_CONSTANT_HS);
   GEN7_SET(gen7, 3DSTATE_CONSTANT_DS);
   GEN7_SET(gen7, 3DSTATE_HS);
   GEN7_SET(gen7, 3DSTATE_TE);
   GEN7_SET(gen7, 3DSTATE_DS);
   GEN7_SET(gen7, 3DSTATE_STREAMOUT);
   GEN7_SET(gen7, 3DSTATE_SBE);
   GEN7_SET(gen7, 3DSTATE_PS);
   GEN7_SET(gen7, 3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP);
   GEN7_SET(gen7, 3DSTATE_VIEWPORT_STATE_POINTERS_CC);
   GEN7_SET(gen7, 3DSTATE_BLEND_STATE_POINTERS);
   GEN7_SET(gen7, 3DSTATE_DEPTH_STENCIL_STATE_POINTERS);
   GEN7_SET(gen7, 3DSTATE_BINDING_TABLE_POINTERS_VS);
   GEN7_SET(gen7, 3DSTATE_BINDING_TABLE_POINTERS_HS);
   GEN7_SET(gen7, 3DSTATE_BINDING_TABLE_POINTERS_DS);
   GEN7_SET(gen7, 3DSTATE_BINDING_TABLE_POINTERS_GS);
   GEN7_SET(gen7, 3DSTATE_BINDING_TABLE_POINTERS_PS);
   GEN7_SET(gen7, 3DSTATE_SAMPLER_STATE_POINTERS_VS);
   GEN7_SET(gen7, 3DSTATE_SAMPLER_STATE_POINTERS_HS);
   GEN7_SET(gen7, 3DSTATE_SAMPLER_STATE_POINTERS_DS);
   GEN7_SET(gen7, 3DSTATE_SAMPLER_STATE_POINTERS_GS);
   GEN7_SET(gen7, 3DSTATE_SAMPLER_STATE_POINTERS_PS);
   GEN7_SET(gen7, 3DSTATE_URB_VS);
   GEN7_SET(gen7, 3DSTATE_URB_HS);
   GEN7_SET(gen7, 3DSTATE_URB_DS);
   GEN7_SET(gen7, 3DSTATE_URB_GS);
   GEN7_USE(gen7, 3DSTATE_DRAWING_RECTANGLE, gen6);
   GEN7_USE(gen7, 3DSTATE_POLY_STIPPLE_OFFSET, gen6);
   GEN7_USE(gen7, 3DSTATE_POLY_STIPPLE_PATTERN, gen6);
   GEN7_USE(gen7, 3DSTATE_LINE_STIPPLE, gen6);
   GEN7_USE(gen7, 3DSTATE_AA_LINE_PARAMETERS, gen6);
   GEN7_USE(gen7, 3DSTATE_MULTISAMPLE, gen6);
   GEN7_SET(gen7, 3DSTATE_PUSH_CONSTANT_ALLOC_VS);
   GEN7_SET(gen7, 3DSTATE_PUSH_CONSTANT_ALLOC_HS);
   GEN7_SET(gen7, 3DSTATE_PUSH_CONSTANT_ALLOC_DS);
   GEN7_SET(gen7, 3DSTATE_PUSH_CONSTANT_ALLOC_GS);
   GEN7_SET(gen7, 3DSTATE_PUSH_CONSTANT_ALLOC_PS);
   GEN7_SET(gen7, 3DSTATE_SO_DECL_LIST);
   GEN7_SET(gen7, 3DSTATE_SO_BUFFER);
   GEN7_USE(gen7, PIPE_CONTROL, gen6);
   GEN7_SET(gen7, 3DPRIMITIVE);
   GEN7_USE(gen7, INTERFACE_DESCRIPTOR_DATA, gen6);
   GEN7_SET(gen7, SF_CLIP_VIEWPORT);
   GEN7_USE(gen7, CC_VIEWPORT, gen6);
   GEN7_USE(gen7, COLOR_CALC_STATE, gen6);
   GEN7_USE(gen7, BLEND_STATE, gen6);
   GEN7_USE(gen7, DEPTH_STENCIL_STATE, gen6);
   GEN7_USE(gen7, SCISSOR_RECT, gen6);
   GEN7_USE(gen7, BINDING_TABLE_STATE, gen6);
   GEN7_USE(gen7, SURFACE_STATE, gen6);
   GEN7_USE(gen7, SAMPLER_STATE, gen6);
   GEN7_USE(gen7, SAMPLER_BORDER_COLOR_STATE, gen6);
   GEN7_USE(gen7, push_constant_buffer, gen6);
#undef GEN7_USE
#undef GEN7_SET
}
static struct ilo_gpe_gen7 gen7_gpe; |
const struct ilo_gpe_gen7 * |
ilo_gpe_gen7_get(void) |
{ |
if (!gen7_gpe.estimate_command_size) |
gen7_init(&gen7_gpe); |
return &gen7_gpe; |
} |
/contrib/sdk/sources/Mesa/src/gallium/drivers/ilo/ilo_gpe_gen7.h |
---|
0,0 → 1,493 |
/* |
* Mesa 3-D graphics library |
* |
* Copyright (C) 2013 LunarG, Inc. |
* |
* Permission is hereby granted, free of charge, to any person obtaining a |
* copy of this software and associated documentation files (the "Software"), |
* to deal in the Software without restriction, including without limitation |
* the rights to use, copy, modify, merge, publish, distribute, sublicense, |
* and/or sell copies of the Software, and to permit persons to whom the |
* Software is furnished to do so, subject to the following conditions: |
* |
* The above copyright notice and this permission notice shall be included |
* in all copies or substantial portions of the Software. |
* |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING |
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER |
* DEALINGS IN THE SOFTWARE. |
* |
* Authors: |
* Chia-I Wu <olv@lunarg.com> |
*/ |
#ifndef ILO_GPE_GEN7_H |
#define ILO_GPE_GEN7_H |
#include "ilo_common.h" |
#include "ilo_gpe_gen6.h" |
/**
 * Commands that GEN7 GPE could emit.
 *
 * The comment on each command gives its hardware encoding as a
 * (command type, command sub-type, opcode) triple.
 */
enum ilo_gpe_gen7_command {
   ILO_GPE_GEN7_STATE_BASE_ADDRESS,                      /* (0x0, 0x1, 0x01) */
   ILO_GPE_GEN7_STATE_SIP,                               /* (0x0, 0x1, 0x02) */
   ILO_GPE_GEN7_3DSTATE_VF_STATISTICS,                   /* (0x1, 0x0, 0x0b) */
   ILO_GPE_GEN7_PIPELINE_SELECT,                         /* (0x1, 0x1, 0x04) */
   ILO_GPE_GEN7_MEDIA_VFE_STATE,                         /* (0x2, 0x0, 0x00) */
   ILO_GPE_GEN7_MEDIA_CURBE_LOAD,                        /* (0x2, 0x0, 0x01) */
   ILO_GPE_GEN7_MEDIA_INTERFACE_DESCRIPTOR_LOAD,         /* (0x2, 0x0, 0x02) */
   ILO_GPE_GEN7_MEDIA_STATE_FLUSH,                       /* (0x2, 0x0, 0x04) */
   ILO_GPE_GEN7_GPGPU_WALKER,                            /* (0x2, 0x1, 0x05) */
   ILO_GPE_GEN7_3DSTATE_CLEAR_PARAMS,                    /* (0x3, 0x0, 0x04) */
   ILO_GPE_GEN7_3DSTATE_DEPTH_BUFFER,                    /* (0x3, 0x0, 0x05) */
   ILO_GPE_GEN7_3DSTATE_STENCIL_BUFFER,                  /* (0x3, 0x0, 0x06) */
   ILO_GPE_GEN7_3DSTATE_HIER_DEPTH_BUFFER,               /* (0x3, 0x0, 0x07) */
   ILO_GPE_GEN7_3DSTATE_VERTEX_BUFFERS,                  /* (0x3, 0x0, 0x08) */
   ILO_GPE_GEN7_3DSTATE_VERTEX_ELEMENTS,                 /* (0x3, 0x0, 0x09) */
   ILO_GPE_GEN7_3DSTATE_INDEX_BUFFER,                    /* (0x3, 0x0, 0x0a) */
   ILO_GPE_GEN7_3DSTATE_CC_STATE_POINTERS,               /* (0x3, 0x0, 0x0e) */
   ILO_GPE_GEN7_3DSTATE_SCISSOR_STATE_POINTERS,          /* (0x3, 0x0, 0x0f) */
   ILO_GPE_GEN7_3DSTATE_VS,                              /* (0x3, 0x0, 0x10) */
   ILO_GPE_GEN7_3DSTATE_GS,                              /* (0x3, 0x0, 0x11) */
   ILO_GPE_GEN7_3DSTATE_CLIP,                            /* (0x3, 0x0, 0x12) */
   ILO_GPE_GEN7_3DSTATE_SF,                              /* (0x3, 0x0, 0x13) */
   ILO_GPE_GEN7_3DSTATE_WM,                              /* (0x3, 0x0, 0x14) */
   ILO_GPE_GEN7_3DSTATE_CONSTANT_VS,                     /* (0x3, 0x0, 0x15) */
   ILO_GPE_GEN7_3DSTATE_CONSTANT_GS,                     /* (0x3, 0x0, 0x16) */
   ILO_GPE_GEN7_3DSTATE_CONSTANT_PS,                     /* (0x3, 0x0, 0x17) */
   ILO_GPE_GEN7_3DSTATE_SAMPLE_MASK,                     /* (0x3, 0x0, 0x18) */
   ILO_GPE_GEN7_3DSTATE_CONSTANT_HS,                     /* (0x3, 0x0, 0x19) */
   ILO_GPE_GEN7_3DSTATE_CONSTANT_DS,                     /* (0x3, 0x0, 0x1a) */
   ILO_GPE_GEN7_3DSTATE_HS,                              /* (0x3, 0x0, 0x1b) */
   ILO_GPE_GEN7_3DSTATE_TE,                              /* (0x3, 0x0, 0x1c) */
   ILO_GPE_GEN7_3DSTATE_DS,                              /* (0x3, 0x0, 0x1d) */
   ILO_GPE_GEN7_3DSTATE_STREAMOUT,                       /* (0x3, 0x0, 0x1e) */
   ILO_GPE_GEN7_3DSTATE_SBE,                             /* (0x3, 0x0, 0x1f) */
   ILO_GPE_GEN7_3DSTATE_PS,                              /* (0x3, 0x0, 0x20) */
   ILO_GPE_GEN7_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP, /* (0x3, 0x0, 0x21) */
   ILO_GPE_GEN7_3DSTATE_VIEWPORT_STATE_POINTERS_CC,      /* (0x3, 0x0, 0x23) */
   ILO_GPE_GEN7_3DSTATE_BLEND_STATE_POINTERS,            /* (0x3, 0x0, 0x24) */
   ILO_GPE_GEN7_3DSTATE_DEPTH_STENCIL_STATE_POINTERS,    /* (0x3, 0x0, 0x25) */
   ILO_GPE_GEN7_3DSTATE_BINDING_TABLE_POINTERS_VS,       /* (0x3, 0x0, 0x26) */
   ILO_GPE_GEN7_3DSTATE_BINDING_TABLE_POINTERS_HS,       /* (0x3, 0x0, 0x27) */
   ILO_GPE_GEN7_3DSTATE_BINDING_TABLE_POINTERS_DS,       /* (0x3, 0x0, 0x28) */
   ILO_GPE_GEN7_3DSTATE_BINDING_TABLE_POINTERS_GS,       /* (0x3, 0x0, 0x29) */
   ILO_GPE_GEN7_3DSTATE_BINDING_TABLE_POINTERS_PS,       /* (0x3, 0x0, 0x2a) */
   ILO_GPE_GEN7_3DSTATE_SAMPLER_STATE_POINTERS_VS,       /* (0x3, 0x0, 0x2b) */
   ILO_GPE_GEN7_3DSTATE_SAMPLER_STATE_POINTERS_HS,       /* (0x3, 0x0, 0x2c) */
   ILO_GPE_GEN7_3DSTATE_SAMPLER_STATE_POINTERS_DS,       /* (0x3, 0x0, 0x2d) */
   ILO_GPE_GEN7_3DSTATE_SAMPLER_STATE_POINTERS_GS,       /* (0x3, 0x0, 0x2e) */
   ILO_GPE_GEN7_3DSTATE_SAMPLER_STATE_POINTERS_PS,       /* (0x3, 0x0, 0x2f) */
   ILO_GPE_GEN7_3DSTATE_URB_VS,                          /* (0x3, 0x0, 0x30) */
   ILO_GPE_GEN7_3DSTATE_URB_HS,                          /* (0x3, 0x0, 0x31) */
   ILO_GPE_GEN7_3DSTATE_URB_DS,                          /* (0x3, 0x0, 0x32) */
   ILO_GPE_GEN7_3DSTATE_URB_GS,                          /* (0x3, 0x0, 0x33) */
   ILO_GPE_GEN7_3DSTATE_DRAWING_RECTANGLE,               /* (0x3, 0x1, 0x00) */
   ILO_GPE_GEN7_3DSTATE_POLY_STIPPLE_OFFSET,             /* (0x3, 0x1, 0x06) */
   ILO_GPE_GEN7_3DSTATE_POLY_STIPPLE_PATTERN,            /* (0x3, 0x1, 0x07) */
   ILO_GPE_GEN7_3DSTATE_LINE_STIPPLE,                    /* (0x3, 0x1, 0x08) */
   ILO_GPE_GEN7_3DSTATE_AA_LINE_PARAMETERS,              /* (0x3, 0x1, 0x0a) */
   ILO_GPE_GEN7_3DSTATE_MULTISAMPLE,                     /* (0x3, 0x1, 0x0d) */
   ILO_GPE_GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_VS,          /* (0x3, 0x1, 0x12) */
   ILO_GPE_GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_HS,          /* (0x3, 0x1, 0x13) */
   ILO_GPE_GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_DS,          /* (0x3, 0x1, 0x14) */
   ILO_GPE_GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_GS,          /* (0x3, 0x1, 0x15) */
   ILO_GPE_GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_PS,          /* (0x3, 0x1, 0x16) */
   ILO_GPE_GEN7_3DSTATE_SO_DECL_LIST,                    /* (0x3, 0x1, 0x17) */
   ILO_GPE_GEN7_3DSTATE_SO_BUFFER,                       /* (0x3, 0x1, 0x18) */
   ILO_GPE_GEN7_PIPE_CONTROL,                            /* (0x3, 0x2, 0x00) */
   ILO_GPE_GEN7_3DPRIMITIVE,                             /* (0x3, 0x3, 0x00) */

   /* number of commands; must stay last */
   ILO_GPE_GEN7_COMMAND_COUNT,
};
/**
 * Indirect states that GEN7 GPE could emit.
 *
 * Unlike the commands above, these are written into the state buffer and
 * referenced by pointer from the command stream.
 */
enum ilo_gpe_gen7_state {
   ILO_GPE_GEN7_INTERFACE_DESCRIPTOR_DATA,
   ILO_GPE_GEN7_SF_CLIP_VIEWPORT,
   ILO_GPE_GEN7_CC_VIEWPORT,
   ILO_GPE_GEN7_COLOR_CALC_STATE,
   ILO_GPE_GEN7_BLEND_STATE,
   ILO_GPE_GEN7_DEPTH_STENCIL_STATE,
   ILO_GPE_GEN7_SCISSOR_RECT,
   ILO_GPE_GEN7_BINDING_TABLE_STATE,
   ILO_GPE_GEN7_SURFACE_STATE,
   ILO_GPE_GEN7_SAMPLER_STATE,
   ILO_GPE_GEN7_SAMPLER_BORDER_COLOR_STATE,
   ILO_GPE_GEN7_PUSH_CONSTANT_BUFFER,

   /* number of states; must stay last */
   ILO_GPE_GEN7_STATE_COUNT,
};
typedef ilo_gpe_gen6_STATE_BASE_ADDRESS ilo_gpe_gen7_STATE_BASE_ADDRESS; |
typedef ilo_gpe_gen6_STATE_SIP ilo_gpe_gen7_STATE_SIP; |
typedef ilo_gpe_gen6_3DSTATE_VF_STATISTICS ilo_gpe_gen7_3DSTATE_VF_STATISTICS; |
typedef ilo_gpe_gen6_PIPELINE_SELECT ilo_gpe_gen7_PIPELINE_SELECT; |
typedef ilo_gpe_gen6_MEDIA_VFE_STATE ilo_gpe_gen7_MEDIA_VFE_STATE; |
typedef ilo_gpe_gen6_MEDIA_CURBE_LOAD ilo_gpe_gen7_MEDIA_CURBE_LOAD; |
typedef ilo_gpe_gen6_MEDIA_INTERFACE_DESCRIPTOR_LOAD ilo_gpe_gen7_MEDIA_INTERFACE_DESCRIPTOR_LOAD; |
typedef ilo_gpe_gen6_MEDIA_STATE_FLUSH ilo_gpe_gen7_MEDIA_STATE_FLUSH; |
typedef void |
(*ilo_gpe_gen7_GPGPU_WALKER)(const struct ilo_dev_info *dev, |
struct ilo_cp *cp); |
typedef ilo_gpe_gen6_3DSTATE_CLEAR_PARAMS ilo_gpe_gen7_3DSTATE_CLEAR_PARAMS; |
typedef ilo_gpe_gen6_3DSTATE_DEPTH_BUFFER ilo_gpe_gen7_3DSTATE_DEPTH_BUFFER; |
typedef ilo_gpe_gen6_3DSTATE_STENCIL_BUFFER ilo_gpe_gen7_3DSTATE_STENCIL_BUFFER; |
typedef ilo_gpe_gen6_3DSTATE_HIER_DEPTH_BUFFER ilo_gpe_gen7_3DSTATE_HIER_DEPTH_BUFFER; |
typedef ilo_gpe_gen6_3DSTATE_VERTEX_BUFFERS ilo_gpe_gen7_3DSTATE_VERTEX_BUFFERS; |
typedef ilo_gpe_gen6_3DSTATE_VERTEX_ELEMENTS ilo_gpe_gen7_3DSTATE_VERTEX_ELEMENTS; |
typedef ilo_gpe_gen6_3DSTATE_INDEX_BUFFER ilo_gpe_gen7_3DSTATE_INDEX_BUFFER; |
typedef void |
(*ilo_gpe_gen7_3DSTATE_CC_STATE_POINTERS)(const struct ilo_dev_info *dev, |
uint32_t color_calc_state, |
struct ilo_cp *cp); |
typedef ilo_gpe_gen6_3DSTATE_SCISSOR_STATE_POINTERS ilo_gpe_gen7_3DSTATE_SCISSOR_STATE_POINTERS; |
typedef ilo_gpe_gen6_3DSTATE_VS ilo_gpe_gen7_3DSTATE_VS; |
typedef void |
(*ilo_gpe_gen7_3DSTATE_GS)(const struct ilo_dev_info *dev, |
const struct ilo_shader_state *gs, |
int num_samplers, |
struct ilo_cp *cp); |
typedef ilo_gpe_gen6_3DSTATE_CLIP ilo_gpe_gen7_3DSTATE_CLIP; |
typedef void |
(*ilo_gpe_gen7_3DSTATE_SF)(const struct ilo_dev_info *dev, |
const struct ilo_rasterizer_state *rasterizer, |
const struct pipe_surface *zs_surf, |
struct ilo_cp *cp); |
typedef void |
(*ilo_gpe_gen7_3DSTATE_WM)(const struct ilo_dev_info *dev, |
const struct ilo_shader_state *fs, |
const struct ilo_rasterizer_state *rasterizer, |
bool cc_may_kill, |
struct ilo_cp *cp); |
typedef ilo_gpe_gen6_3DSTATE_CONSTANT_VS ilo_gpe_gen7_3DSTATE_CONSTANT_VS; |
typedef ilo_gpe_gen6_3DSTATE_CONSTANT_GS ilo_gpe_gen7_3DSTATE_CONSTANT_GS; |
typedef ilo_gpe_gen6_3DSTATE_CONSTANT_PS ilo_gpe_gen7_3DSTATE_CONSTANT_PS; |
typedef void |
(*ilo_gpe_gen7_3DSTATE_SAMPLE_MASK)(const struct ilo_dev_info *dev, |
unsigned sample_mask, |
int num_samples, |
struct ilo_cp *cp); |
typedef void |
(*ilo_gpe_gen7_3DSTATE_CONSTANT_HS)(const struct ilo_dev_info *dev, |
const uint32_t *bufs, const int *sizes, |
int num_bufs, |
struct ilo_cp *cp); |
typedef void |
(*ilo_gpe_gen7_3DSTATE_CONSTANT_DS)(const struct ilo_dev_info *dev, |
const uint32_t *bufs, const int *sizes, |
int num_bufs, |
struct ilo_cp *cp); |
typedef void |
(*ilo_gpe_gen7_3DSTATE_HS)(const struct ilo_dev_info *dev, |
const struct ilo_shader_state *hs, |
int num_samplers, |
struct ilo_cp *cp); |
typedef void |
(*ilo_gpe_gen7_3DSTATE_TE)(const struct ilo_dev_info *dev, |
struct ilo_cp *cp); |
typedef void |
(*ilo_gpe_gen7_3DSTATE_DS)(const struct ilo_dev_info *dev, |
const struct ilo_shader_state *ds, |
int num_samplers, |
struct ilo_cp *cp); |
typedef void |
(*ilo_gpe_gen7_3DSTATE_STREAMOUT)(const struct ilo_dev_info *dev, |
unsigned buffer_mask, |
int vertex_attrib_count, |
bool rasterizer_discard, |
struct ilo_cp *cp); |
typedef void |
(*ilo_gpe_gen7_3DSTATE_SBE)(const struct ilo_dev_info *dev, |
const struct ilo_rasterizer_state *rasterizer, |
const struct ilo_shader_state *fs, |
const struct ilo_shader_state *last_sh, |
struct ilo_cp *cp); |
typedef void |
(*ilo_gpe_gen7_3DSTATE_PS)(const struct ilo_dev_info *dev, |
const struct ilo_shader_state *fs, |
int num_samplers, bool dual_blend, |
struct ilo_cp *cp); |
typedef void |
(*ilo_gpe_gen7_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP)(const struct ilo_dev_info *dev, |
uint32_t viewport, |
struct ilo_cp *cp); |
typedef void |
(*ilo_gpe_gen7_3DSTATE_VIEWPORT_STATE_POINTERS_CC)(const struct ilo_dev_info *dev, |
uint32_t viewport, |
struct ilo_cp *cp); |
typedef void |
(*ilo_gpe_gen7_3DSTATE_BLEND_STATE_POINTERS)(const struct ilo_dev_info *dev, |
uint32_t blend, |
struct ilo_cp *cp); |
typedef void |
(*ilo_gpe_gen7_3DSTATE_DEPTH_STENCIL_STATE_POINTERS)(const struct ilo_dev_info *dev, |
uint32_t depth_stencil, |
struct ilo_cp *cp); |
typedef void |
(*ilo_gpe_gen7_3DSTATE_BINDING_TABLE_POINTERS_VS)(const struct ilo_dev_info *dev, |
uint32_t binding_table, |
struct ilo_cp *cp); |
typedef void |
(*ilo_gpe_gen7_3DSTATE_BINDING_TABLE_POINTERS_HS)(const struct ilo_dev_info *dev, |
uint32_t binding_table, |
struct ilo_cp *cp); |
typedef void |
(*ilo_gpe_gen7_3DSTATE_BINDING_TABLE_POINTERS_DS)(const struct ilo_dev_info *dev, |
uint32_t binding_table, |
struct ilo_cp *cp); |
typedef void |
(*ilo_gpe_gen7_3DSTATE_BINDING_TABLE_POINTERS_GS)(const struct ilo_dev_info *dev, |
uint32_t binding_table, |
struct ilo_cp *cp); |
typedef void |
(*ilo_gpe_gen7_3DSTATE_BINDING_TABLE_POINTERS_PS)(const struct ilo_dev_info *dev, |
uint32_t binding_table, |
struct ilo_cp *cp); |
typedef void |
(*ilo_gpe_gen7_3DSTATE_SAMPLER_STATE_POINTERS_VS)(const struct ilo_dev_info *dev, |
uint32_t sampler_state, |
struct ilo_cp *cp); |
typedef void |
(*ilo_gpe_gen7_3DSTATE_SAMPLER_STATE_POINTERS_HS)(const struct ilo_dev_info *dev, |
uint32_t sampler_state, |
struct ilo_cp *cp); |
typedef void |
(*ilo_gpe_gen7_3DSTATE_SAMPLER_STATE_POINTERS_DS)(const struct ilo_dev_info *dev, |
uint32_t sampler_state, |
struct ilo_cp *cp); |
typedef void |
(*ilo_gpe_gen7_3DSTATE_SAMPLER_STATE_POINTERS_GS)(const struct ilo_dev_info *dev, |
uint32_t sampler_state, |
struct ilo_cp *cp); |
typedef void |
(*ilo_gpe_gen7_3DSTATE_SAMPLER_STATE_POINTERS_PS)(const struct ilo_dev_info *dev, |
uint32_t sampler_state, |
struct ilo_cp *cp); |
typedef void |
(*ilo_gpe_gen7_3DSTATE_URB_VS)(const struct ilo_dev_info *dev, |
int offset, int size, int entry_size, |
struct ilo_cp *cp); |
typedef void |
(*ilo_gpe_gen7_3DSTATE_URB_HS)(const struct ilo_dev_info *dev, |
int offset, int size, int entry_size, |
struct ilo_cp *cp); |
typedef void |
(*ilo_gpe_gen7_3DSTATE_URB_DS)(const struct ilo_dev_info *dev, |
int offset, int size, int entry_size, |
struct ilo_cp *cp); |
typedef void |
(*ilo_gpe_gen7_3DSTATE_URB_GS)(const struct ilo_dev_info *dev, |
int offset, int size, int entry_size, |
struct ilo_cp *cp); |
typedef ilo_gpe_gen6_3DSTATE_DRAWING_RECTANGLE ilo_gpe_gen7_3DSTATE_DRAWING_RECTANGLE; |
typedef ilo_gpe_gen6_3DSTATE_POLY_STIPPLE_OFFSET ilo_gpe_gen7_3DSTATE_POLY_STIPPLE_OFFSET; |
typedef ilo_gpe_gen6_3DSTATE_POLY_STIPPLE_PATTERN ilo_gpe_gen7_3DSTATE_POLY_STIPPLE_PATTERN; |
typedef ilo_gpe_gen6_3DSTATE_LINE_STIPPLE ilo_gpe_gen7_3DSTATE_LINE_STIPPLE; |
typedef ilo_gpe_gen6_3DSTATE_AA_LINE_PARAMETERS ilo_gpe_gen7_3DSTATE_AA_LINE_PARAMETERS; |
typedef ilo_gpe_gen6_3DSTATE_MULTISAMPLE ilo_gpe_gen7_3DSTATE_MULTISAMPLE; |
typedef void |
(*ilo_gpe_gen7_3DSTATE_PUSH_CONSTANT_ALLOC_VS)(const struct ilo_dev_info *dev, |
int offset, int size, |
struct ilo_cp *cp); |
typedef void |
(*ilo_gpe_gen7_3DSTATE_PUSH_CONSTANT_ALLOC_HS)(const struct ilo_dev_info *dev, |
int offset, int size, |
struct ilo_cp *cp); |
typedef void |
(*ilo_gpe_gen7_3DSTATE_PUSH_CONSTANT_ALLOC_DS)(const struct ilo_dev_info *dev, |
int offset, int size, |
struct ilo_cp *cp); |
typedef void |
(*ilo_gpe_gen7_3DSTATE_PUSH_CONSTANT_ALLOC_GS)(const struct ilo_dev_info *dev, |
int offset, int size, |
struct ilo_cp *cp); |
typedef void |
(*ilo_gpe_gen7_3DSTATE_PUSH_CONSTANT_ALLOC_PS)(const struct ilo_dev_info *dev, |
int offset, int size, |
struct ilo_cp *cp); |
typedef void |
(*ilo_gpe_gen7_3DSTATE_SO_DECL_LIST)(const struct ilo_dev_info *dev, |
const struct pipe_stream_output_info *so_info, |
struct ilo_cp *cp); |
typedef void |
(*ilo_gpe_gen7_3DSTATE_SO_BUFFER)(const struct ilo_dev_info *dev, |
int index, int base, int stride, |
const struct pipe_stream_output_target *so_target, |
struct ilo_cp *cp); |
typedef ilo_gpe_gen6_PIPE_CONTROL ilo_gpe_gen7_PIPE_CONTROL; |
typedef ilo_gpe_gen6_3DPRIMITIVE ilo_gpe_gen7_3DPRIMITIVE; |
typedef ilo_gpe_gen6_INTERFACE_DESCRIPTOR_DATA ilo_gpe_gen7_INTERFACE_DESCRIPTOR_DATA; |
typedef uint32_t |
(*ilo_gpe_gen7_SF_CLIP_VIEWPORT)(const struct ilo_dev_info *dev, |
const struct ilo_viewport_cso *viewports, |
unsigned num_viewports, |
struct ilo_cp *cp); |
typedef ilo_gpe_gen6_CC_VIEWPORT ilo_gpe_gen7_CC_VIEWPORT; |
typedef ilo_gpe_gen6_COLOR_CALC_STATE ilo_gpe_gen7_COLOR_CALC_STATE; |
typedef ilo_gpe_gen6_BLEND_STATE ilo_gpe_gen7_BLEND_STATE; |
typedef ilo_gpe_gen6_DEPTH_STENCIL_STATE ilo_gpe_gen7_DEPTH_STENCIL_STATE; |
typedef ilo_gpe_gen6_SCISSOR_RECT ilo_gpe_gen7_SCISSOR_RECT; |
typedef ilo_gpe_gen6_BINDING_TABLE_STATE ilo_gpe_gen7_BINDING_TABLE_STATE; |
typedef ilo_gpe_gen6_SURFACE_STATE ilo_gpe_gen7_SURFACE_STATE; |
typedef ilo_gpe_gen6_SAMPLER_STATE ilo_gpe_gen7_SAMPLER_STATE; |
typedef ilo_gpe_gen6_SAMPLER_BORDER_COLOR_STATE ilo_gpe_gen7_SAMPLER_BORDER_COLOR_STATE; |
typedef ilo_gpe_gen6_push_constant_buffer ilo_gpe_gen7_push_constant_buffer; |
/**
 * GEN7 graphics processing engine
 *
 * A table of function pointers: size estimators for commands/states, plus
 * one emitter per command and per indirect state.  Populated by gen7_init()
 * and obtained through ilo_gpe_gen7_get().
 *
 * \see ilo_gpe_gen6
 */
struct ilo_gpe_gen7 {
   /* worst-case size, in dwords, of a command; \p arg is command-specific */
   int (*estimate_command_size)(const struct ilo_dev_info *dev,
                                enum ilo_gpe_gen7_command cmd,
                                int arg);

   /* worst-case size, in dwords, of an indirect state */
   int (*estimate_state_size)(const struct ilo_dev_info *dev,
                              enum ilo_gpe_gen7_state state,
                              int arg);

/* declare a function-pointer member "emit_<name>" of type
 * "ilo_gpe_gen7_<name>" (the typedefs above) */
#define GEN7_EMIT(name) ilo_gpe_gen7_ ## name emit_ ## name
   GEN7_EMIT(STATE_BASE_ADDRESS);
   GEN7_EMIT(STATE_SIP);
   GEN7_EMIT(3DSTATE_VF_STATISTICS);
   GEN7_EMIT(PIPELINE_SELECT);
   GEN7_EMIT(MEDIA_VFE_STATE);
   GEN7_EMIT(MEDIA_CURBE_LOAD);
   GEN7_EMIT(MEDIA_INTERFACE_DESCRIPTOR_LOAD);
   GEN7_EMIT(MEDIA_STATE_FLUSH);
   GEN7_EMIT(GPGPU_WALKER);
   GEN7_EMIT(3DSTATE_CLEAR_PARAMS);
   GEN7_EMIT(3DSTATE_DEPTH_BUFFER);
   GEN7_EMIT(3DSTATE_STENCIL_BUFFER);
   GEN7_EMIT(3DSTATE_HIER_DEPTH_BUFFER);
   GEN7_EMIT(3DSTATE_VERTEX_BUFFERS);
   GEN7_EMIT(3DSTATE_VERTEX_ELEMENTS);
   GEN7_EMIT(3DSTATE_INDEX_BUFFER);
   GEN7_EMIT(3DSTATE_CC_STATE_POINTERS);
   GEN7_EMIT(3DSTATE_SCISSOR_STATE_POINTERS);
   GEN7_EMIT(3DSTATE_VS);
   GEN7_EMIT(3DSTATE_GS);
   GEN7_EMIT(3DSTATE_CLIP);
   GEN7_EMIT(3DSTATE_SF);
   GEN7_EMIT(3DSTATE_WM);
   GEN7_EMIT(3DSTATE_CONSTANT_VS);
   GEN7_EMIT(3DSTATE_CONSTANT_GS);
   GEN7_EMIT(3DSTATE_CONSTANT_PS);
   GEN7_EMIT(3DSTATE_SAMPLE_MASK);
   GEN7_EMIT(3DSTATE_CONSTANT_HS);
   GEN7_EMIT(3DSTATE_CONSTANT_DS);
   GEN7_EMIT(3DSTATE_HS);
   GEN7_EMIT(3DSTATE_TE);
   GEN7_EMIT(3DSTATE_DS);
   GEN7_EMIT(3DSTATE_STREAMOUT);
   GEN7_EMIT(3DSTATE_SBE);
   GEN7_EMIT(3DSTATE_PS);
   GEN7_EMIT(3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP);
   GEN7_EMIT(3DSTATE_VIEWPORT_STATE_POINTERS_CC);
   GEN7_EMIT(3DSTATE_BLEND_STATE_POINTERS);
   GEN7_EMIT(3DSTATE_DEPTH_STENCIL_STATE_POINTERS);
   GEN7_EMIT(3DSTATE_BINDING_TABLE_POINTERS_VS);
   GEN7_EMIT(3DSTATE_BINDING_TABLE_POINTERS_HS);
   GEN7_EMIT(3DSTATE_BINDING_TABLE_POINTERS_DS);
   GEN7_EMIT(3DSTATE_BINDING_TABLE_POINTERS_GS);
   GEN7_EMIT(3DSTATE_BINDING_TABLE_POINTERS_PS);
   GEN7_EMIT(3DSTATE_SAMPLER_STATE_POINTERS_VS);
   GEN7_EMIT(3DSTATE_SAMPLER_STATE_POINTERS_HS);
   GEN7_EMIT(3DSTATE_SAMPLER_STATE_POINTERS_DS);
   GEN7_EMIT(3DSTATE_SAMPLER_STATE_POINTERS_GS);
   GEN7_EMIT(3DSTATE_SAMPLER_STATE_POINTERS_PS);
   GEN7_EMIT(3DSTATE_URB_VS);
   GEN7_EMIT(3DSTATE_URB_HS);
   GEN7_EMIT(3DSTATE_URB_DS);
   GEN7_EMIT(3DSTATE_URB_GS);
   GEN7_EMIT(3DSTATE_DRAWING_RECTANGLE);
   GEN7_EMIT(3DSTATE_POLY_STIPPLE_OFFSET);
   GEN7_EMIT(3DSTATE_POLY_STIPPLE_PATTERN);
   GEN7_EMIT(3DSTATE_LINE_STIPPLE);
   GEN7_EMIT(3DSTATE_AA_LINE_PARAMETERS);
   GEN7_EMIT(3DSTATE_MULTISAMPLE);
   GEN7_EMIT(3DSTATE_PUSH_CONSTANT_ALLOC_VS);
   GEN7_EMIT(3DSTATE_PUSH_CONSTANT_ALLOC_HS);
   GEN7_EMIT(3DSTATE_PUSH_CONSTANT_ALLOC_DS);
   GEN7_EMIT(3DSTATE_PUSH_CONSTANT_ALLOC_GS);
   GEN7_EMIT(3DSTATE_PUSH_CONSTANT_ALLOC_PS);
   GEN7_EMIT(3DSTATE_SO_DECL_LIST);
   GEN7_EMIT(3DSTATE_SO_BUFFER);
   GEN7_EMIT(PIPE_CONTROL);
   GEN7_EMIT(3DPRIMITIVE);
   GEN7_EMIT(INTERFACE_DESCRIPTOR_DATA);
   GEN7_EMIT(SF_CLIP_VIEWPORT);
   GEN7_EMIT(CC_VIEWPORT);
   GEN7_EMIT(COLOR_CALC_STATE);
   GEN7_EMIT(BLEND_STATE);
   GEN7_EMIT(DEPTH_STENCIL_STATE);
   GEN7_EMIT(SCISSOR_RECT);
   GEN7_EMIT(BINDING_TABLE_STATE);
   GEN7_EMIT(SURFACE_STATE);
   GEN7_EMIT(SAMPLER_STATE);
   GEN7_EMIT(SAMPLER_BORDER_COLOR_STATE);
   GEN7_EMIT(push_constant_buffer);
#undef GEN7_EMIT
};
const struct ilo_gpe_gen7 * |
ilo_gpe_gen7_get(void); |
#endif /* ILO_GPE_GEN7_H */ |
/contrib/sdk/sources/Mesa/src/gallium/drivers/ilo/ilo_gpgpu.c |
---|
0,0 → 1,49 |
/* |
* Mesa 3-D graphics library |
* |
* Copyright (C) 2012-2013 LunarG, Inc. |
* |
* Permission is hereby granted, free of charge, to any person obtaining a |
* copy of this software and associated documentation files (the "Software"), |
* to deal in the Software without restriction, including without limitation |
* the rights to use, copy, modify, merge, publish, distribute, sublicense, |
* and/or sell copies of the Software, and to permit persons to whom the |
* Software is furnished to do so, subject to the following conditions: |
* |
* The above copyright notice and this permission notice shall be included |
* in all copies or substantial portions of the Software. |
* |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING |
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER |
* DEALINGS IN THE SOFTWARE. |
* |
* Authors: |
* Chia-I Wu <olv@lunarg.com> |
*/ |
#include "ilo_context.h" |
#include "ilo_gpgpu.h" |
/*
 * This is a placeholder.  We will need something similar to ilo_3d_pipeline.
 */

/**
 * Stub implementation of pipe_context::launch_grid.
 *
 * Intentionally empty: GPGPU dispatch is not implemented yet, so compute
 * grids are silently dropped.  All parameters are unused.
 */
static void
ilo_launch_grid(struct pipe_context *pipe,
                const uint *block_layout, const uint *grid_layout,
                uint32_t pc, const void *input)
{
}
/**
 * Initialize GPGPU-related functions.
 *
 * Installs the (currently stubbed) compute entry points into the context's
 * pipe_context vtable.
 */
void
ilo_init_gpgpu_functions(struct ilo_context *ilo)
{
   ilo->base.launch_grid = ilo_launch_grid;
}
/contrib/sdk/sources/Mesa/src/gallium/drivers/ilo/ilo_gpgpu.h |
---|
0,0 → 1,38 |
/* |
* Mesa 3-D graphics library |
* |
* Copyright (C) 2012-2013 LunarG, Inc. |
* |
* Permission is hereby granted, free of charge, to any person obtaining a |
* copy of this software and associated documentation files (the "Software"), |
* to deal in the Software without restriction, including without limitation |
* the rights to use, copy, modify, merge, publish, distribute, sublicense, |
* and/or sell copies of the Software, and to permit persons to whom the |
* Software is furnished to do so, subject to the following conditions: |
* |
* The above copyright notice and this permission notice shall be included |
* in all copies or substantial portions of the Software. |
* |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING |
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER |
* DEALINGS IN THE SOFTWARE. |
* |
* Authors: |
* Chia-I Wu <olv@lunarg.com> |
*/ |
#ifndef ILO_GPGPU_H |
#define ILO_GPGPU_H |
#include "ilo_common.h" |
struct ilo_context; |
void |
ilo_init_gpgpu_functions(struct ilo_context *ilo); |
#endif /* ILO_GPGPU_H */ |
/contrib/sdk/sources/Mesa/src/gallium/drivers/ilo/ilo_public.h |
---|
0,0 → 1,37 |
/* |
* Mesa 3-D graphics library |
* |
* Copyright (C) 2012-2013 LunarG, Inc. |
* |
* Permission is hereby granted, free of charge, to any person obtaining a |
* copy of this software and associated documentation files (the "Software"), |
* to deal in the Software without restriction, including without limitation |
* the rights to use, copy, modify, merge, publish, distribute, sublicense, |
* and/or sell copies of the Software, and to permit persons to whom the |
* Software is furnished to do so, subject to the following conditions: |
* |
* The above copyright notice and this permission notice shall be included |
* in all copies or substantial portions of the Software. |
* |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING |
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER |
* DEALINGS IN THE SOFTWARE. |
* |
* Authors: |
* Chia-I Wu <olv@lunarg.com> |
*/ |
#ifndef ILO_PUBLIC_H |
#define ILO_PUBLIC_H |
struct intel_winsys; |
struct pipe_screen; |
struct pipe_screen * |
ilo_screen_create(struct intel_winsys *ws); |
#endif /* ILO_PUBLIC_H */ |
/contrib/sdk/sources/Mesa/src/gallium/drivers/ilo/ilo_query.c |
---|
0,0 → 1,238 |
/* |
* Mesa 3-D graphics library |
* |
* Copyright (C) 2012-2013 LunarG, Inc. |
* |
* Permission is hereby granted, free of charge, to any person obtaining a |
* copy of this software and associated documentation files (the "Software"), |
* to deal in the Software without restriction, including without limitation |
* the rights to use, copy, modify, merge, publish, distribute, sublicense, |
* and/or sell copies of the Software, and to permit persons to whom the |
* Software is furnished to do so, subject to the following conditions: |
* |
* The above copyright notice and this permission notice shall be included |
* in all copies or substantial portions of the Software. |
* |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING |
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER |
* DEALINGS IN THE SOFTWARE. |
* |
* Authors: |
* Chia-I Wu <olv@lunarg.com> |
*/ |
#include "intel_winsys.h" |
#include "ilo_3d.h" |
#include "ilo_context.h" |
#include "ilo_cp.h" |
#include "ilo_query.h" |
/*
 * Per-query-type dispatch table, indexed by PIPE_QUERY_*.  Entries built
 * with INFOX() are recognized but not yet supported (NULL callbacks);
 * ilo_create_query() rejects them before any callback can be reached.
 */
static const struct {
   const char *name;   /* human-readable name, also used to label the bo */
   void (*begin)(struct ilo_context *ilo, struct ilo_query *q);
   void (*end)(struct ilo_context *ilo, struct ilo_query *q);
   void (*process)(struct ilo_context *ilo, struct ilo_query *q);
} query_info[PIPE_QUERY_TYPES] = {
/* a fully-implemented query: callbacks are <prefix>_{begin,end,process}_query */
#define INFO(prefix, desc) { \
   .name = desc, \
   .begin = prefix ## _begin_query, \
   .end = prefix ## _end_query, \
   .process = prefix ## _process_query, \
}
/* a named but unimplemented query */
#define INFOX(prefix, desc) { desc, NULL, NULL, NULL, }

   [PIPE_QUERY_OCCLUSION_COUNTER] = INFO(ilo_3d, "occlusion counter"),
   [PIPE_QUERY_OCCLUSION_PREDICATE] = INFOX(ilo_3d, "occlusion pred."),
   [PIPE_QUERY_TIMESTAMP] = INFO(ilo_3d, "timestamp"),
   [PIPE_QUERY_TIMESTAMP_DISJOINT] = INFOX(ilo_3d, "timestamp disjoint"),
   [PIPE_QUERY_TIME_ELAPSED] = INFO(ilo_3d, "time elapsed"),
   [PIPE_QUERY_PRIMITIVES_GENERATED] = INFO(ilo_3d, "primitives generated"),
   [PIPE_QUERY_PRIMITIVES_EMITTED] = INFO(ilo_3d, "primitives emitted"),
   [PIPE_QUERY_SO_STATISTICS] = INFOX(ilo_3d, "so statistics"),
   [PIPE_QUERY_SO_OVERFLOW_PREDICATE] = INFOX(ilo_3d, "so overflow pred."),
   [PIPE_QUERY_GPU_FINISHED] = INFOX(ilo_3d, "gpu finished"),
   [PIPE_QUERY_PIPELINE_STATISTICS] = INFOX(ilo_3d, "pipeline statistics"),

#undef INFO
#undef INFOX
};
/* downcast a pipe_query to the driver's ilo_query */
static inline struct ilo_query *
ilo_query(struct pipe_query *query)
{
   return (struct ilo_query *) query;
}
static struct pipe_query * |
ilo_create_query(struct pipe_context *pipe, unsigned query_type) |
{ |
struct ilo_query *q; |
switch (query_type) { |
case PIPE_QUERY_OCCLUSION_COUNTER: |
case PIPE_QUERY_TIMESTAMP: |
case PIPE_QUERY_TIME_ELAPSED: |
case PIPE_QUERY_PRIMITIVES_GENERATED: |
case PIPE_QUERY_PRIMITIVES_EMITTED: |
break; |
default: |
return NULL; |
} |
q = CALLOC_STRUCT(ilo_query); |
if (!q) |
return NULL; |
q->type = query_type; |
list_inithead(&q->list); |
return (struct pipe_query *) q; |
} |
/**
 * Destroy a query, releasing its bo if one was allocated.
 *
 * NOTE(review): q->list is not unlinked here; presumably the query has
 * already been removed from any tracking list by the time it is destroyed --
 * confirm against the begin/end paths in ilo_3d.
 */
static void
ilo_destroy_query(struct pipe_context *pipe, struct pipe_query *query)
{
   struct ilo_query *q = ilo_query(query);

   if (q->bo)
      intel_bo_unreference(q->bo);

   FREE(q);
}
static void |
ilo_begin_query(struct pipe_context *pipe, struct pipe_query *query) |
{ |
struct ilo_context *ilo = ilo_context(pipe); |
struct ilo_query *q = ilo_query(query); |
q->active = true; |
query_info[q->type].begin(ilo, q); |
} |
static void |
ilo_end_query(struct pipe_context *pipe, struct pipe_query *query) |
{ |
struct ilo_context *ilo = ilo_context(pipe); |
struct ilo_query *q = ilo_query(query); |
query_info[q->type].end(ilo, q); |
/* |
* some queries such as timestamp query does not require a call to |
* begin_query() so q->active is always false |
*/ |
q->active = false; |
} |
/** |
* The type (union pipe_query_result) indicates only the size of the buffer. |
* Callers expect the result to be "serialized". |
*/ |
static void |
serialize_query_data(unsigned type, const union pipe_query_result *data, |
void *buf) |
{ |
switch (type) { |
case PIPE_QUERY_OCCLUSION_COUNTER: |
case PIPE_QUERY_TIMESTAMP: |
case PIPE_QUERY_TIME_ELAPSED: |
case PIPE_QUERY_PRIMITIVES_GENERATED: |
case PIPE_QUERY_PRIMITIVES_EMITTED: |
{ |
uint64_t *r = buf; |
r[0] = data->u64; |
} |
break; |
default: |
memset(buf, 0, sizeof(union pipe_query_result)); |
break; |
} |
} |
/**
 * Fetch the result of a query.
 *
 * Returns false while the query is still active, or -- when \p wait is
 * false -- while the hardware has not finished writing the data.  On
 * success the serialized result is written to \p result (if non-NULL).
 */
static boolean
ilo_get_query_result(struct pipe_context *pipe, struct pipe_query *query,
                     boolean wait, union pipe_query_result *result)
{
   struct ilo_context *ilo = ilo_context(pipe);
   struct ilo_query *q = ilo_query(query);

   if (q->active)
      return false;

   if (q->bo) {
      /* the query bo is referenced by the unsubmitted batch; flush first so
       * the writes actually happen */
      if (intel_bo_references(ilo->cp->bo, q->bo))
         ilo_cp_flush(ilo->cp);

      /* non-blocking mode: bail out rather than stall on a busy bo */
      if (!wait && intel_bo_is_busy(q->bo))
         return false;

      /* read the hardware registers back into q->data */
      query_info[q->type].process(ilo, q);
   }

   if (result)
      serialize_query_data(q->type, &q->data, (void *) result);

   return true;
}
/** |
* Allocate a query bo for reading hardware statistics. |
* |
* \param reg_count specifies how many registers need to be read. |
* \param repeat_count specifies how many times the registers are read. If |
* zero or negative, a 4KB bo is allocated. |
*/ |
bool |
ilo_query_alloc_bo(struct ilo_query *q, int reg_count, int repeat_count, |
struct intel_winsys *winsys) |
{ |
const char *name; |
int reg_total; |
name = query_info[q->type].name; |
reg_total = reg_count * repeat_count; |
if (reg_total <= 0) |
reg_total = 4096 / sizeof(uint64_t); |
/* (re-)allocate the bo */ |
if (q->reg_total < reg_total) { |
/* registers are 64-bit */ |
const int size = reg_total * sizeof(uint64_t); |
if (q->bo) |
intel_bo_unreference(q->bo); |
q->bo = intel_winsys_alloc_buffer(winsys, name, size, 0); |
q->reg_total = (q->bo) ? reg_total : 0; |
} |
/* avoid partial reads */ |
if (reg_count) |
q->reg_total -= q->reg_total % reg_count; |
q->reg_read = 0; |
return (q->bo != NULL); |
} |
/**
 * Initialize query-related functions.
 *
 * Installs the query entry points into the context's pipe_context vtable.
 */
void
ilo_init_query_functions(struct ilo_context *ilo)
{
   ilo->base.create_query = ilo_create_query;
   ilo->base.destroy_query = ilo_destroy_query;
   ilo->base.begin_query = ilo_begin_query;
   ilo->base.end_query = ilo_end_query;
   ilo->base.get_query_result = ilo_get_query_result;
}
/contrib/sdk/sources/Mesa/src/gallium/drivers/ilo/ilo_query.h |
---|
0,0 → 1,62 |
/* |
* Mesa 3-D graphics library |
* |
* Copyright (C) 2012-2013 LunarG, Inc. |
* |
* Permission is hereby granted, free of charge, to any person obtaining a |
* copy of this software and associated documentation files (the "Software"), |
* to deal in the Software without restriction, including without limitation |
* the rights to use, copy, modify, merge, publish, distribute, sublicense, |
* and/or sell copies of the Software, and to permit persons to whom the |
* Software is furnished to do so, subject to the following conditions: |
* |
* The above copyright notice and this permission notice shall be included |
* in all copies or substantial portions of the Software. |
* |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING |
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER |
* DEALINGS IN THE SOFTWARE. |
* |
* Authors: |
* Chia-I Wu <olv@lunarg.com> |
*/ |
#ifndef ILO_QUERY_H |
#define ILO_QUERY_H |
#include "ilo_common.h" |
struct intel_bo; |
struct ilo_context; |
/**
 * Queries can be bound to various places in the driver.  While bound, it
 * tells the driver to collect the data indicated by the type of the query.
 */
struct ilo_query {
   unsigned type;          /* PIPE_QUERY_* */
   bool active;            /* between begin_query() and end_query() */

   /* link for whatever list the driver tracks bound queries on */
   struct list_head list;

   /* storage for the collected data */
   union pipe_query_result data;

   /* for queries that need to read hardware statistics */
   struct intel_bo *bo;
   int reg_read, reg_total;   /* registers consumed / bo capacity, in regs */
   int reg_cmd_size; /* in dwords, as expected by ilo_cp */
};
void |
ilo_init_query_functions(struct ilo_context *ilo); |
bool |
ilo_query_alloc_bo(struct ilo_query *q, int reg_count, int repeat_count, |
struct intel_winsys *winsys); |
#endif /* ILO_QUERY_H */ |
/contrib/sdk/sources/Mesa/src/gallium/drivers/ilo/ilo_resource.c |
---|
0,0 → 1,1371 |
/* |
* Mesa 3-D graphics library |
* |
* Copyright (C) 2012-2013 LunarG, Inc. |
* |
* Permission is hereby granted, free of charge, to any person obtaining a |
* copy of this software and associated documentation files (the "Software"), |
* to deal in the Software without restriction, including without limitation |
* the rights to use, copy, modify, merge, publish, distribute, sublicense, |
* and/or sell copies of the Software, and to permit persons to whom the |
* Software is furnished to do so, subject to the following conditions: |
* |
* The above copyright notice and this permission notice shall be included |
* in all copies or substantial portions of the Software. |
* |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING |
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER |
* DEALINGS IN THE SOFTWARE. |
* |
* Authors: |
* Chia-I Wu <olv@lunarg.com> |
*/ |
#include "ilo_screen.h" |
#include "ilo_resource.h" |
/* use PIPE_BIND_CUSTOM to indicate MCS */ |
#define ILO_BIND_MCS PIPE_BIND_CUSTOM |
/*
 * Scratch state used while computing the memory layout of a texture.
 * Filled in by the tex_layout_init_*() helpers (which have ordering
 * dependencies between them) and finally applied to an ilo_texture by
 * tex_layout_apply().
 */
struct tex_layout {
   const struct ilo_dev_info *dev;        /* GPU generation info */
   const struct pipe_resource *templ;     /* resource being laid out */
   enum pipe_format format;               /* format used for the bo (may differ from templ->format) */
   unsigned block_width, block_height, block_size;
   bool compressed;
   bool has_depth, has_stencil, separate_stencil;
   enum intel_tiling_mode tiling;
   bool can_be_linear;                    /* linear is among the valid tilings */
   bool array_spacing_full;               /* ARYSPC_FULL vs ARYSPC_LOD0 */
   bool interleaved;                      /* IMS (samples interleaved) layout */
   struct {
      int w, h, d;                        /* padded mip level dimensions, in texels */
      struct ilo_texture_slice *slices;   /* per-slice (x, y) offsets to fill in */
   } levels[PIPE_MAX_TEXTURE_LEVELS];
   int align_i, align_j;                  /* horizontal/vertical alignment units */
   int qpitch;                            /* distance between array slices, in texel rows */
   int width, height;                     /* size of the monolithic bo, in texels */
};
/*
 * Compute the QPitch: the distance, in texel rows, between two array
 * slices.  Requires levels[] and align_j to be initialized already.
 */
static void
tex_layout_init_qpitch(struct tex_layout *layout)
{
   const struct pipe_resource *templ = layout->templ;
   int h0, h1;

   /* a single slice needs no qpitch */
   if (templ->array_size <= 1)
      return;

   h0 = align(layout->levels[0].h, layout->align_j);

   if (!layout->array_spacing_full) {
      /* ARYSPC_LOD0: only level 0 separates consecutive slices */
      layout->qpitch = h0;
      return;
   }

   h1 = align(layout->levels[1].h, layout->align_j);

   /*
    * From the Sandy Bridge PRM, volume 1 part 1, page 115:
    *
    *   "The following equation is used for surface formats other than
    *    compressed textures:
    *
    *      QPitch = (h0 + h1 + 11j)"
    *
    *   "The equation for compressed textures (BC* and FXT1 surface formats)
    *    follows:
    *
    *      QPitch = (h0 + h1 + 11j) / 4"
    *
    *   "[DevSNB] Errata: Sampler MSAA Qpitch will be 4 greater than the
    *    value calculated in the equation above, for every other odd Surface
    *    Height starting from 1 i.e. 1,5,9,13"
    *
    * From the Ivy Bridge PRM, volume 1 part 1, page 111-112:
    *
    *   "If Surface Array Spacing is set to ARYSPC_FULL (note that the depth
    *    buffer and stencil buffer have an implied value of ARYSPC_FULL):
    *
    *      QPitch = (h0 + h1 + 12j)
    *      QPitch = (h0 + h1 + 12j) / 4 (compressed)
    *
    *    (There are many typos or missing words here...)"
    *
    * To access the N-th slice, an offset of (Stride * QPitch * N) is added to
    * the base address.  The PRM divides QPitch by 4 for compressed formats
    * because the block height for those formats are 4, and it wants QPitch to
    * mean the number of memory rows, as opposed to texel rows, between
    * slices.  Since we use texel rows in tex->slice_offsets, we do not need
    * to divide QPitch by 4.
    */
   layout->qpitch = h0 + h1 +
      ((layout->dev->gen >= ILO_GEN(7)) ? 12 : 11) * layout->align_j;

   /* GEN6 sampler MSAA qpitch errata (see the quote above) */
   if (layout->dev->gen == ILO_GEN(6) && templ->nr_samples > 1 &&
       templ->height0 % 4 == 1)
      layout->qpitch += 4;
}
/*
 * Choose the horizontal (align_i) and vertical (align_j) alignment units
 * for mip levels, per the hardware restrictions quoted below.  Requires
 * format, tiling, and the block dimensions to be initialized already.
 */
static void
tex_layout_init_alignments(struct tex_layout *layout)
{
   const struct pipe_resource *templ = layout->templ;

   /*
    * From the Sandy Bridge PRM, volume 1 part 1, page 113:
    *
    *   "surface format           align_i    align_j
    *    YUV 4:2:2 formats        4          *see below
    *    BC1-5                    4          4
    *    FXT1                     8          4
    *    all other formats        4          *see below"
    *
    *   "- align_j = 4 for any depth buffer
    *    - align_j = 2 for separate stencil buffer
    *    - align_j = 4 for any render target surface is multisampled (4x)
    *    - align_j = 4 for any render target surface with Surface Vertical
    *      Alignment = VALIGN_4
    *    - align_j = 2 for any render target surface with Surface Vertical
    *      Alignment = VALIGN_2
    *    - align_j = 2 for all other render target surface
    *    - align_j = 2 for any sampling engine surface with Surface Vertical
    *      Alignment = VALIGN_2
    *    - align_j = 4 for any sampling engine surface with Surface Vertical
    *      Alignment = VALIGN_4"
    *
    * From the Sandy Bridge PRM, volume 4 part 1, page 86:
    *
    *   "This field (Surface Vertical Alignment) must be set to VALIGN_2 if
    *    the Surface Format is 96 bits per element (BPE)."
    *
    * They can be rephrased as
    *
    *                                  align_i        align_j
    *   compressed formats             block width    block height
    *   PIPE_FORMAT_S8_UINT            4              2
    *   other depth/stencil formats    4              4
    *   4x multisampled                4              4
    *   bpp 96                         4              2
    *   others                         4              2 or 4
    */

   /*
    * From the Ivy Bridge PRM, volume 1 part 1, page 110:
    *
    *   "surface defined by      surface format     align_i     align_j
    *    3DSTATE_DEPTH_BUFFER    D16_UNORM          8           4
    *                            not D16_UNORM      4           4
    *    3DSTATE_STENCIL_BUFFER  N/A                8           8
    *    SURFACE_STATE           BC*, ETC*, EAC*    4           4
    *                            FXT1               8           4
    *                            all others         (set by SURFACE_STATE)"
    *
    * From the Ivy Bridge PRM, volume 4 part 1, page 63:
    *
    *   "- This field (Surface Vertical Aligment) is intended to be set to
    *      VALIGN_4 if the surface was rendered as a depth buffer, for a
    *      multisampled (4x) render target, or for a multisampled (8x)
    *      render target, since these surfaces support only alignment of 4.
    *    - Use of VALIGN_4 for other surfaces is supported, but uses more
    *      memory.
    *    - This field must be set to VALIGN_4 for all tiled Y Render Target
    *      surfaces.
    *    - Value of 1 is not supported for format YCRCB_NORMAL (0x182),
    *      YCRCB_SWAPUVY (0x183), YCRCB_SWAPUV (0x18f), YCRCB_SWAPY (0x190)
    *    - If Number of Multisamples is not MULTISAMPLECOUNT_1, this field
    *      must be set to VALIGN_4."
    *    - VALIGN_4 is not supported for surface format R32G32B32_FLOAT."
    *
    *   "- This field (Surface Horizontal Aligment) is intended to be set to
    *      HALIGN_8 only if the surface was rendered as a depth buffer with
    *      Z16 format or a stencil buffer, since these surfaces support only
    *      alignment of 8.
    *    - Use of HALIGN_8 for other surfaces is supported, but uses more
    *      memory.
    *    - This field must be set to HALIGN_4 if the Surface Format is BC*.
    *    - This field must be set to HALIGN_8 if the Surface Format is
    *      FXT1."
    *
    * They can be rephrased as
    *
    *                                  align_i        align_j
    *   compressed formats             block width    block height
    *   PIPE_FORMAT_Z16_UNORM          8              4
    *   PIPE_FORMAT_S8_UINT            8              8
    *   other depth/stencil formats    4 or 8         4
    *   2x or 4x multisampled          4 or 8         4
    *   tiled Y                        4 or 8         4 (if rt)
    *   PIPE_FORMAT_R32G32B32_FLOAT    4 or 8         2
    *   others                         4 or 8         2 or 4
    */
   if (layout->compressed) {
      /* this happens to be the case */
      layout->align_i = layout->block_width;
      layout->align_j = layout->block_height;
   }
   else if (layout->has_depth || layout->has_stencil) {
      if (layout->dev->gen >= ILO_GEN(7)) {
         switch (layout->format) {
         case PIPE_FORMAT_Z16_UNORM:
            layout->align_i = 8;
            layout->align_j = 4;
            break;
         case PIPE_FORMAT_S8_UINT:
            layout->align_i = 8;
            layout->align_j = 8;
            break;
         default:
            /*
             * From the Ivy Bridge PRM, volume 2 part 1, page 319:
             *
             *   "The 3 LSBs of both offsets (Depth Coordinate Offset Y and
             *    Depth Coordinate Offset X) must be zero to ensure correct
             *    alignment"
             *
             * We will make use of them and setting align_i to 8 help us meet
             * the requirement.
             */
            layout->align_i = (templ->last_level > 0) ? 8 : 4;
            layout->align_j = 4;
            break;
         }
      }
      else {
         switch (layout->format) {
         case PIPE_FORMAT_S8_UINT:
            layout->align_i = 4;
            layout->align_j = 2;
            break;
         default:
            layout->align_i = 4;
            layout->align_j = 4;
            break;
         }
      }
   }
   else {
      const bool valign_4 = (templ->nr_samples > 1) ||
         (layout->dev->gen >= ILO_GEN(7) &&
          layout->tiling == INTEL_TILING_Y &&
          (templ->bind & PIPE_BIND_RENDER_TARGET));

      /* VALIGN_4 is not supported for 96-bpp (R32G32B32) formats */
      if (valign_4)
         assert(layout->block_size != 12);

      layout->align_i = 4;
      layout->align_j = (valign_4) ? 4 : 2;
   }

   /*
    * the fact that align i and j are multiples of block width and height
    * respectively is what makes the size of the bo a multiple of the block
    * size, slices start at block boundaries, and many of the computations
    * work.
    */
   assert(layout->align_i % layout->block_width == 0);
   assert(layout->align_j % layout->block_height == 0);

   /* make sure align() works */
   assert(util_is_power_of_two(layout->align_i) &&
          util_is_power_of_two(layout->align_j));
   assert(util_is_power_of_two(layout->block_width) &&
          util_is_power_of_two(layout->block_height));
}
/*
 * Compute the padded dimensions of every mip level, in texels.  Requires
 * the block dimensions and the sample-interleaving mode to be initialized
 * already.
 */
static void
tex_layout_init_levels(struct tex_layout *layout)
{
   const struct pipe_resource *templ = layout->templ;
   int last_level, lv;

   last_level = templ->last_level;

   /* need at least 2 levels to compute full qpitch */
   if (last_level == 0 && templ->array_size > 1 && layout->array_spacing_full)
      last_level++;

   /* compute mip level sizes */
   for (lv = 0; lv <= last_level; lv++) {
      int w, h, d;

      w = u_minify(templ->width0, lv);
      h = u_minify(templ->height0, lv);
      d = u_minify(templ->depth0, lv);

      /*
       * From the Sandy Bridge PRM, volume 1 part 1, page 114:
       *
       *   "The dimensions of the mip maps are first determined by applying
       *    the sizing algorithm presented in Non-Power-of-Two Mipmaps
       *    above.  Then, if necessary, they are padded out to compression
       *    block boundaries."
       */
      w = align(w, layout->block_width);
      h = align(h, layout->block_height);

      /*
       * From the Sandy Bridge PRM, volume 1 part 1, page 111:
       *
       *   "If the surface is multisampled (4x), these values must be
       *    adjusted as follows before proceeding:
       *
       *      W_L = ceiling(W_L / 2) * 4
       *      H_L = ceiling(H_L / 2) * 4"
       *
       * From the Ivy Bridge PRM, volume 1 part 1, page 108:
       *
       *   "If the surface is multisampled and it is a depth or stencil
       *    surface or Multisampled Surface StorageFormat in SURFACE_STATE
       *    is MSFMT_DEPTH_STENCIL, W_L and H_L must be adjusted as follows
       *    before proceeding:
       *
       *      #samples  W_L =                    H_L =
       *      2         ceiling(W_L / 2) * 4     HL [no adjustment]
       *      4         ceiling(W_L / 2) * 4     ceiling(H_L / 2) * 4
       *      8         ceiling(W_L / 2) * 8     ceiling(H_L / 2) * 4
       *      16        ceiling(W_L / 2) * 8     ceiling(H_L / 2) * 8"
       *
       * For interleaved samples (4x), where pixels
       *
       *   (x, y  ) (x+1, y  )
       *   (x, y+1) (x+1, y+1)
       *
       * would be occupied by
       *
       *   (x, y  , si0) (x+1, y  , si0) (x, y  , si1) (x+1, y  , si1)
       *   (x, y+1, si0) (x+1, y+1, si0) (x, y+1, si1) (x+1, y+1, si1)
       *   (x, y  , si2) (x+1, y  , si2) (x, y  , si3) (x+1, y  , si3)
       *   (x, y+1, si2) (x+1, y+1, si2) (x, y+1, si3) (x+1, y+1, si3)
       *
       * Thus the need to
       *
       *   w = align(w, 2) * 2;
       *   y = align(y, 2) * 2;
       */
      if (layout->interleaved) {
         switch (templ->nr_samples) {
         case 0:
         case 1:
            break;
         case 2:
            w = align(w, 2) * 2;
            break;
         case 4:
            w = align(w, 2) * 2;
            h = align(h, 2) * 2;
            break;
         case 8:
            w = align(w, 2) * 4;
            h = align(h, 2) * 2;
            break;
         case 16:
            w = align(w, 2) * 4;
            h = align(h, 2) * 4;
            break;
         default:
            assert(!"unsupported sample count");
            break;
         }
      }

      layout->levels[lv].w = w;
      layout->levels[lv].h = h;
      layout->levels[lv].d = d;
   }
}
/*
 * Decide the multisample storage mode (interleaved vs. non-interleaved)
 * and the array spacing (ARYSPC_FULL vs. ARYSPC_LOD0).  Requires the
 * format fields to be initialized already.
 */
static void
tex_layout_init_spacing(struct tex_layout *layout)
{
   const struct pipe_resource *templ = layout->templ;

   if (layout->dev->gen >= ILO_GEN(7)) {
      /*
       * It is not explicitly stated, but render targets are expected to be
       * UMS/CMS (samples non-interleaved) and depth/stencil buffers are
       * expected to be IMS (samples interleaved).
       *
       * See "Multisampled Surface Storage Format" field of SURFACE_STATE.
       */
      if (layout->has_depth || layout->has_stencil) {
         layout->interleaved = true;

         /*
          * From the Ivy Bridge PRM, volume 1 part 1, page 111:
          *
          *   "note that the depth buffer and stencil buffer have an implied
          *    value of ARYSPC_FULL"
          */
         layout->array_spacing_full = true;
      }
      else {
         layout->interleaved = false;

         /*
          * From the Ivy Bridge PRM, volume 4 part 1, page 66:
          *
          *   "If Multisampled Surface Storage Format is MSFMT_MSS and
          *    Number of Multisamples is not MULTISAMPLECOUNT_1, this field
          *    (Surface Array Spacing) must be set to ARYSPC_LOD0."
          *
          * As multisampled resources are not mipmapped, we never use
          * ARYSPC_FULL for them.
          */
         if (templ->nr_samples > 1)
            assert(templ->last_level == 0);
         layout->array_spacing_full = (templ->last_level > 0);
      }
   }
   else {
      /* GEN6 supports only interleaved samples */
      layout->interleaved = true;

      /*
       * From the Sandy Bridge PRM, volume 1 part 1, page 115:
       *
       *   "The separate stencil buffer does not support mip mapping, thus
       *    the storage for LODs other than LOD 0 is not needed.  The
       *    following QPitch equation applies only to the separate stencil
       *    buffer:
       *
       *      QPitch = h_0"
       *
       * GEN6 does not support compact spacing otherwise.
       */
      layout->array_spacing_full = (layout->format != PIPE_FORMAT_S8_UINT);
   }
}
/*
 * Choose the tiling mode.  Starts from the set of all tilings and removes
 * those forbidden by the binding flags and the format, then picks the most
 * preferred of the remaining ones (Y over X over linear).  Requires the
 * format fields to be initialized already.
 */
static void
tex_layout_init_tiling(struct tex_layout *layout)
{
   const struct pipe_resource *templ = layout->templ;
   const enum pipe_format format = layout->format;
   const unsigned tile_none = 1 << INTEL_TILING_NONE;
   const unsigned tile_x = 1 << INTEL_TILING_X;
   const unsigned tile_y = 1 << INTEL_TILING_Y;
   unsigned valid_tilings = tile_none | tile_x | tile_y;

   /*
    * From the Sandy Bridge PRM, volume 1 part 2, page 32:
    *
    *   "Display/Overlay Y-Major not supported.
    *    X-Major required for Async Flips"
    */
   if (unlikely(templ->bind & PIPE_BIND_SCANOUT))
      valid_tilings &= tile_x;

   /*
    * From the Sandy Bridge PRM, volume 3 part 2, page 158:
    *
    *   "The cursor surface address must be 4K byte aligned.  The cursor must
    *    be in linear memory, it cannot be tiled."
    */
   if (unlikely(templ->bind & PIPE_BIND_CURSOR))
      valid_tilings &= tile_none;

   /*
    * From the Ivy Bridge PRM, volume 4 part 1, page 76:
    *
    *   "The MCS surface must be stored as Tile Y."
    */
   if (templ->bind & ILO_BIND_MCS)
      valid_tilings &= tile_y;

   /*
    * From the Sandy Bridge PRM, volume 2 part 1, page 318:
    *
    *   "[DevSNB+]: This field (Tiled Surface) must be set to TRUE.  Linear
    *    Depth Buffer is not supported."
    *
    *   "The Depth Buffer, if tiled, must use Y-Major tiling."
    *
    * From the Sandy Bridge PRM, volume 1 part 2, page 22:
    *
    *   "W-Major Tile Format is used for separate stencil."
    *
    * Since the HW does not support W-tiled fencing, we have to do it in the
    * driver.
    */
   if (templ->bind & PIPE_BIND_DEPTH_STENCIL) {
      switch (format) {
      case PIPE_FORMAT_S8_UINT:
         valid_tilings &= tile_none;
         break;
      default:
         valid_tilings &= tile_y;
         break;
      }
   }

   if (templ->bind & (PIPE_BIND_RENDER_TARGET | PIPE_BIND_SAMPLER_VIEW)) {
      if (templ->bind & PIPE_BIND_RENDER_TARGET) {
         /*
          * From the Sandy Bridge PRM, volume 1 part 2, page 32:
          *
          *   "NOTE: 128BPE Format Color buffer ( render target ) MUST be
          *    either TileX or Linear."
          */
         if (layout->block_size == 16)
            valid_tilings &= ~tile_y;

         /*
          * From the Ivy Bridge PRM, volume 4 part 1, page 63:
          *
          *   "This field (Surface Vertical Aligment) must be set to
          *    VALIGN_4 for all tiled Y Render Target surfaces."
          *
          *   "VALIGN_4 is not supported for surface format
          *    R32G32B32_FLOAT."
          */
         if (layout->dev->gen >= ILO_GEN(7) && layout->block_size == 12)
            valid_tilings &= ~tile_y;
      }

      /*
       * Also, heuristically set a minimum width/height for enabling tiling.
       */
      if (templ->width0 < 64 && (valid_tilings & ~tile_x))
         valid_tilings &= ~tile_x;

      if ((templ->width0 < 32 || templ->height0 < 16) &&
          (templ->width0 < 16 || templ->height0 < 32) &&
          (valid_tilings & ~tile_y))
         valid_tilings &= ~tile_y;
   }
   else {
      /* force linear if we are not sure where the texture is bound to */
      if (valid_tilings & tile_none)
         valid_tilings &= tile_none;
   }

   /* no conflicting binding flags */
   assert(valid_tilings);

   /* prefer tiled over linear */
   if (valid_tilings & tile_y)
      layout->tiling = INTEL_TILING_Y;
   else if (valid_tilings & tile_x)
      layout->tiling = INTEL_TILING_X;
   else
      layout->tiling = INTEL_TILING_NONE;

   layout->can_be_linear = valid_tilings & tile_none;
}
/*
 * Choose the format used for the bo and derive the format-dependent fields
 * (block dimensions, compressed/depth/stencil flags).  The bo format may
 * differ from the resource format: ETC1 is stored decompressed, and
 * combined depth/stencil formats lose their stencil bits when the hardware
 * requires a separate stencil buffer.
 */
static void
tex_layout_init_format(struct tex_layout *layout)
{
   const struct pipe_resource *templ = layout->templ;
   enum pipe_format format;
   const struct util_format_description *desc;
   bool separate_stencil;

   /* GEN7+ requires separate stencil buffers */
   separate_stencil = (layout->dev->gen >= ILO_GEN(7));

   switch (templ->format) {
   case PIPE_FORMAT_ETC1_RGB8:
      /* no native ETC1 support; store decompressed */
      format = PIPE_FORMAT_R8G8B8X8_UNORM;
      break;
   case PIPE_FORMAT_Z24_UNORM_S8_UINT:
      if (separate_stencil) {
         format = PIPE_FORMAT_Z24X8_UNORM;
         layout->separate_stencil = true;
      }
      else {
         format = templ->format;
      }
      break;
   case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
      if (separate_stencil) {
         format = PIPE_FORMAT_Z32_FLOAT;
         layout->separate_stencil = true;
      }
      else {
         format = templ->format;
      }
      break;
   default:
      format = templ->format;
      break;
   }

   layout->format = format;

   layout->block_width = util_format_get_blockwidth(format);
   layout->block_height = util_format_get_blockheight(format);
   layout->block_size = util_format_get_blocksize(format);
   layout->compressed = util_format_is_compressed(format);

   desc = util_format_description(format);
   layout->has_depth = util_format_has_depth(desc);
   layout->has_stencil = util_format_has_stencil(desc);
}
static void |
tex_layout_init(struct tex_layout *layout, |
struct pipe_screen *screen, |
const struct pipe_resource *templ, |
struct ilo_texture_slice **slices) |
{ |
struct ilo_screen *is = ilo_screen(screen); |
memset(layout, 0, sizeof(*layout)); |
layout->dev = &is->dev; |
layout->templ = templ; |
/* note that there are dependencies between these functions */ |
tex_layout_init_format(layout); |
tex_layout_init_tiling(layout); |
tex_layout_init_spacing(layout); |
tex_layout_init_levels(layout); |
tex_layout_init_alignments(layout); |
tex_layout_init_qpitch(layout); |
if (slices) { |
int lv; |
for (lv = 0; lv <= templ->last_level; lv++) |
layout->levels[lv].slices = slices[lv]; |
} |
} |
static bool |
tex_layout_force_linear(struct tex_layout *layout) |
{ |
if (!layout->can_be_linear) |
return false; |
/* |
* we may be able to switch from VALIGN_4 to VALIGN_2 when the layout was |
* Y-tiled, but let's keep it simple |
*/ |
layout->tiling = INTEL_TILING_NONE; |
return true; |
} |
/**
 * Layout a 2D texture.
 *
 * Places the mip levels in the classic MIPLAYOUT_BELOW arrangement (level 1
 * below level 0, levels 2+ to the right of level 1), records the (x, y)
 * texel offset of every slice, and grows layout->width/height to cover the
 * whole mip chain and all array slices.
 */
static void
tex_layout_2d(struct tex_layout *layout)
{
   const struct pipe_resource *templ = layout->templ;
   unsigned int level_x, level_y, num_slices;
   int lv;

   level_x = 0;
   level_y = 0;
   for (lv = 0; lv <= templ->last_level; lv++) {
      const unsigned int level_w = layout->levels[lv].w;
      const unsigned int level_h = layout->levels[lv].h;
      int slice;

      /* set slice offsets */
      if (layout->levels[lv].slices) {
         for (slice = 0; slice < templ->array_size; slice++) {
            layout->levels[lv].slices[slice].x = level_x;
            /* slices are qpitch apart in Y-direction */
            layout->levels[lv].slices[slice].y =
               level_y + layout->qpitch * slice;
         }
      }

      /* extend the size of the monolithic bo to cover this mip level */
      if (layout->width < level_x + level_w)
         layout->width = level_x + level_w;
      if (layout->height < level_y + level_h)
         layout->height = level_y + level_h;

      /* MIPLAYOUT_BELOW */
      if (lv == 1)
         level_x += align(level_w, layout->align_i);
      else
         level_y += align(level_h, layout->align_j);
   }

   num_slices = templ->array_size;
   /* samples of the same index are stored in a slice */
   if (templ->nr_samples > 1 && !layout->interleaved)
      num_slices *= templ->nr_samples;

   /* we did not take slices into consideration in the computation above */
   layout->height += layout->qpitch * (num_slices - 1);
}
/**
 * Layout a 3D texture.
 *
 * Each mip level is a 2D grid of depth slices: level lv packs up to 2^lv
 * slices per row.  Records the (x, y) texel offset of every slice and
 * grows layout->width/height to cover the whole texture.
 */
static void
tex_layout_3d(struct tex_layout *layout)
{
   const struct pipe_resource *templ = layout->templ;
   unsigned int level_y;
   int lv;

   level_y = 0;
   for (lv = 0; lv <= templ->last_level; lv++) {
      const unsigned int level_w = layout->levels[lv].w;
      const unsigned int level_h = layout->levels[lv].h;
      const unsigned int level_d = layout->levels[lv].d;
      const unsigned int slice_pitch = align(level_w, layout->align_i);
      const unsigned int slice_qpitch = align(level_h, layout->align_j);
      const unsigned int num_slices_per_row = 1 << lv;
      int slice;

      for (slice = 0; slice < level_d; slice += num_slices_per_row) {
         int i;

         /* set slice offsets */
         if (layout->levels[lv].slices) {
            for (i = 0; i < num_slices_per_row && slice + i < level_d; i++) {
               layout->levels[lv].slices[slice + i].x = slice_pitch * i;
               layout->levels[lv].slices[slice + i].y = level_y;
            }
         }

         /* move on to the next slice row */
         level_y += slice_qpitch;
      }

      /* rightmost slice */
      slice = MIN2(num_slices_per_row, level_d) - 1;

      /* extend the size of the monolithic bo to cover this slice */
      if (layout->width < slice_pitch * slice + level_w)
         layout->width = slice_pitch * slice + level_w;
      /* level_y already advanced past the last row; back up one qpitch */
      if (lv == templ->last_level)
         layout->height = (level_y - slice_qpitch) + level_h;
   }
}
/*
 * Apply the final padding rules required by the hardware to the computed
 * bo size (sampler padding, render-target even-row rule, W-tile rounding
 * for stencil) and sanity-check block alignment.
 */
static void
tex_layout_validate(struct tex_layout *layout)
{
   /*
    * From the Sandy Bridge PRM, volume 1 part 1, page 118:
    *
    *   "To determine the necessary padding on the bottom and right side of
    *    the surface, refer to the table in Section 7.18.3.4 for the i and j
    *    parameters for the surface format in use.  The surface must then be
    *    extended to the next multiple of the alignment unit size in each
    *    dimension, and all texels contained in this extended surface must
    *    have valid GTT entries."
    *
    *   "For cube surfaces, an additional two rows of padding are required
    *    at the bottom of the surface.  This must be ensured regardless of
    *    whether the surface is stored tiled or linear.  This is due to the
    *    potential rotation of cache line orientation from memory to cache."
    *
    *   "For compressed textures (BC* and FXT1 surface formats), padding at
    *    the bottom of the surface is to an even compressed row, which is
    *    equal to a multiple of 8 uncompressed texel rows.  Thus, for padding
    *    purposes, these surfaces behave as if j = 8 only for surface
    *    padding purposes.  The value of 4 for j still applies for mip level
    *    alignment and QPitch calculation."
    */
   if (layout->templ->bind & PIPE_BIND_SAMPLER_VIEW) {
      layout->width = align(layout->width, layout->align_i);
      layout->height = align(layout->height, layout->align_j);

      if (layout->templ->target == PIPE_TEXTURE_CUBE)
         layout->height += 2;

      if (layout->compressed)
         layout->height = align(layout->height, layout->align_j * 2);
   }

   /*
    * From the Sandy Bridge PRM, volume 1 part 1, page 118:
    *
    *   "If the surface contains an odd number of rows of data, a final row
    *    below the surface must be allocated."
    */
   if (layout->templ->bind & PIPE_BIND_RENDER_TARGET)
      layout->height = align(layout->height, 2);

   /*
    * From the Sandy Bridge PRM, volume 1 part 2, page 22:
    *
    *   "A 4KB tile is subdivided into 8-high by 8-wide array of Blocks for
    *    W-Major Tiles (W Tiles).  Each Block is 8 rows by 8 bytes."
    *
    * Since we ask for INTEL_TILING_NONE instead of the non-existent
    * INTEL_TILING_W, we need to manually align the width and height to the
    * tile boundaries.
    */
   if (layout->templ->format == PIPE_FORMAT_S8_UINT) {
      layout->width = align(layout->width, 64);
      layout->height = align(layout->height, 64);
   }

   assert(layout->width % layout->block_width == 0);
   assert(layout->height % layout->block_height == 0);
   assert(layout->qpitch % layout->block_height == 0);
}
static size_t |
tex_layout_estimate_size(const struct tex_layout *layout) |
{ |
unsigned stride, height; |
stride = (layout->width / layout->block_width) * layout->block_size; |
height = layout->height / layout->block_height; |
switch (layout->tiling) { |
case INTEL_TILING_X: |
stride = align(stride, 512); |
height = align(height, 8); |
break; |
case INTEL_TILING_Y: |
stride = align(stride, 128); |
height = align(height, 32); |
break; |
default: |
height = align(height, 2); |
break; |
} |
return stride * height; |
} |
/*
 * Copy the finalized layout into the ilo_texture.  Note that bo_width and
 * bo_height are stored in blocks, not texels.
 */
static void
tex_layout_apply(const struct tex_layout *layout, struct ilo_texture *tex)
{
   tex->bo_format = layout->format;

   /* in blocks */
   tex->bo_width = layout->width / layout->block_width;
   tex->bo_height = layout->height / layout->block_height;
   tex->bo_cpp = layout->block_size;
   tex->tiling = layout->tiling;

   tex->compressed = layout->compressed;
   tex->block_width = layout->block_width;
   tex->block_height = layout->block_height;

   tex->halign_8 = (layout->align_i == 8);
   tex->valign_4 = (layout->align_j == 4);
   tex->array_spacing_full = layout->array_spacing_full;
   tex->interleaved = layout->interleaved;
}
/*
 * Free the per-slice offset storage.  All levels point into one allocation
 * owned by slice_offsets[0] (see tex_alloc_slices), so a single FREE
 * suffices.
 */
static void
tex_free_slices(struct ilo_texture *tex)
{
   FREE(tex->slice_offsets[0]);
}
static bool |
tex_alloc_slices(struct ilo_texture *tex) |
{ |
const struct pipe_resource *templ = &tex->base; |
struct ilo_texture_slice *slices; |
int depth, lv; |
/* sum the depths of all levels */ |
depth = 0; |
for (lv = 0; lv <= templ->last_level; lv++) |
depth += u_minify(templ->depth0, lv); |
/* |
* There are (depth * tex->base.array_size) slices in total. Either depth |
* is one (non-3D) or templ->array_size is one (non-array), but it does |
* not matter. |
*/ |
slices = CALLOC(depth * templ->array_size, sizeof(*slices)); |
if (!slices) |
return false; |
tex->slice_offsets[0] = slices; |
/* point to the respective positions in the buffer */ |
for (lv = 1; lv <= templ->last_level; lv++) { |
tex->slice_offsets[lv] = tex->slice_offsets[lv - 1] + |
u_minify(templ->depth0, lv - 1) * templ->array_size; |
} |
return true; |
} |
static bool |
tex_create_bo(struct ilo_texture *tex, |
const struct winsys_handle *handle) |
{ |
struct ilo_screen *is = ilo_screen(tex->base.screen); |
const char *name; |
struct intel_bo *bo; |
enum intel_tiling_mode tiling; |
unsigned long pitch; |
switch (tex->base.target) { |
case PIPE_TEXTURE_1D: |
name = "1D texture"; |
break; |
case PIPE_TEXTURE_2D: |
name = "2D texture"; |
break; |
case PIPE_TEXTURE_3D: |
name = "3D texture"; |
break; |
case PIPE_TEXTURE_CUBE: |
name = "cube texture"; |
break; |
case PIPE_TEXTURE_RECT: |
name = "rectangle texture"; |
break; |
case PIPE_TEXTURE_1D_ARRAY: |
name = "1D array texture"; |
break; |
case PIPE_TEXTURE_2D_ARRAY: |
name = "2D array texture"; |
break; |
case PIPE_TEXTURE_CUBE_ARRAY: |
name = "cube array texture"; |
break; |
default: |
name ="unknown texture"; |
break; |
} |
if (handle) { |
bo = intel_winsys_import_handle(is->winsys, name, handle, |
tex->bo_width, tex->bo_height, tex->bo_cpp, |
&tiling, &pitch); |
} |
else { |
bo = intel_winsys_alloc_texture(is->winsys, name, |
tex->bo_width, tex->bo_height, tex->bo_cpp, |
tex->tiling, tex->bo_flags, &pitch); |
tiling = tex->tiling; |
} |
if (!bo) |
return false; |
if (tex->bo) |
intel_bo_unreference(tex->bo); |
tex->bo = bo; |
tex->tiling = tiling; |
tex->bo_stride = pitch; |
return true; |
} |
static void |
tex_destroy(struct ilo_texture *tex) |
{ |
if (tex->separate_s8) |
tex_destroy(tex->separate_s8); |
intel_bo_unreference(tex->bo); |
tex_free_slices(tex); |
FREE(tex); |
} |
/*
 * Create a texture resource.  When \p handle is non-NULL the backing bo is
 * imported from it instead of freshly allocated.  Computes the layout,
 * allocates the bo, and — when the format requires it — creates a separate
 * S8_UINT stencil resource.  Returns NULL on failure.
 */
static struct pipe_resource *
tex_create(struct pipe_screen *screen,
           const struct pipe_resource *templ,
           const struct winsys_handle *handle)
{
   struct tex_layout layout;
   struct ilo_texture *tex;

   tex = CALLOC_STRUCT(ilo_texture);
   if (!tex)
      return NULL;

   tex->base = *templ;
   tex->base.screen = screen;
   pipe_reference_init(&tex->base.reference, 1);

   if (!tex_alloc_slices(tex)) {
      FREE(tex);
      return NULL;
   }

   tex->imported = (handle != NULL);

   if (tex->base.bind & (PIPE_BIND_DEPTH_STENCIL |
                         PIPE_BIND_RENDER_TARGET))
      tex->bo_flags |= INTEL_ALLOC_FOR_RENDER;

   tex_layout_init(&layout, screen, templ, tex->slice_offsets);

   switch (templ->target) {
   case PIPE_TEXTURE_1D:
   case PIPE_TEXTURE_2D:
   case PIPE_TEXTURE_CUBE:
   case PIPE_TEXTURE_RECT:
   case PIPE_TEXTURE_1D_ARRAY:
   case PIPE_TEXTURE_2D_ARRAY:
   case PIPE_TEXTURE_CUBE_ARRAY:
      tex_layout_2d(&layout);
      break;
   case PIPE_TEXTURE_3D:
      tex_layout_3d(&layout);
      break;
   default:
      assert(!"unknown resource target");
      break;
   }

   tex_layout_validate(&layout);

   /* make sure the bo can be mapped through GTT if tiled */
   if (layout.tiling != INTEL_TILING_NONE) {
      /*
       * Usually only the first 256MB of the GTT is mappable.
       *
       * See also how intel_context::max_gtt_map_object_size is calculated.
       */
      const size_t mappable_gtt_size = 256 * 1024 * 1024;
      const size_t size = tex_layout_estimate_size(&layout);

      /* be conservative */
      if (size > mappable_gtt_size / 4)
         tex_layout_force_linear(&layout);
   }

   tex_layout_apply(&layout, tex);

   if (!tex_create_bo(tex, handle)) {
      tex_free_slices(tex);
      FREE(tex);
      return NULL;
   }

   /* allocate separate stencil resource */
   if (layout.separate_stencil) {
      struct pipe_resource s8_templ = *layout.templ;
      struct pipe_resource *s8;

      /*
       * Unless PIPE_BIND_DEPTH_STENCIL is set, the resource may have other
       * tilings.  But that should be fine since it will never be bound as the
       * stencil buffer, and our transfer code can handle all tilings.
       */
      s8_templ.format = PIPE_FORMAT_S8_UINT;

      s8 = screen->resource_create(screen, &s8_templ);
      if (!s8) {
         tex_destroy(tex);
         return NULL;
      }

      tex->separate_s8 = ilo_texture(s8);

      assert(tex->separate_s8->bo_format == PIPE_FORMAT_S8_UINT);
   }

   return &tex->base;
}
static bool |
tex_get_handle(struct ilo_texture *tex, struct winsys_handle *handle) |
{ |
struct ilo_screen *is = ilo_screen(tex->base.screen); |
int err; |
err = intel_winsys_export_handle(is->winsys, tex->bo, |
tex->tiling, tex->bo_stride, handle); |
return !err; |
} |
/** |
* Estimate the texture size. For large textures, the errors should be pretty |
* small. |
*/ |
static size_t |
tex_estimate_size(struct pipe_screen *screen, |
const struct pipe_resource *templ) |
{ |
struct tex_layout layout; |
tex_layout_init(&layout, screen, templ, NULL); |
switch (templ->target) { |
case PIPE_TEXTURE_3D: |
tex_layout_3d(&layout); |
break; |
default: |
tex_layout_2d(&layout); |
break; |
} |
tex_layout_validate(&layout); |
return tex_layout_estimate_size(&layout); |
} |
static bool |
buf_create_bo(struct ilo_buffer *buf) |
{ |
struct ilo_screen *is = ilo_screen(buf->base.screen); |
const char *name; |
struct intel_bo *bo; |
switch (buf->base.bind) { |
case PIPE_BIND_VERTEX_BUFFER: |
name = "vertex buffer"; |
break; |
case PIPE_BIND_INDEX_BUFFER: |
name = "index buffer"; |
break; |
case PIPE_BIND_CONSTANT_BUFFER: |
name = "constant buffer"; |
break; |
case PIPE_BIND_STREAM_OUTPUT: |
name = "stream output"; |
break; |
default: |
name = "unknown buffer"; |
break; |
} |
bo = intel_winsys_alloc_buffer(is->winsys, |
name, buf->bo_size, buf->bo_flags); |
if (!bo) |
return false; |
if (buf->bo) |
intel_bo_unreference(buf->bo); |
buf->bo = bo; |
return true; |
} |
/* Release the backing bo and free the buffer. */
static void
buf_destroy(struct ilo_buffer *buf)
{
   intel_bo_unreference(buf->bo);
   FREE(buf);
}
/*
 * Create a buffer resource of templ->width0 bytes, padding the bo when the
 * buffer may be sampled.  Returns NULL on failure.
 */
static struct pipe_resource *
buf_create(struct pipe_screen *screen, const struct pipe_resource *templ)
{
   struct ilo_buffer *buf;

   buf = CALLOC_STRUCT(ilo_buffer);
   if (!buf)
      return NULL;

   buf->base = *templ;
   buf->base.screen = screen;
   pipe_reference_init(&buf->base.reference, 1);

   buf->bo_size = templ->width0;
   buf->bo_flags = 0;

   /*
    * From the Sandy Bridge PRM, volume 1 part 1, page 118:
    *
    *   "For buffers, which have no inherent "height," padding requirements
    *    are different.  A buffer must be padded to the next multiple of 256
    *    array elements, with an additional 16 bytes added beyond that to
    *    account for the L1 cache line."
    */
   if (templ->bind & PIPE_BIND_SAMPLER_VIEW)
      buf->bo_size = align(buf->bo_size, 256) + 16;

   if (!buf_create_bo(buf)) {
      FREE(buf);
      return NULL;
   }

   return &buf->base;
}
static boolean |
ilo_can_create_resource(struct pipe_screen *screen, |
const struct pipe_resource *templ) |
{ |
/* |
* We do not know if we will fail until we try to allocate the bo. |
* So just set a limit on the texture size. |
*/ |
const size_t max_size = 1 * 1024 * 1024 * 1024; |
size_t size; |
if (templ->target == PIPE_BUFFER) |
size = templ->width0; |
else |
size = tex_estimate_size(screen, templ); |
return (size <= max_size); |
} |
static struct pipe_resource * |
ilo_resource_create(struct pipe_screen *screen, |
const struct pipe_resource *templ) |
{ |
if (templ->target == PIPE_BUFFER) |
return buf_create(screen, templ); |
else |
return tex_create(screen, templ, NULL); |
} |
static struct pipe_resource * |
ilo_resource_from_handle(struct pipe_screen *screen, |
const struct pipe_resource *templ, |
struct winsys_handle *handle) |
{ |
if (templ->target == PIPE_BUFFER) |
return NULL; |
else |
return tex_create(screen, templ, handle); |
} |
static boolean |
ilo_resource_get_handle(struct pipe_screen *screen, |
struct pipe_resource *res, |
struct winsys_handle *handle) |
{ |
if (res->target == PIPE_BUFFER) |
return false; |
else |
return tex_get_handle(ilo_texture(res), handle); |
} |
static void |
ilo_resource_destroy(struct pipe_screen *screen, |
struct pipe_resource *res) |
{ |
if (res->target == PIPE_BUFFER) |
buf_destroy(ilo_buffer(res)); |
else |
tex_destroy(ilo_texture(res)); |
} |
/**
 * Initialize resource-related functions.
 *
 * Installs the resource entry points of \p is into the pipe_screen vtable.
 */
void
ilo_init_resource_functions(struct ilo_screen *is)
{
   is->base.can_create_resource = ilo_can_create_resource;
   is->base.resource_create = ilo_resource_create;
   is->base.resource_from_handle = ilo_resource_from_handle;
   is->base.resource_get_handle = ilo_resource_get_handle;
   is->base.resource_destroy = ilo_resource_destroy;
}
/*
 * Public wrapper: (re)allocate the bo of a buffer.  Returns false when the
 * winsys allocation fails.
 */
bool
ilo_buffer_alloc_bo(struct ilo_buffer *buf)
{
   return buf_create_bo(buf);
}
bool |
ilo_texture_alloc_bo(struct ilo_texture *tex) |
{ |
/* a shared bo cannot be reallocated */ |
if (tex->imported) |
return false; |
return tex_create_bo(tex, NULL); |
} |
/**
 * Return the offset (in bytes) to a slice within the bo.
 *
 * The returned offset is aligned to tile size.  Since slices are not
 * guaranteed to start at tile boundaries, the X and Y offsets (in pixels)
 * from the tile origin to the slice are also returned.  X offset is always a
 * multiple of 4 and Y offset is always a multiple of 2.
 */
unsigned
ilo_texture_get_slice_offset(const struct ilo_texture *tex,
                             int level, int slice,
                             unsigned *x_offset, unsigned *y_offset)
{
   unsigned tile_w, tile_h, tile_size, row_size;
   unsigned x, y, slice_offset;
   /* tile dimensions in bytes x rows; see the Sandy Bridge PRM, volume 1
    * part 2, page 24 */
   switch (tex->tiling) {
   case INTEL_TILING_NONE:
      /* W-tiled: stencil is stored W-tiled even though the winsys reports
       * it as untiled */
      if (tex->bo_format == PIPE_FORMAT_S8_UINT) {
         tile_w = 64;
         tile_h = 64;
      }
      else {
         /* truly linear: every byte boundary is a "tile" boundary */
         tile_w = 1;
         tile_h = 1;
      }
      break;
   case INTEL_TILING_X:
      tile_w = 512;
      tile_h = 8;
      break;
   case INTEL_TILING_Y:
      tile_w = 128;
      tile_h = 32;
      break;
   default:
      assert(!"unknown tiling");
      tile_w = 1;
      tile_h = 1;
      break;
   }
   tile_size = tile_w * tile_h;
   row_size = tex->bo_stride * tile_h;
   /* convert the slice origin from blocks to bytes (x) and block rows (y) */
   /* in bytes */
   x = tex->slice_offsets[level][slice].x / tex->block_width * tex->bo_cpp;
   y = tex->slice_offsets[level][slice].y / tex->block_height;
   /* offset of the tile containing the slice origin */
   slice_offset = row_size * (y / tile_h) + tile_size * (x / tile_w);
   /*
    * Since tex->bo_stride is a multiple of tile_w, slice_offset should be
    * aligned at this point.
    */
   assert(slice_offset % tile_size == 0);
   /*
    * because of the possible values of align_i and align_j in
    * tex_layout_init_alignments(), x_offset is guaranteed to be a multiple of
    * 4 and y_offset is guaranteed to be a multiple of 2.
    */
   if (x_offset) {
      /* remainder within the tile, converted back to pixels */
      /* in pixels */
      x = (x % tile_w) / tex->bo_cpp * tex->block_width;
      assert(x % 4 == 0);
      *x_offset = x;
   }
   if (y_offset) {
      /* in pixels */
      y = (y % tile_h) * tex->block_height;
      assert(y % 2 == 0);
      *y_offset = y;
   }
   return slice_offset;
}
/contrib/sdk/sources/Mesa/src/gallium/drivers/ilo/ilo_resource.h |
---|
0,0 → 1,113 |
/* |
* Mesa 3-D graphics library |
* |
* Copyright (C) 2012-2013 LunarG, Inc. |
* |
* Permission is hereby granted, free of charge, to any person obtaining a |
* copy of this software and associated documentation files (the "Software"), |
* to deal in the Software without restriction, including without limitation |
* the rights to use, copy, modify, merge, publish, distribute, sublicense, |
* and/or sell copies of the Software, and to permit persons to whom the |
* Software is furnished to do so, subject to the following conditions: |
* |
* The above copyright notice and this permission notice shall be included |
* in all copies or substantial portions of the Software. |
* |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING |
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER |
* DEALINGS IN THE SOFTWARE. |
* |
* Authors: |
* Chia-I Wu <olv@lunarg.com> |
*/ |
#ifndef ILO_RESOURCE_H |
#define ILO_RESOURCE_H |
#include "intel_winsys.h" |
#include "ilo_common.h" |
struct ilo_screen; |
/* a PIPE_BUFFER resource backed by a single bo */
struct ilo_buffer {
   struct pipe_resource base;
   struct intel_bo *bo;       /* backing bo; replaced on reallocation */
   unsigned bo_size;          /* allocation size in bytes (may be padded) */
   unsigned bo_flags;         /* flags passed to the winsys on allocation */
};
/* a non-buffer resource (1D/2D/3D/cube texture) backed by a single bo */
struct ilo_texture {
   struct pipe_resource base;
   bool imported;             /* true if created from a winsys handle */
   unsigned bo_flags;
   enum pipe_format bo_format;   /* format used for the bo; may differ from
                                    base.format (e.g. separate stencil) */
   struct intel_bo *bo;
   /*
    * These are the values passed to or returned from winsys for bo
    * allocation.  As such,
    *
    *  - width and height are in blocks,
    *  - cpp is the block size in bytes, and
    *  - stride is the distance in bytes between two block rows.
    */
   int bo_width, bo_height, bo_cpp, bo_stride;
   enum intel_tiling_mode tiling;
   bool compressed;           /* compressed format (block size > 1x1) */
   unsigned block_width;      /* format block width in pixels */
   unsigned block_height;     /* format block height in pixels */
   /* true if the mip level alignments are stricter */
   bool halign_8, valign_4;
   /* true if space is reserved between layers */
   bool array_spacing_full;
   /* true if samples are interleaved */
   bool interleaved;
   /* 2D offsets into a layer/slice/face, indexed by [level][slice] */
   struct ilo_texture_slice {
      unsigned x;
      unsigned y;
   } *slice_offsets[PIPE_MAX_TEXTURE_LEVELS];
   /* separate S8 texture when the depth format has no stencil storage */
   struct ilo_texture *separate_s8;
};
static inline struct ilo_buffer * |
ilo_buffer(struct pipe_resource *res) |
{ |
return (struct ilo_buffer *) |
((res && res->target == PIPE_BUFFER) ? res : NULL); |
} |
static inline struct ilo_texture * |
ilo_texture(struct pipe_resource *res) |
{ |
return (struct ilo_texture *) |
((res && res->target != PIPE_BUFFER) ? res : NULL); |
} |
/* install the resource entry points into the screen */
void
ilo_init_resource_functions(struct ilo_screen *is);
/* (re)allocate the bo of a buffer; returns false on allocation failure */
bool
ilo_buffer_alloc_bo(struct ilo_buffer *buf);
/* (re)allocate the bo of a texture; fails for imported textures */
bool
ilo_texture_alloc_bo(struct ilo_texture *tex);
/* tile-aligned byte offset of a slice, plus intra-tile x/y pixel offsets */
unsigned
ilo_texture_get_slice_offset(const struct ilo_texture *tex,
                             int level, int slice,
                             unsigned *x_offset, unsigned *y_offset);
#endif /* ILO_RESOURCE_H */
/contrib/sdk/sources/Mesa/src/gallium/drivers/ilo/ilo_screen.c |
---|
0,0 → 1,752 |
/* |
* Mesa 3-D graphics library |
* |
* Copyright (C) 2012-2013 LunarG, Inc. |
* |
* Permission is hereby granted, free of charge, to any person obtaining a |
* copy of this software and associated documentation files (the "Software"), |
* to deal in the Software without restriction, including without limitation |
* the rights to use, copy, modify, merge, publish, distribute, sublicense, |
* and/or sell copies of the Software, and to permit persons to whom the |
* Software is furnished to do so, subject to the following conditions: |
* |
* The above copyright notice and this permission notice shall be included |
* in all copies or substantial portions of the Software. |
* |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING |
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER |
* DEALINGS IN THE SOFTWARE. |
* |
* Authors: |
* Chia-I Wu <olv@lunarg.com> |
*/ |
#include "util/u_format_s3tc.h" |
#include "vl/vl_decoder.h" |
#include "vl/vl_video_buffer.h" |
#include "intel_chipset.h" |
#include "intel_reg.h" /* for TIMESTAMP */ |
#include "intel_winsys.h" |
#include "ilo_context.h" |
#include "ilo_format.h" |
#include "ilo_resource.h" |
#include "ilo_public.h" |
#include "ilo_screen.h" |
/* run-time debug flags, parsed from the ILO_DEBUG environment variable */
int ilo_debug;
/* names accepted in ILO_DEBUG and the flags they map to */
static const struct debug_named_value ilo_debug_flags[] = {
   { "3d", ILO_DEBUG_3D, "Dump 3D commands and states" },
   { "vs", ILO_DEBUG_VS, "Dump vertex shaders" },
   { "gs", ILO_DEBUG_GS, "Dump geometry shaders" },
   { "fs", ILO_DEBUG_FS, "Dump fragment shaders" },
   { "cs", ILO_DEBUG_CS, "Dump compute shaders" },
   { "nohw", ILO_DEBUG_NOHW, "Do not send commands to HW" },
   { "nocache", ILO_DEBUG_NOCACHE, "Always invalidate HW caches" },
   DEBUG_NAMED_VALUE_END
};
static float |
ilo_get_paramf(struct pipe_screen *screen, enum pipe_capf param) |
{ |
switch (param) { |
case PIPE_CAPF_MAX_LINE_WIDTH: |
/* in U3.7, defined in 3DSTATE_SF */ |
return 7.0f; |
case PIPE_CAPF_MAX_LINE_WIDTH_AA: |
/* line width minus one, which is reserved for AA region */ |
return 6.0f; |
case PIPE_CAPF_MAX_POINT_WIDTH: |
/* in U8.3, defined in 3DSTATE_SF */ |
return 255.0f; |
case PIPE_CAPF_MAX_POINT_WIDTH_AA: |
/* same as point width, as we ignore rasterizer->point_smooth */ |
return 255.0f; |
case PIPE_CAPF_MAX_TEXTURE_ANISOTROPY: |
/* [2.0, 16.0], defined in SAMPLER_STATE */ |
return 16.0f; |
case PIPE_CAPF_MAX_TEXTURE_LOD_BIAS: |
/* [-16.0, 16.0), defined in SAMPLER_STATE */ |
return 15.0f; |
case PIPE_CAPF_GUARD_BAND_LEFT: |
case PIPE_CAPF_GUARD_BAND_TOP: |
case PIPE_CAPF_GUARD_BAND_RIGHT: |
case PIPE_CAPF_GUARD_BAND_BOTTOM: |
/* what are these for? */ |
return 0.0f; |
default: |
return 0.0f; |
} |
} |
/*
 * Screen hook: return a per-shader-stage capability.  Only the vertex,
 * geometry, and fragment stages are supported; all other stages report 0
 * for every cap.
 */
static int
ilo_get_shader_param(struct pipe_screen *screen, unsigned shader,
                     enum pipe_shader_cap param)
{
   switch (shader) {
   case PIPE_SHADER_FRAGMENT:
   case PIPE_SHADER_VERTEX:
   case PIPE_SHADER_GEOMETRY:
      break;
   default:
      return 0;
   }
   switch (param) {
   /* the limits are copied from the classic driver */
   case PIPE_SHADER_CAP_MAX_INSTRUCTIONS:
      return (shader == PIPE_SHADER_FRAGMENT) ? 1024 : 16384;
   case PIPE_SHADER_CAP_MAX_ALU_INSTRUCTIONS:
      return (shader == PIPE_SHADER_FRAGMENT) ? 1024 : 0;
   case PIPE_SHADER_CAP_MAX_TEX_INSTRUCTIONS:
      return (shader == PIPE_SHADER_FRAGMENT) ? 1024 : 0;
   case PIPE_SHADER_CAP_MAX_TEX_INDIRECTIONS:
      return (shader == PIPE_SHADER_FRAGMENT) ? 1024 : 0;
   case PIPE_SHADER_CAP_MAX_CONTROL_FLOW_DEPTH:
      return UINT_MAX;
   case PIPE_SHADER_CAP_MAX_INPUTS:
      /* this is limited by how many attributes SF can remap */
      return 16;
   case PIPE_SHADER_CAP_MAX_CONSTS:
      return 1024;
   case PIPE_SHADER_CAP_MAX_CONST_BUFFERS:
      return ILO_MAX_CONST_BUFFERS;
   case PIPE_SHADER_CAP_MAX_TEMPS:
      return 256;
   case PIPE_SHADER_CAP_MAX_ADDRS:
      return (shader == PIPE_SHADER_FRAGMENT) ? 0 : 1;
   case PIPE_SHADER_CAP_MAX_PREDS:
      return 0;
   case PIPE_SHADER_CAP_TGSI_CONT_SUPPORTED:
      return 1;
   case PIPE_SHADER_CAP_INDIRECT_INPUT_ADDR:
      return 0;
   case PIPE_SHADER_CAP_INDIRECT_OUTPUT_ADDR:
      return 0;
   case PIPE_SHADER_CAP_INDIRECT_TEMP_ADDR:
      return (shader == PIPE_SHADER_FRAGMENT) ? 0 : 1;
   case PIPE_SHADER_CAP_INDIRECT_CONST_ADDR:
      return 1;
   case PIPE_SHADER_CAP_SUBROUTINES:
      return 0;
   case PIPE_SHADER_CAP_INTEGERS:
      return 1;
   case PIPE_SHADER_CAP_MAX_TEXTURE_SAMPLERS:
      return ILO_MAX_SAMPLERS;
   case PIPE_SHADER_CAP_PREFERRED_IR:
      return PIPE_SHADER_IR_TGSI;
   case PIPE_SHADER_CAP_TGSI_SQRT_SUPPORTED:
      return 1;
   default:
      return 0;
   }
}
/*
 * Screen hook: return a video decoding capability.  Defers to the common
 * vl helpers for profile support and maximum buffer size.
 */
static int
ilo_get_video_param(struct pipe_screen *screen,
                    enum pipe_video_profile profile,
                    enum pipe_video_cap param)
{
   switch (param) {
   case PIPE_VIDEO_CAP_SUPPORTED:
      return vl_profile_supported(screen, profile);
   case PIPE_VIDEO_CAP_NPOT_TEXTURES:
      return 1;
   case PIPE_VIDEO_CAP_MAX_WIDTH:
   case PIPE_VIDEO_CAP_MAX_HEIGHT:
      return vl_video_buffer_max_size(screen);
   case PIPE_VIDEO_CAP_PREFERED_FORMAT:
      return PIPE_FORMAT_NV12;
   case PIPE_VIDEO_CAP_PREFERS_INTERLACED:
      return 1;
   case PIPE_VIDEO_CAP_SUPPORTS_PROGRESSIVE:
      return 1;
   case PIPE_VIDEO_CAP_SUPPORTS_INTERLACED:
      return 0;
   default:
      return 0;
   }
}
static int |
ilo_get_compute_param(struct pipe_screen *screen, |
enum pipe_compute_cap param, |
void *ret) |
{ |
union { |
const char *ir_target; |
uint64_t grid_dimension; |
uint64_t max_grid_size[3]; |
uint64_t max_block_size[3]; |
uint64_t max_threads_per_block; |
uint64_t max_global_size; |
uint64_t max_local_size; |
uint64_t max_private_size; |
uint64_t max_input_size; |
uint64_t max_mem_alloc_size; |
} val; |
const void *ptr; |
int size; |
/* XXX some randomly chosen values */ |
switch (param) { |
case PIPE_COMPUTE_CAP_IR_TARGET: |
val.ir_target = "ilog"; |
ptr = val.ir_target; |
size = strlen(val.ir_target) + 1; |
break; |
case PIPE_COMPUTE_CAP_GRID_DIMENSION: |
val.grid_dimension = Elements(val.max_grid_size); |
ptr = &val.grid_dimension; |
size = sizeof(val.grid_dimension); |
break; |
case PIPE_COMPUTE_CAP_MAX_GRID_SIZE: |
val.max_grid_size[0] = 65535; |
val.max_grid_size[1] = 65535; |
val.max_grid_size[2] = 1; |
ptr = &val.max_grid_size; |
size = sizeof(val.max_grid_size); |
break; |
case PIPE_COMPUTE_CAP_MAX_BLOCK_SIZE: |
val.max_block_size[0] = 512; |
val.max_block_size[1] = 512; |
val.max_block_size[2] = 512; |
ptr = &val.max_block_size; |
size = sizeof(val.max_block_size); |
break; |
case PIPE_COMPUTE_CAP_MAX_THREADS_PER_BLOCK: |
val.max_threads_per_block = 512; |
ptr = &val.max_threads_per_block; |
size = sizeof(val.max_threads_per_block); |
break; |
case PIPE_COMPUTE_CAP_MAX_GLOBAL_SIZE: |
val.max_global_size = 4; |
ptr = &val.max_global_size; |
size = sizeof(val.max_global_size); |
break; |
case PIPE_COMPUTE_CAP_MAX_LOCAL_SIZE: |
val.max_local_size = 64 * 1024; |
ptr = &val.max_local_size; |
size = sizeof(val.max_local_size); |
break; |
case PIPE_COMPUTE_CAP_MAX_PRIVATE_SIZE: |
val.max_private_size = 32768; |
ptr = &val.max_private_size; |
size = sizeof(val.max_private_size); |
break; |
case PIPE_COMPUTE_CAP_MAX_INPUT_SIZE: |
val.max_input_size = 256; |
ptr = &val.max_input_size; |
size = sizeof(val.max_input_size); |
break; |
case PIPE_COMPUTE_CAP_MAX_MEM_ALLOC_SIZE: |
val.max_mem_alloc_size = 128 * 1024 * 1024; |
ptr = &val.max_mem_alloc_size; |
size = sizeof(val.max_mem_alloc_size); |
break; |
default: |
ptr = NULL; |
size = 0; |
break; |
} |
if (ret) |
memcpy(ret, ptr, size); |
return size; |
} |
/*
 * Screen hook: return an integer/boolean capability.  Several caps vary
 * with the GPU generation stored in is->dev.
 */
static int
ilo_get_param(struct pipe_screen *screen, enum pipe_cap param)
{
   struct ilo_screen *is = ilo_screen(screen);
   switch (param) {
   case PIPE_CAP_NPOT_TEXTURES:
   case PIPE_CAP_TWO_SIDED_STENCIL:
      return true;
   case PIPE_CAP_MAX_DUAL_SOURCE_RENDER_TARGETS:
      return 0; /* TODO */
   case PIPE_CAP_ANISOTROPIC_FILTER:
   case PIPE_CAP_POINT_SPRITE:
      return true;
   case PIPE_CAP_MAX_RENDER_TARGETS:
      return ILO_MAX_DRAW_BUFFERS;
   case PIPE_CAP_OCCLUSION_QUERY:
   case PIPE_CAP_QUERY_TIME_ELAPSED:
   case PIPE_CAP_TEXTURE_SHADOW_MAP:
   case PIPE_CAP_TEXTURE_SWIZZLE: /* must be supported for shadow map */
      return true;
   case PIPE_CAP_MAX_TEXTURE_2D_LEVELS:
      /*
       * As defined in SURFACE_STATE, we have
       *
       *               Max WxHxD for 2D and CUBE   Max WxHxD for 3D
       *    GEN6       8192x8192x512               2048x2048x2048
       *    GEN7       16384x16384x2048            2048x2048x2048
       *
       * However, when the texutre size is large, things become unstable.  We
       * require the maximum texture size to be 2^30 bytes in
       * screen->can_create_resource().  Since the maximum pixel size is 2^4
       * bytes (PIPE_FORMAT_R32G32B32A32_FLOAT), textures should not have more
       * than 2^26 pixels.
       *
       * For 3D textures, we have to set the maximum number of levels to 9,
       * which has at most 2^24 pixels.  For 2D textures, we set it to 14,
       * which has at most 2^26 pixels.  And for cube textures, we has to set
       * it to 12.
       */
      return 14;
   case PIPE_CAP_MAX_TEXTURE_3D_LEVELS:
      return 9;
   case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS:
      return 12;
   case PIPE_CAP_TEXTURE_MIRROR_CLAMP:
      return false;
   case PIPE_CAP_BLEND_EQUATION_SEPARATE:
   case PIPE_CAP_SM3:
      return true;
   case PIPE_CAP_MAX_STREAM_OUTPUT_BUFFERS:
      /* on GEN7+, SO requires the SOL reset feature */
      if (is->dev.gen >= ILO_GEN(7) && !is->dev.has_gen7_sol_reset)
         return 0;
      return ILO_MAX_SO_BUFFERS;
   case PIPE_CAP_PRIMITIVE_RESTART:
      return true;
   case PIPE_CAP_MAX_COMBINED_SAMPLERS:
      return ILO_MAX_SAMPLERS * 2;
   case PIPE_CAP_INDEP_BLEND_ENABLE:
   case PIPE_CAP_INDEP_BLEND_FUNC:
      return true;
   case PIPE_CAP_MAX_TEXTURE_ARRAY_LAYERS:
      return (is->dev.gen >= ILO_GEN(7)) ? 2048 : 512;
   case PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT:
   case PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT:
   case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER:
   case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER:
   case PIPE_CAP_DEPTH_CLIP_DISABLE:
      return true;
   case PIPE_CAP_SHADER_STENCIL_EXPORT:
      return false;
   case PIPE_CAP_TGSI_INSTANCEID:
   case PIPE_CAP_VERTEX_ELEMENT_INSTANCE_DIVISOR:
      return true;
   case PIPE_CAP_FRAGMENT_COLOR_CLAMPED:
      return false;
   case PIPE_CAP_MIXED_COLORBUFFER_FORMATS:
      return true;
   case PIPE_CAP_SEAMLESS_CUBE_MAP:
   case PIPE_CAP_SEAMLESS_CUBE_MAP_PER_TEXTURE:
   case PIPE_CAP_SCALED_RESOLVE:
      return true;
   case PIPE_CAP_MIN_TEXEL_OFFSET:
      return -8;
   case PIPE_CAP_MAX_TEXEL_OFFSET:
      return 7;
   case PIPE_CAP_CONDITIONAL_RENDER:
   case PIPE_CAP_TEXTURE_BARRIER:
      return true;
   case PIPE_CAP_MAX_STREAM_OUTPUT_SEPARATE_COMPONENTS:
      return ILO_MAX_SO_BINDINGS / ILO_MAX_SO_BUFFERS;
   case PIPE_CAP_MAX_STREAM_OUTPUT_INTERLEAVED_COMPONENTS:
      return ILO_MAX_SO_BINDINGS;
   case PIPE_CAP_STREAM_OUTPUT_PAUSE_RESUME:
      if (is->dev.gen >= ILO_GEN(7))
         return is->dev.has_gen7_sol_reset;
      else
         return false; /* TODO */
   case PIPE_CAP_TGSI_CAN_COMPACT_CONSTANTS:
      return false;
   case PIPE_CAP_VERTEX_COLOR_UNCLAMPED:
      return true;
   case PIPE_CAP_VERTEX_COLOR_CLAMPED:
      return false;
   case PIPE_CAP_GLSL_FEATURE_LEVEL:
      return 140;
   case PIPE_CAP_QUADS_FOLLOW_PROVOKING_VERTEX_CONVENTION:
   case PIPE_CAP_USER_VERTEX_BUFFERS:
      return false;
   case PIPE_CAP_VERTEX_BUFFER_OFFSET_4BYTE_ALIGNED_ONLY:
   case PIPE_CAP_VERTEX_BUFFER_STRIDE_4BYTE_ALIGNED_ONLY:
   case PIPE_CAP_VERTEX_ELEMENT_SRC_OFFSET_4BYTE_ALIGNED_ONLY:
      return false;
   case PIPE_CAP_COMPUTE:
      return false; /* TODO */
   case PIPE_CAP_USER_INDEX_BUFFERS:
   case PIPE_CAP_USER_CONSTANT_BUFFERS:
      return true;
   case PIPE_CAP_CONSTANT_BUFFER_OFFSET_ALIGNMENT:
      /* imposed by OWord (Dual) Block Read */
      return 16;
   case PIPE_CAP_START_INSTANCE:
   case PIPE_CAP_QUERY_TIMESTAMP:
      return true;
   case PIPE_CAP_TEXTURE_MULTISAMPLE:
      return false; /* TODO */
   case PIPE_CAP_MIN_MAP_BUFFER_ALIGNMENT:
      return 0;
   case PIPE_CAP_CUBE_MAP_ARRAY:
   case PIPE_CAP_TEXTURE_BUFFER_OBJECTS:
      return true;
   case PIPE_CAP_TEXTURE_BUFFER_OFFSET_ALIGNMENT:
      return 1;
   case PIPE_CAP_TGSI_TEXCOORD:
      return false;
   case PIPE_CAP_PREFER_BLIT_BASED_TEXTURE_TRANSFER:
      return true;
   case PIPE_CAP_QUERY_PIPELINE_STATISTICS:
      return false; /* TODO */
   case PIPE_CAP_TEXTURE_BORDER_COLOR_QUIRK:
      return 0;
   case PIPE_CAP_MAX_TEXTURE_BUFFER_SIZE:
      /* a BRW_SURFACE_BUFFER can have up to 2^27 elements */
      return 1 << 27;
   case PIPE_CAP_MAX_VIEWPORTS:
      return ILO_MAX_VIEWPORTS;
   case PIPE_CAP_ENDIANNESS:
      return PIPE_ENDIAN_LITTLE;
   default:
      return 0;
   }
}
/*
 * Screen hook: return the driver vendor string.
 */
static const char *
ilo_get_vendor(struct pipe_screen *screen)
{
   return "LunarG, Inc.";
}
/*
 * Screen hook: return a human-readable chipset name, derived from the PCI
 * device id.
 */
static const char *
ilo_get_name(struct pipe_screen *screen)
{
   struct ilo_screen *is = ilo_screen(screen);
   const char *chipset;
   /* stolen from classic i965 */
   switch (is->dev.devid) {
   case PCI_CHIP_SANDYBRIDGE_GT1:
   case PCI_CHIP_SANDYBRIDGE_GT2:
   case PCI_CHIP_SANDYBRIDGE_GT2_PLUS:
      chipset = "Intel(R) Sandybridge Desktop";
      break;
   case PCI_CHIP_SANDYBRIDGE_M_GT1:
   case PCI_CHIP_SANDYBRIDGE_M_GT2:
   case PCI_CHIP_SANDYBRIDGE_M_GT2_PLUS:
      chipset = "Intel(R) Sandybridge Mobile";
      break;
   case PCI_CHIP_SANDYBRIDGE_S:
      chipset = "Intel(R) Sandybridge Server";
      break;
   case PCI_CHIP_IVYBRIDGE_GT1:
   case PCI_CHIP_IVYBRIDGE_GT2:
      chipset = "Intel(R) Ivybridge Desktop";
      break;
   case PCI_CHIP_IVYBRIDGE_M_GT1:
   case PCI_CHIP_IVYBRIDGE_M_GT2:
      chipset = "Intel(R) Ivybridge Mobile";
      break;
   case PCI_CHIP_IVYBRIDGE_S_GT1:
   case PCI_CHIP_IVYBRIDGE_S_GT2:
      chipset = "Intel(R) Ivybridge Server";
      break;
   case PCI_CHIP_BAYTRAIL_M_1:
   case PCI_CHIP_BAYTRAIL_M_2:
   case PCI_CHIP_BAYTRAIL_M_3:
   case PCI_CHIP_BAYTRAIL_M_4:
   case PCI_CHIP_BAYTRAIL_D:
      chipset = "Intel(R) Bay Trail";
      break;
   case PCI_CHIP_HASWELL_GT1:
   case PCI_CHIP_HASWELL_GT2:
   case PCI_CHIP_HASWELL_GT3:
   case PCI_CHIP_HASWELL_SDV_GT1:
   case PCI_CHIP_HASWELL_SDV_GT2:
   case PCI_CHIP_HASWELL_SDV_GT3:
   case PCI_CHIP_HASWELL_ULT_GT1:
   case PCI_CHIP_HASWELL_ULT_GT2:
   case PCI_CHIP_HASWELL_ULT_GT3:
   case PCI_CHIP_HASWELL_CRW_GT1:
   case PCI_CHIP_HASWELL_CRW_GT2:
   case PCI_CHIP_HASWELL_CRW_GT3:
      chipset = "Intel(R) Haswell Desktop";
      break;
   case PCI_CHIP_HASWELL_M_GT1:
   case PCI_CHIP_HASWELL_M_GT2:
   case PCI_CHIP_HASWELL_M_GT3:
   case PCI_CHIP_HASWELL_SDV_M_GT1:
   case PCI_CHIP_HASWELL_SDV_M_GT2:
   case PCI_CHIP_HASWELL_SDV_M_GT3:
   case PCI_CHIP_HASWELL_ULT_M_GT1:
   case PCI_CHIP_HASWELL_ULT_M_GT2:
   case PCI_CHIP_HASWELL_ULT_M_GT3:
   case PCI_CHIP_HASWELL_CRW_M_GT1:
   case PCI_CHIP_HASWELL_CRW_M_GT2:
   case PCI_CHIP_HASWELL_CRW_M_GT3:
      chipset = "Intel(R) Haswell Mobile";
      break;
   case PCI_CHIP_HASWELL_S_GT1:
   case PCI_CHIP_HASWELL_S_GT2:
   case PCI_CHIP_HASWELL_S_GT3:
   case PCI_CHIP_HASWELL_SDV_S_GT1:
   case PCI_CHIP_HASWELL_SDV_S_GT2:
   case PCI_CHIP_HASWELL_SDV_S_GT3:
   case PCI_CHIP_HASWELL_ULT_S_GT1:
   case PCI_CHIP_HASWELL_ULT_S_GT2:
   case PCI_CHIP_HASWELL_ULT_S_GT3:
   case PCI_CHIP_HASWELL_CRW_S_GT1:
   case PCI_CHIP_HASWELL_CRW_S_GT2:
   case PCI_CHIP_HASWELL_CRW_S_GT3:
      chipset = "Intel(R) Haswell Server";
      break;
   default:
      chipset = "Unknown Intel Chipset";
      break;
   }
   return chipset;
}
/*
 * Screen hook: read the GPU TIMESTAMP register and scale it to
 * nanoseconds.  Only the upper dword is usable (see comment below), so the
 * result wraps roughly every 343 seconds.
 */
static uint64_t
ilo_get_timestamp(struct pipe_screen *screen)
{
   struct ilo_screen *is = ilo_screen(screen);
   union {
      uint64_t val;
      uint32_t dw[2];
   } timestamp;
   intel_winsys_read_reg(is->winsys, TIMESTAMP, &timestamp.val);
   /*
    * From the Ivy Bridge PRM, volume 1 part 3, page 107:
    *
    *   "Note: This timestamp register reflects the value of the PCU TSC.
    *    The PCU TSC counts 10ns increments; this timestamp reflects bits
    *    38:3 of the TSC (i.e. 80ns granularity, rolling over every 1.5
    *    hours)."
    *
    * However, it seems dw[0] is garbage and dw[1] contains the lower 32 bits
    * of the timestamp.  We will have to live with a timestamp that rolls over
    * every ~343 seconds.
    *
    * See also brw_get_timestamp().
    */
   return (uint64_t) timestamp.dw[1] * 80;
}
static void |
ilo_fence_reference(struct pipe_screen *screen, |
struct pipe_fence_handle **p, |
struct pipe_fence_handle *f) |
{ |
struct ilo_fence **ptr = (struct ilo_fence **) p; |
struct ilo_fence *fence = ilo_fence(f); |
if (!ptr) { |
/* still need to reference fence */ |
if (fence) |
pipe_reference(NULL, &fence->reference); |
return; |
} |
/* reference fence and dereference the one pointed to by ptr */ |
if (*ptr && pipe_reference(&(*ptr)->reference, &fence->reference)) { |
struct ilo_fence *old = *ptr; |
if (old->bo) |
intel_bo_unreference(old->bo); |
FREE(old); |
} |
*ptr = fence; |
} |
static boolean |
ilo_fence_signalled(struct pipe_screen *screen, |
struct pipe_fence_handle *f) |
{ |
struct ilo_fence *fence = ilo_fence(f); |
/* mark signalled if the bo is idle */ |
if (fence->bo && !intel_bo_is_busy(fence->bo)) { |
intel_bo_unreference(fence->bo); |
fence->bo = NULL; |
} |
return (fence->bo == NULL); |
} |
/*
 * Screen hook: block until the fence is signalled or the timeout expires.
 * Returns true when the fence signalled in time.
 */
static boolean
ilo_fence_finish(struct pipe_screen *screen,
                 struct pipe_fence_handle *f,
                 uint64_t timeout)
{
   struct ilo_fence *fence = ilo_fence(f);
   /* the winsys takes a signed timeout; negative means wait forever */
   const int64_t wait_timeout = (timeout > INT64_MAX) ? -1 : timeout;
   /* already signalled */
   if (!fence->bo)
      return true;
   /* wait and see if it returns error */
   if (intel_bo_wait(fence->bo, wait_timeout))
      return false;
   /* mark signalled */
   intel_bo_unreference(fence->bo);
   fence->bo = NULL;
   return true;
}
/*
 * Screen hook: destroy the screen, tearing down the winsys with it.
 */
static void
ilo_screen_destroy(struct pipe_screen *screen)
{
   struct ilo_screen *is = ilo_screen(screen);
   /* as it seems, winsys is owned by the screen */
   intel_winsys_destroy(is->winsys);
   FREE(is);
}
/*
 * Fill in the device info (generation, GT level, URB size) from the winsys
 * info.  Returns false for unrecognized GPU generations.
 */
static bool
init_dev(struct ilo_dev_info *dev, const struct intel_winsys_info *info)
{
   dev->devid = info->devid;
   dev->has_llc = info->has_llc;
   dev->has_gen7_sol_reset = info->has_gen7_sol_reset;
   dev->has_address_swizzling = info->has_address_swizzling;
   /*
    * From the Sandy Bridge PRM, volume 4 part 2, page 18:
    *
    *   "[DevSNB]: The GT1 product's URB provides 32KB of storage, arranged
    *    as 1024 256-bit rows. The GT2 product's URB provides 64KB of
    *    storage, arranged as 2048 256-bit rows. A row corresponds in size
    *    to an EU GRF register. Read/write access to the URB is generally
    *    supported on a row-granular basis."
    *
    * From the Ivy Bridge PRM, volume 4 part 2, page 17:
    *
    *   "URB Size    URB Rows    URB Rows when SLM Enabled
    *    128k        4096        2048
    *    256k        8096        4096"
    */
   if (IS_HASWELL(info->devid)) {
      /* Gen 7.5 */
      dev->gen = ILO_GEN(7.5);
      if (IS_HSW_GT3(info->devid)) {
         dev->gt = 3;
         dev->urb_size = 512 * 1024;
      }
      else if (IS_HSW_GT2(info->devid)) {
         dev->gt = 2;
         dev->urb_size = 256 * 1024;
      }
      else {
         dev->gt = 1;
         dev->urb_size = 128 * 1024;
      }
   }
   else if (IS_GEN7(info->devid)) {
      /* Ivy Bridge / Bay Trail */
      dev->gen = ILO_GEN(7);
      if (IS_IVB_GT2(info->devid)) {
         dev->gt = 2;
         dev->urb_size = 256 * 1024;
      }
      else {
         dev->gt = 1;
         dev->urb_size = 128 * 1024;
      }
   }
   else if (IS_GEN6(info->devid)) {
      /* Sandy Bridge */
      dev->gen = ILO_GEN(6);
      if (IS_SNB_GT2(info->devid)) {
         dev->gt = 2;
         dev->urb_size = 64 * 1024;
      }
      else {
         dev->gt = 1;
         dev->urb_size = 32 * 1024;
      }
   }
   else {
      ilo_err("unknown GPU generation\n");
      return false;
   }
   return true;
}
/*
 * Create an ilo screen on top of the given winsys.  The winsys is owned by
 * the screen (destroyed in ilo_screen_destroy()).  Returns NULL on
 * allocation failure or on an unsupported GPU.
 *
 * NOTE(review): on the init_dev() failure path only the screen is freed;
 * presumably the caller still owns the winsys in that case — confirm.
 */
struct pipe_screen *
ilo_screen_create(struct intel_winsys *ws)
{
   struct ilo_screen *is;
   const struct intel_winsys_info *info;
   /* parse ILO_DEBUG once at screen creation */
   ilo_debug = debug_get_flags_option("ILO_DEBUG", ilo_debug_flags, 0);
   is = CALLOC_STRUCT(ilo_screen);
   if (!is)
      return NULL;
   is->winsys = ws;
   intel_winsys_enable_reuse(is->winsys);
   info = intel_winsys_get_info(is->winsys);
   if (!init_dev(&is->dev, info)) {
      FREE(is);
      return NULL;
   }
   util_format_s3tc_init();
   /* hook up the screen vtable */
   is->base.destroy = ilo_screen_destroy;
   is->base.get_name = ilo_get_name;
   is->base.get_vendor = ilo_get_vendor;
   is->base.get_param = ilo_get_param;
   is->base.get_paramf = ilo_get_paramf;
   is->base.get_shader_param = ilo_get_shader_param;
   is->base.get_video_param = ilo_get_video_param;
   is->base.get_compute_param = ilo_get_compute_param;
   is->base.get_timestamp = ilo_get_timestamp;
   is->base.flush_frontbuffer = NULL;
   is->base.fence_reference = ilo_fence_reference;
   is->base.fence_signalled = ilo_fence_signalled;
   is->base.fence_finish = ilo_fence_finish;
   is->base.get_driver_query_info = NULL;
   ilo_init_format_functions(is);
   ilo_init_context_functions(is);
   ilo_init_resource_functions(is);
   return &is->base;
}
/contrib/sdk/sources/Mesa/src/gallium/drivers/ilo/ilo_screen.h |
---|
0,0 → 1,63 |
/* |
* Mesa 3-D graphics library |
* |
* Copyright (C) 2012-2013 LunarG, Inc. |
* |
* Permission is hereby granted, free of charge, to any person obtaining a |
* copy of this software and associated documentation files (the "Software"), |
* to deal in the Software without restriction, including without limitation |
* the rights to use, copy, modify, merge, publish, distribute, sublicense, |
* and/or sell copies of the Software, and to permit persons to whom the |
* Software is furnished to do so, subject to the following conditions: |
* |
* The above copyright notice and this permission notice shall be included |
* in all copies or substantial portions of the Software. |
* |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING |
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER |
* DEALINGS IN THE SOFTWARE. |
* |
* Authors: |
* Chia-I Wu <olv@lunarg.com> |
*/ |
#ifndef ILO_SCREEN_H |
#define ILO_SCREEN_H |
#include "pipe/p_screen.h" |
#include "pipe/p_state.h" |
#include "ilo_common.h" |
struct intel_winsys; |
struct intel_bo; |
/* a fence; signalled once its bo (the last batch) goes idle */
struct ilo_fence {
   struct pipe_reference reference;
   struct intel_bo *bo;       /* NULL once the fence is signalled */
};
/* the ilo pipe_screen, wrapping a winsys and the probed device info */
struct ilo_screen {
   struct pipe_screen base;
   struct intel_winsys *winsys;   /* owned by the screen */
   struct ilo_dev_info dev;       /* generation/GT/URB info from init_dev() */
};
/* downcast a pipe_screen to an ilo_screen */
static inline struct ilo_screen *
ilo_screen(struct pipe_screen *screen)
{
   return (struct ilo_screen *) screen;
}
/* downcast a pipe_fence_handle to an ilo_fence */
static inline struct ilo_fence *
ilo_fence(struct pipe_fence_handle *fence)
{
   return (struct ilo_fence *) fence;
}
#endif /* ILO_SCREEN_H */ |
/contrib/sdk/sources/Mesa/src/gallium/drivers/ilo/ilo_shader.c |
---|
0,0 → 1,1169 |
/* |
* Mesa 3-D graphics library |
* |
* Copyright (C) 2012-2013 LunarG, Inc. |
* |
* Permission is hereby granted, free of charge, to any person obtaining a |
* copy of this software and associated documentation files (the "Software"), |
* to deal in the Software without restriction, including without limitation |
* the rights to use, copy, modify, merge, publish, distribute, sublicense, |
* and/or sell copies of the Software, and to permit persons to whom the |
* Software is furnished to do so, subject to the following conditions: |
* |
* The above copyright notice and this permission notice shall be included |
* in all copies or substantial portions of the Software. |
* |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING |
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER |
* DEALINGS IN THE SOFTWARE. |
* |
* Authors: |
* Chia-I Wu <olv@lunarg.com> |
*/ |
#include "tgsi/tgsi_parse.h" |
#include "intel_winsys.h" |
#include "brw_defines.h" /* for SBE setup */ |
#include "shader/ilo_shader_internal.h" |
#include "ilo_state.h" |
#include "ilo_shader.h" |
/*
 * A shader cache tracks shader states and uploads their kernels to a bo.
 * Shaders migrate between the two lists below: adding a shader (or changing
 * one) puts it on "changed"; a successful upload moves it to "shaders".
 */
struct ilo_shader_cache {
   struct list_head shaders;   /* shaders whose variants are all uploaded */
   struct list_head changed;   /* shaders with variants pending upload */
};
/** |
* Create a shader cache. A shader cache can manage shaders and upload them |
* to a bo as a whole. |
*/ |
struct ilo_shader_cache * |
ilo_shader_cache_create(void) |
{ |
struct ilo_shader_cache *shc; |
shc = CALLOC_STRUCT(ilo_shader_cache); |
if (!shc) |
return NULL; |
list_inithead(&shc->shaders); |
list_inithead(&shc->changed); |
return shc; |
} |
/**
 * Destroy a shader cache.
 *
 * Only the cache object itself is freed; the managed shader states remain
 * owned by their creators and must be destroyed separately.
 */
void
ilo_shader_cache_destroy(struct ilo_shader_cache *shc)
{
   FREE(shc);
}
/** |
* Add a shader to the cache. |
*/ |
void |
ilo_shader_cache_add(struct ilo_shader_cache *shc, |
struct ilo_shader_state *shader) |
{ |
struct ilo_shader *sh; |
shader->cache = shc; |
LIST_FOR_EACH_ENTRY(sh, &shader->variants, list) |
sh->uploaded = false; |
list_add(&shader->list, &shc->changed); |
} |
/** |
* Remove a shader from the cache. |
*/ |
void |
ilo_shader_cache_remove(struct ilo_shader_cache *shc, |
struct ilo_shader_state *shader) |
{ |
list_del(&shader->list); |
shader->cache = NULL; |
} |
/** |
* Notify the cache that a managed shader has changed. |
*/ |
static void |
ilo_shader_cache_notify_change(struct ilo_shader_cache *shc, |
struct ilo_shader_state *shader) |
{ |
if (shader->cache == shc) { |
list_del(&shader->list); |
list_add(&shader->list, &shc->changed); |
} |
} |
/** |
* Upload a managed shader to the bo. |
*/ |
static int |
ilo_shader_cache_upload_shader(struct ilo_shader_cache *shc, |
struct ilo_shader_state *shader, |
struct intel_bo *bo, unsigned offset, |
bool incremental) |
{ |
const unsigned base = offset; |
struct ilo_shader *sh; |
LIST_FOR_EACH_ENTRY(sh, &shader->variants, list) { |
int err; |
if (incremental && sh->uploaded) |
continue; |
/* kernels must be aligned to 64-byte */ |
offset = align(offset, 64); |
err = intel_bo_pwrite(bo, offset, sh->kernel_size, sh->kernel); |
if (unlikely(err)) |
return -1; |
sh->uploaded = true; |
sh->cache_offset = offset; |
offset += sh->kernel_size; |
} |
return (int) (offset - base); |
} |
/** |
* Similar to ilo_shader_cache_upload(), except no upload happens. |
*/ |
static int |
ilo_shader_cache_get_upload_size(struct ilo_shader_cache *shc, |
unsigned offset, |
bool incremental) |
{ |
const unsigned base = offset; |
struct ilo_shader_state *shader; |
if (!incremental) { |
LIST_FOR_EACH_ENTRY(shader, &shc->shaders, list) { |
struct ilo_shader *sh; |
/* see ilo_shader_cache_upload_shader() */ |
LIST_FOR_EACH_ENTRY(sh, &shader->variants, list) { |
if (!incremental || !sh->uploaded) |
offset = align(offset, 64) + sh->kernel_size; |
} |
} |
} |
LIST_FOR_EACH_ENTRY(shader, &shc->changed, list) { |
struct ilo_shader *sh; |
/* see ilo_shader_cache_upload_shader() */ |
LIST_FOR_EACH_ENTRY(sh, &shader->variants, list) { |
if (!incremental || !sh->uploaded) |
offset = align(offset, 64) + sh->kernel_size; |
} |
} |
/* |
* From the Sandy Bridge PRM, volume 4 part 2, page 112: |
* |
* "Due to prefetch of the instruction stream, the EUs may attempt to |
* access up to 8 instructions (128 bytes) beyond the end of the |
* kernel program - possibly into the next memory page. Although |
* these instructions will not be executed, software must account for |
* the prefetch in order to avoid invalid page access faults." |
*/ |
if (offset > base) |
offset += 128; |
return (int) (offset - base); |
} |
/** |
* Upload managed shaders to the bo. When incremental is true, only shaders |
* that are changed or added after the last upload are uploaded. |
*/ |
int |
ilo_shader_cache_upload(struct ilo_shader_cache *shc, |
struct intel_bo *bo, unsigned offset, |
bool incremental) |
{ |
struct ilo_shader_state *shader, *next; |
int size = 0, s; |
if (!bo) |
return ilo_shader_cache_get_upload_size(shc, offset, incremental); |
if (!incremental) { |
LIST_FOR_EACH_ENTRY(shader, &shc->shaders, list) { |
s = ilo_shader_cache_upload_shader(shc, shader, |
bo, offset, incremental); |
if (unlikely(s < 0)) |
return s; |
size += s; |
offset += s; |
} |
} |
LIST_FOR_EACH_ENTRY_SAFE(shader, next, &shc->changed, list) { |
s = ilo_shader_cache_upload_shader(shc, shader, |
bo, offset, incremental); |
if (unlikely(s < 0)) |
return s; |
size += s; |
offset += s; |
list_del(&shader->list); |
list_add(&shader->list, &shc->shaders); |
} |
return size; |
} |
/** |
* Initialize a shader variant. |
*/ |
void |
ilo_shader_variant_init(struct ilo_shader_variant *variant, |
const struct ilo_shader_info *info, |
const struct ilo_context *ilo) |
{ |
int num_views, i; |
memset(variant, 0, sizeof(*variant)); |
switch (info->type) { |
case PIPE_SHADER_VERTEX: |
variant->u.vs.rasterizer_discard = |
ilo->rasterizer->state.rasterizer_discard; |
variant->u.vs.num_ucps = |
util_last_bit(ilo->rasterizer->state.clip_plane_enable); |
break; |
case PIPE_SHADER_GEOMETRY: |
variant->u.gs.rasterizer_discard = |
ilo->rasterizer->state.rasterizer_discard; |
variant->u.gs.num_inputs = ilo->vs->shader->out.count; |
for (i = 0; i < ilo->vs->shader->out.count; i++) { |
variant->u.gs.semantic_names[i] = |
ilo->vs->shader->out.semantic_names[i]; |
variant->u.gs.semantic_indices[i] = |
ilo->vs->shader->out.semantic_indices[i]; |
} |
break; |
case PIPE_SHADER_FRAGMENT: |
variant->u.fs.flatshade = |
(info->has_color_interp && ilo->rasterizer->state.flatshade); |
variant->u.fs.fb_height = (info->has_pos) ? |
ilo->fb.state.height : 1; |
variant->u.fs.num_cbufs = ilo->fb.state.nr_cbufs; |
break; |
default: |
assert(!"unknown shader type"); |
break; |
} |
num_views = ilo->view[info->type].count; |
assert(info->num_samplers <= num_views); |
variant->num_sampler_views = info->num_samplers; |
for (i = 0; i < info->num_samplers; i++) { |
const struct pipe_sampler_view *view = ilo->view[info->type].states[i]; |
const struct ilo_sampler_cso *sampler = ilo->sampler[info->type].cso[i]; |
if (view) { |
variant->sampler_view_swizzles[i].r = view->swizzle_r; |
variant->sampler_view_swizzles[i].g = view->swizzle_g; |
variant->sampler_view_swizzles[i].b = view->swizzle_b; |
variant->sampler_view_swizzles[i].a = view->swizzle_a; |
} |
else if (info->shadow_samplers & (1 << i)) { |
variant->sampler_view_swizzles[i].r = PIPE_SWIZZLE_RED; |
variant->sampler_view_swizzles[i].g = PIPE_SWIZZLE_RED; |
variant->sampler_view_swizzles[i].b = PIPE_SWIZZLE_RED; |
variant->sampler_view_swizzles[i].a = PIPE_SWIZZLE_ONE; |
} |
else { |
variant->sampler_view_swizzles[i].r = PIPE_SWIZZLE_RED; |
variant->sampler_view_swizzles[i].g = PIPE_SWIZZLE_GREEN; |
variant->sampler_view_swizzles[i].b = PIPE_SWIZZLE_BLUE; |
variant->sampler_view_swizzles[i].a = PIPE_SWIZZLE_ALPHA; |
} |
/* |
* When non-nearest filter and PIPE_TEX_WRAP_CLAMP wrap mode is used, |
* the HW wrap mode is set to BRW_TEXCOORDMODE_CLAMP_BORDER, and we need |
* to manually saturate the texture coordinates. |
*/ |
if (sampler) { |
variant->saturate_tex_coords[0] |= sampler->saturate_s << i; |
variant->saturate_tex_coords[1] |= sampler->saturate_t << i; |
variant->saturate_tex_coords[2] |= sampler->saturate_r << i; |
} |
} |
} |
/** |
* Guess the shader variant, knowing that the context may still change. |
*/ |
static void |
ilo_shader_variant_guess(struct ilo_shader_variant *variant, |
const struct ilo_shader_info *info, |
const struct ilo_context *ilo) |
{ |
int i; |
memset(variant, 0, sizeof(*variant)); |
switch (info->type) { |
case PIPE_SHADER_VERTEX: |
break; |
case PIPE_SHADER_GEOMETRY: |
break; |
case PIPE_SHADER_FRAGMENT: |
variant->u.fs.flatshade = false; |
variant->u.fs.fb_height = (info->has_pos) ? |
ilo->fb.state.height : 1; |
variant->u.fs.num_cbufs = 1; |
break; |
default: |
assert(!"unknown shader type"); |
break; |
} |
variant->num_sampler_views = info->num_samplers; |
for (i = 0; i < info->num_samplers; i++) { |
if (info->shadow_samplers & (1 << i)) { |
variant->sampler_view_swizzles[i].r = PIPE_SWIZZLE_RED; |
variant->sampler_view_swizzles[i].g = PIPE_SWIZZLE_RED; |
variant->sampler_view_swizzles[i].b = PIPE_SWIZZLE_RED; |
variant->sampler_view_swizzles[i].a = PIPE_SWIZZLE_ONE; |
} |
else { |
variant->sampler_view_swizzles[i].r = PIPE_SWIZZLE_RED; |
variant->sampler_view_swizzles[i].g = PIPE_SWIZZLE_GREEN; |
variant->sampler_view_swizzles[i].b = PIPE_SWIZZLE_BLUE; |
variant->sampler_view_swizzles[i].a = PIPE_SWIZZLE_ALPHA; |
} |
} |
} |
/** |
* Parse a TGSI instruction for the shader info. |
*/ |
static void |
ilo_shader_info_parse_inst(struct ilo_shader_info *info, |
const struct tgsi_full_instruction *inst) |
{ |
int i; |
/* look for edgeflag passthrough */ |
if (info->edgeflag_out >= 0 && |
inst->Instruction.Opcode == TGSI_OPCODE_MOV && |
inst->Dst[0].Register.File == TGSI_FILE_OUTPUT && |
inst->Dst[0].Register.Index == info->edgeflag_out) { |
assert(inst->Src[0].Register.File == TGSI_FILE_INPUT); |
info->edgeflag_in = inst->Src[0].Register.Index; |
} |
if (inst->Instruction.Texture) { |
bool shadow; |
switch (inst->Texture.Texture) { |
case TGSI_TEXTURE_SHADOW1D: |
case TGSI_TEXTURE_SHADOW2D: |
case TGSI_TEXTURE_SHADOWRECT: |
case TGSI_TEXTURE_SHADOW1D_ARRAY: |
case TGSI_TEXTURE_SHADOW2D_ARRAY: |
case TGSI_TEXTURE_SHADOWCUBE: |
case TGSI_TEXTURE_SHADOWCUBE_ARRAY: |
shadow = true; |
break; |
default: |
shadow = false; |
break; |
} |
for (i = 0; i < inst->Instruction.NumSrcRegs; i++) { |
const struct tgsi_full_src_register *src = &inst->Src[i]; |
if (src->Register.File == TGSI_FILE_SAMPLER) { |
const int idx = src->Register.Index; |
if (idx >= info->num_samplers) |
info->num_samplers = idx + 1; |
if (shadow) |
info->shadow_samplers |= 1 << idx; |
} |
} |
} |
} |
/** |
* Parse a TGSI property for the shader info. |
*/ |
static void |
ilo_shader_info_parse_prop(struct ilo_shader_info *info, |
const struct tgsi_full_property *prop) |
{ |
switch (prop->Property.PropertyName) { |
case TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS: |
info->fs_color0_writes_all_cbufs = prop->u[0].Data; |
break; |
default: |
break; |
} |
} |
/** |
* Parse a TGSI declaration for the shader info. |
*/ |
static void |
ilo_shader_info_parse_decl(struct ilo_shader_info *info, |
const struct tgsi_full_declaration *decl) |
{ |
switch (decl->Declaration.File) { |
case TGSI_FILE_INPUT: |
if (decl->Declaration.Interpolate && |
decl->Interp.Interpolate == TGSI_INTERPOLATE_COLOR) |
info->has_color_interp = true; |
if (decl->Declaration.Semantic && |
decl->Semantic.Name == TGSI_SEMANTIC_POSITION) |
info->has_pos = true; |
break; |
case TGSI_FILE_OUTPUT: |
if (decl->Declaration.Semantic && |
decl->Semantic.Name == TGSI_SEMANTIC_EDGEFLAG) |
info->edgeflag_out = decl->Range.First; |
break; |
case TGSI_FILE_SYSTEM_VALUE: |
if (decl->Declaration.Semantic && |
decl->Semantic.Name == TGSI_SEMANTIC_INSTANCEID) |
info->has_instanceid = true; |
if (decl->Declaration.Semantic && |
decl->Semantic.Name == TGSI_SEMANTIC_VERTEXID) |
info->has_vertexid = true; |
break; |
default: |
break; |
} |
} |
static void |
ilo_shader_info_parse_tokens(struct ilo_shader_info *info) |
{ |
struct tgsi_parse_context parse; |
info->edgeflag_in = -1; |
info->edgeflag_out = -1; |
tgsi_parse_init(&parse, info->tokens); |
while (!tgsi_parse_end_of_tokens(&parse)) { |
const union tgsi_full_token *token; |
tgsi_parse_token(&parse); |
token = &parse.FullToken; |
switch (token->Token.Type) { |
case TGSI_TOKEN_TYPE_DECLARATION: |
ilo_shader_info_parse_decl(info, &token->FullDeclaration); |
break; |
case TGSI_TOKEN_TYPE_INSTRUCTION: |
ilo_shader_info_parse_inst(info, &token->FullInstruction); |
break; |
case TGSI_TOKEN_TYPE_PROPERTY: |
ilo_shader_info_parse_prop(info, &token->FullProperty); |
break; |
default: |
break; |
} |
} |
tgsi_parse_free(&parse); |
} |
/** |
* Create a shader state. |
*/ |
static struct ilo_shader_state * |
ilo_shader_state_create(const struct ilo_context *ilo, |
int type, const void *templ) |
{ |
struct ilo_shader_state *state; |
struct ilo_shader_variant variant; |
state = CALLOC_STRUCT(ilo_shader_state); |
if (!state) |
return NULL; |
state->info.dev = ilo->dev; |
state->info.type = type; |
if (type == PIPE_SHADER_COMPUTE) { |
const struct pipe_compute_state *c = |
(const struct pipe_compute_state *) templ; |
state->info.tokens = tgsi_dup_tokens(c->prog); |
state->info.compute.req_local_mem = c->req_local_mem; |
state->info.compute.req_private_mem = c->req_private_mem; |
state->info.compute.req_input_mem = c->req_input_mem; |
} |
else { |
const struct pipe_shader_state *s = |
(const struct pipe_shader_state *) templ; |
state->info.tokens = tgsi_dup_tokens(s->tokens); |
state->info.stream_output = s->stream_output; |
} |
list_inithead(&state->variants); |
ilo_shader_info_parse_tokens(&state->info); |
/* guess and compile now */ |
ilo_shader_variant_guess(&variant, &state->info, ilo); |
if (!ilo_shader_state_use_variant(state, &variant)) { |
ilo_shader_destroy(state); |
return NULL; |
} |
return state; |
} |
/** |
* Add a compiled shader to the shader state. |
*/ |
static void |
ilo_shader_state_add_shader(struct ilo_shader_state *state, |
struct ilo_shader *sh) |
{ |
list_add(&sh->list, &state->variants); |
state->num_variants++; |
state->total_size += sh->kernel_size; |
if (state->cache) |
ilo_shader_cache_notify_change(state->cache, state); |
} |
/** |
* Remove a compiled shader from the shader state. |
*/ |
static void |
ilo_shader_state_remove_shader(struct ilo_shader_state *state, |
struct ilo_shader *sh) |
{ |
list_del(&sh->list); |
state->num_variants--; |
state->total_size -= sh->kernel_size; |
} |
/** |
* Garbage collect shader variants in the shader state. |
*/ |
static void |
ilo_shader_state_gc(struct ilo_shader_state *state) |
{ |
/* activate when the variants take up more than 4KiB of space */ |
const int limit = 4 * 1024; |
struct ilo_shader *sh, *next; |
if (state->total_size < limit) |
return; |
/* remove from the tail as the most recently ones are at the head */ |
LIST_FOR_EACH_ENTRY_SAFE_REV(sh, next, &state->variants, list) { |
ilo_shader_state_remove_shader(state, sh); |
ilo_shader_destroy_kernel(sh); |
if (state->total_size <= limit / 2) |
break; |
} |
} |
/** |
* Search for a shader variant. |
*/ |
static struct ilo_shader * |
ilo_shader_state_search_variant(struct ilo_shader_state *state, |
const struct ilo_shader_variant *variant) |
{ |
struct ilo_shader *sh = NULL, *tmp; |
LIST_FOR_EACH_ENTRY(tmp, &state->variants, list) { |
if (memcmp(&tmp->variant, variant, sizeof(*variant)) == 0) { |
sh = tmp; |
break; |
} |
} |
return sh; |
} |
static void |
copy_so_info(struct ilo_shader *sh, |
const struct pipe_stream_output_info *so_info) |
{ |
unsigned i, attr; |
if (!so_info->num_outputs) |
return; |
sh->so_info = *so_info; |
for (i = 0; i < so_info->num_outputs; i++) { |
/* figure out which attribute is sourced */ |
for (attr = 0; attr < sh->out.count; attr++) { |
const int reg_idx = sh->out.register_indices[attr]; |
if (reg_idx == so_info->output[i].register_index) |
break; |
} |
if (attr < sh->out.count) { |
sh->so_info.output[i].register_index = attr; |
} |
else { |
assert(!"stream output an undefined register"); |
sh->so_info.output[i].register_index = 0; |
} |
/* PSIZE is at W channel */ |
if (sh->out.semantic_names[attr] == TGSI_SEMANTIC_PSIZE) { |
assert(so_info->output[i].start_component == 0); |
assert(so_info->output[i].num_components == 1); |
sh->so_info.output[i].start_component = 3; |
} |
} |
} |
/** |
* Add a shader variant to the shader state. |
*/ |
static struct ilo_shader * |
ilo_shader_state_add_variant(struct ilo_shader_state *state, |
const struct ilo_shader_variant *variant) |
{ |
struct ilo_shader *sh; |
switch (state->info.type) { |
case PIPE_SHADER_VERTEX: |
sh = ilo_shader_compile_vs(state, variant); |
break; |
case PIPE_SHADER_FRAGMENT: |
sh = ilo_shader_compile_fs(state, variant); |
break; |
case PIPE_SHADER_GEOMETRY: |
sh = ilo_shader_compile_gs(state, variant); |
break; |
case PIPE_SHADER_COMPUTE: |
sh = ilo_shader_compile_cs(state, variant); |
break; |
default: |
sh = NULL; |
break; |
} |
if (!sh) { |
assert(!"failed to compile shader"); |
return NULL; |
} |
sh->variant = *variant; |
copy_so_info(sh, &state->info.stream_output); |
ilo_shader_state_add_shader(state, sh); |
return sh; |
} |
/** |
* Update state->shader to point to a variant. If the variant does not exist, |
* it will be added first. |
*/ |
bool |
ilo_shader_state_use_variant(struct ilo_shader_state *state, |
const struct ilo_shader_variant *variant) |
{ |
struct ilo_shader *sh; |
bool construct_cso = false; |
sh = ilo_shader_state_search_variant(state, variant); |
if (!sh) { |
ilo_shader_state_gc(state); |
sh = ilo_shader_state_add_variant(state, variant); |
if (!sh) |
return false; |
construct_cso = true; |
} |
/* move to head */ |
if (state->variants.next != &sh->list) { |
list_del(&sh->list); |
list_add(&sh->list, &state->variants); |
} |
state->shader = sh; |
if (construct_cso) { |
switch (state->info.type) { |
case PIPE_SHADER_VERTEX: |
ilo_gpe_init_vs_cso(state->info.dev, state, &sh->cso); |
break; |
case PIPE_SHADER_GEOMETRY: |
ilo_gpe_init_gs_cso(state->info.dev, state, &sh->cso); |
break; |
case PIPE_SHADER_FRAGMENT: |
ilo_gpe_init_fs_cso(state->info.dev, state, &sh->cso); |
break; |
default: |
break; |
} |
} |
return true; |
} |
struct ilo_shader_state * |
ilo_shader_create_vs(const struct ilo_dev_info *dev, |
const struct pipe_shader_state *state, |
const struct ilo_context *precompile) |
{ |
struct ilo_shader_state *shader; |
shader = ilo_shader_state_create(precompile, PIPE_SHADER_VERTEX, state); |
/* states used in ilo_shader_variant_init() */ |
shader->info.non_orthogonal_states = ILO_DIRTY_VIEW_VS | |
ILO_DIRTY_RASTERIZER; |
return shader; |
} |
struct ilo_shader_state * |
ilo_shader_create_gs(const struct ilo_dev_info *dev, |
const struct pipe_shader_state *state, |
const struct ilo_context *precompile) |
{ |
struct ilo_shader_state *shader; |
shader = ilo_shader_state_create(precompile, PIPE_SHADER_GEOMETRY, state); |
/* states used in ilo_shader_variant_init() */ |
shader->info.non_orthogonal_states = ILO_DIRTY_VIEW_GS | |
ILO_DIRTY_VS | |
ILO_DIRTY_RASTERIZER; |
return shader; |
} |
struct ilo_shader_state * |
ilo_shader_create_fs(const struct ilo_dev_info *dev, |
const struct pipe_shader_state *state, |
const struct ilo_context *precompile) |
{ |
struct ilo_shader_state *shader; |
shader = ilo_shader_state_create(precompile, PIPE_SHADER_FRAGMENT, state); |
/* states used in ilo_shader_variant_init() */ |
shader->info.non_orthogonal_states = ILO_DIRTY_VIEW_FS | |
ILO_DIRTY_RASTERIZER | |
ILO_DIRTY_FB; |
return shader; |
} |
struct ilo_shader_state * |
ilo_shader_create_cs(const struct ilo_dev_info *dev, |
const struct pipe_compute_state *state, |
const struct ilo_context *precompile) |
{ |
struct ilo_shader_state *shader; |
shader = ilo_shader_state_create(precompile, PIPE_SHADER_COMPUTE, state); |
shader->info.non_orthogonal_states = 0; |
return shader; |
} |
/**
 * Destroy a shader state: free every compiled variant, the duplicated TGSI
 * tokens, and the state itself.
 */
void
ilo_shader_destroy(struct ilo_shader_state *shader)
{
   struct ilo_shader *sh, *next;

   LIST_FOR_EACH_ENTRY_SAFE(sh, next, &shader->variants, list)
      ilo_shader_destroy_kernel(sh);

   /* the tokens were duplicated with tgsi_dup_tokens() at creation */
   FREE((struct tgsi_token *) shader->info.tokens);
   FREE(shader);
}
/**
 * Return the type (PIPE_SHADER_x) of the shader.
 */
int
ilo_shader_get_type(const struct ilo_shader_state *shader)
{
   return shader->info.type;
}
/** |
* Select a kernel for the given context. This will compile a new kernel if |
* none of the existing kernels work with the context. |
* |
* \param ilo the context |
* \param dirty states of the context that are considered changed |
* \return true if a different kernel is selected |
*/ |
bool |
ilo_shader_select_kernel(struct ilo_shader_state *shader, |
const struct ilo_context *ilo, |
uint32_t dirty) |
{ |
const struct ilo_shader * const cur = shader->shader; |
struct ilo_shader_variant variant; |
if (!(shader->info.non_orthogonal_states & dirty)) |
return false; |
ilo_shader_variant_init(&variant, &shader->info, ilo); |
ilo_shader_state_use_variant(shader, &variant); |
return (shader->shader != cur); |
} |
static int |
route_attr(const int *semantics, const int *indices, int len, |
int semantic, int index) |
{ |
int i; |
for (i = 0; i < len; i++) { |
if (semantics[i] == semantic && indices[i] == index) |
return i; |
} |
/* failed to match for COLOR, try BCOLOR */ |
if (semantic == TGSI_SEMANTIC_COLOR) { |
for (i = 0; i < len; i++) { |
if (semantics[i] == TGSI_SEMANTIC_BCOLOR && indices[i] == index) |
return i; |
} |
} |
return -1; |
} |
/**
 * Select a routing for the given source shader and rasterizer state.
 *
 * The routing maps each FS input attribute (dst slot) to an output
 * attribute of the source stage (src slot), and records point sprite
 * replacement and two-sided-lighting swizzles for 3DSTATE_SBE.
 *
 * \return true if a different routing is selected
 */
bool
ilo_shader_select_kernel_routing(struct ilo_shader_state *shader,
                                 const struct ilo_shader_state *source,
                                 const struct ilo_rasterizer_state *rasterizer)
{
   const uint32_t sprite_coord_enable = rasterizer->state.sprite_coord_enable;
   const bool light_twoside = rasterizer->state.light_twoside;
   struct ilo_shader *kernel = shader->shader;
   struct ilo_kernel_routing *routing = &kernel->routing;
   const int *src_semantics, *src_indices;
   int src_len, max_src_slot;
   int dst_len, dst_slot;

   /* we are constructing 3DSTATE_SBE here */
   assert(shader->info.dev->gen >= ILO_GEN(6) &&
          shader->info.dev->gen <= ILO_GEN(7));

   assert(kernel);

   /* without a source stage, route the kernel's own inputs to themselves */
   if (source) {
      assert(source->shader);
      src_semantics = source->shader->out.semantic_names;
      src_indices = source->shader->out.semantic_indices;
      src_len = source->shader->out.count;
   }
   else {
      src_semantics = kernel->in.semantic_names;
      src_indices = kernel->in.semantic_indices;
      src_len = kernel->in.count;
   }

   /* no change: the remembered source attributes still match */
   if (kernel->routing_initialized &&
       routing->source_skip + routing->source_len <= src_len &&
       kernel->routing_sprite_coord_enable == sprite_coord_enable &&
       !memcmp(kernel->routing_src_semantics,
               &src_semantics[routing->source_skip],
               sizeof(kernel->routing_src_semantics[0]) * routing->source_len) &&
       !memcmp(kernel->routing_src_indices,
               &src_indices[routing->source_skip],
               sizeof(kernel->routing_src_indices[0]) * routing->source_len))
      return false;

   if (source) {
      /* skip PSIZE and POSITION (how about the optional CLIPDISTs?) */
      assert(src_semantics[0] == TGSI_SEMANTIC_PSIZE);
      assert(src_semantics[1] == TGSI_SEMANTIC_POSITION);
      routing->source_skip = 2;

      routing->source_len = src_len - routing->source_skip;
      src_semantics += routing->source_skip;
      src_indices += routing->source_skip;
   }
   else {
      routing->source_skip = 0;
      routing->source_len = src_len;
   }

   routing->const_interp_enable = kernel->in.const_interp_enable;
   routing->point_sprite_enable = 0;
   routing->swizzle_enable = false;

   assert(kernel->in.count <= Elements(routing->swizzles));
   dst_len = MIN2(kernel->in.count, Elements(routing->swizzles));
   max_src_slot = -1;

   for (dst_slot = 0; dst_slot < dst_len; dst_slot++) {
      const int semantic = kernel->in.semantic_names[dst_slot];
      const int index = kernel->in.semantic_indices[dst_slot];
      int src_slot;

      /* GENERIC attributes selected by the rasterizer become point sprites */
      if (semantic == TGSI_SEMANTIC_GENERIC &&
          (sprite_coord_enable & (1 << index)))
         routing->point_sprite_enable |= 1 << dst_slot;

      if (source) {
         src_slot = route_attr(src_semantics, src_indices,
               routing->source_len, semantic, index);

         /*
          * The source shader stage does not output this attribute.  The value
          * is supposed to be undefined, unless the attribute goes through
          * point sprite replacement or the attribute is
          * TGSI_SEMANTIC_POSITION.  In all cases, we do not care which source
          * attribute is picked.
          *
          * We should update the kernel code and omit the output of
          * TGSI_SEMANTIC_POSITION here.
          */
         if (src_slot < 0)
            src_slot = 0;
      }
      else {
         src_slot = dst_slot;
      }

      routing->swizzles[dst_slot] = src_slot;

      /* use the following slot for two-sided lighting */
      if (semantic == TGSI_SEMANTIC_COLOR && light_twoside &&
          src_slot + 1 < routing->source_len &&
          src_semantics[src_slot + 1] == TGSI_SEMANTIC_BCOLOR &&
          src_indices[src_slot + 1] == index) {
         routing->swizzles[dst_slot] |= ATTRIBUTE_SWIZZLE_INPUTATTR_FACING <<
            ATTRIBUTE_SWIZZLE_SHIFT;
         src_slot++;
      }

      if (routing->swizzles[dst_slot] != dst_slot)
         routing->swizzle_enable = true;

      if (max_src_slot < src_slot)
         max_src_slot = src_slot;
   }

   /* clear the unused tail of the swizzle table */
   memset(&routing->swizzles[dst_slot], 0, sizeof(routing->swizzles) -
         sizeof(routing->swizzles[0]) * dst_slot);

   /*
    * From the Sandy Bridge PRM, volume 2 part 1, page 248:
    *
    *   "It is UNDEFINED to set this field (Vertex URB Entry Read Length) to
    *    0 indicating no Vertex URB data to be read.
    *
    *    This field should be set to the minimum length required to read the
    *    maximum source attribute.  The maximum source attribute is indicated
    *    by the maximum value of the enabled Attribute # Source Attribute if
    *    Attribute Swizzle Enable is set, Number of Output Attributes-1 if
    *    enable is not set.
    *
    *    read_length = ceiling((max_source_attr+1)/2)
    *
    *    [errata] Corruption/Hang possible if length programmed larger than
    *    recommended"
    */
   routing->source_len = max_src_slot + 1;

   /* remember the states of the source */
   kernel->routing_initialized = true;
   kernel->routing_sprite_coord_enable = sprite_coord_enable;
   memcpy(kernel->routing_src_semantics, src_semantics,
         sizeof(kernel->routing_src_semantics[0]) * routing->source_len);
   memcpy(kernel->routing_src_indices, src_indices,
         sizeof(kernel->routing_src_indices[0]) * routing->source_len);

   return true;
}
/**
 * Return the cache offset of the selected kernel.  This must be called after
 * ilo_shader_select_kernel() and ilo_shader_cache_upload().
 */
uint32_t
ilo_shader_get_kernel_offset(const struct ilo_shader_state *shader)
{
   const struct ilo_shader *kernel = shader->shader;

   assert(kernel && kernel->uploaded);

   return kernel->cache_offset;
}
/**
 * Query a kernel parameter for the selected kernel.
 *
 * Each parameter maps directly onto a field of the compiled kernel or of
 * the parsed shader info; see enum ilo_kernel_param for the keys.
 */
int
ilo_shader_get_kernel_param(const struct ilo_shader_state *shader,
                            enum ilo_kernel_param param)
{
   const struct ilo_shader *kernel = shader->shader;
   int val;

   assert(kernel);

   switch (param) {
   case ILO_KERNEL_INPUT_COUNT:
      val = kernel->in.count;
      break;
   case ILO_KERNEL_OUTPUT_COUNT:
      val = kernel->out.count;
      break;
   case ILO_KERNEL_URB_DATA_START_REG:
      val = kernel->in.start_grf;
      break;
   case ILO_KERNEL_VS_INPUT_INSTANCEID:
      val = shader->info.has_instanceid;
      break;
   case ILO_KERNEL_VS_INPUT_VERTEXID:
      val = shader->info.has_vertexid;
      break;
   case ILO_KERNEL_VS_INPUT_EDGEFLAG:
      if (shader->info.edgeflag_in >= 0) {
         /* we rely on the state tracker here */
         assert(shader->info.edgeflag_in == kernel->in.count - 1);
         val = true;
      }
      else {
         val = false;
      }
      break;
   case ILO_KERNEL_VS_PCB_UCP_SIZE:
      val = kernel->pcb.clip_state_size;
      break;
   case ILO_KERNEL_VS_GEN6_SO:
      val = kernel->stream_output;
      break;
   case ILO_KERNEL_VS_GEN6_SO_START_REG:
      val = kernel->gs_start_grf;
      break;
   case ILO_KERNEL_VS_GEN6_SO_POINT_OFFSET:
      val = kernel->gs_offsets[0];
      break;
   case ILO_KERNEL_VS_GEN6_SO_LINE_OFFSET:
      val = kernel->gs_offsets[1];
      break;
   case ILO_KERNEL_VS_GEN6_SO_TRI_OFFSET:
      val = kernel->gs_offsets[2];
      break;
   case ILO_KERNEL_GS_DISCARD_ADJACENCY:
      val = kernel->in.discard_adj;
      break;
   case ILO_KERNEL_GS_GEN6_SVBI_POST_INC:
      val = kernel->svbi_post_inc;
      break;
   case ILO_KERNEL_FS_INPUT_Z:
   case ILO_KERNEL_FS_INPUT_W:
      val = kernel->in.has_pos;
      break;
   case ILO_KERNEL_FS_OUTPUT_Z:
      val = kernel->out.has_pos;
      break;
   case ILO_KERNEL_FS_USE_KILL:
      val = kernel->has_kill;
      break;
   case ILO_KERNEL_FS_BARYCENTRIC_INTERPOLATIONS:
      val = kernel->in.barycentric_interpolation_mode;
      break;
   case ILO_KERNEL_FS_DISPATCH_16_OFFSET:
      val = 0;
      break;
   default:
      assert(!"unknown kernel parameter");
      val = 0;
      break;
   }

   return val;
}
/**
 * Return the CSO of the selected kernel.  Valid only after
 * ilo_shader_select_kernel().
 */
const struct ilo_shader_cso *
ilo_shader_get_kernel_cso(const struct ilo_shader_state *shader)
{
   const struct ilo_shader *kernel = shader->shader;

   assert(kernel);

   return &kernel->cso;
}
/**
 * Return the stream output info of the selected kernel.  Valid only after
 * ilo_shader_select_kernel().
 */
const struct pipe_stream_output_info *
ilo_shader_get_kernel_so_info(const struct ilo_shader_state *shader)
{
   const struct ilo_shader *kernel = shader->shader;

   assert(kernel);

   return &kernel->so_info;
}
/**
 * Return the routing info of the selected kernel.  Valid only after
 * ilo_shader_select_kernel() and ilo_shader_select_kernel_routing().
 */
const struct ilo_kernel_routing *
ilo_shader_get_kernel_routing(const struct ilo_shader_state *shader)
{
   const struct ilo_shader *kernel = shader->shader;

   assert(kernel);

   return &kernel->routing;
}
/contrib/sdk/sources/Mesa/src/gallium/drivers/ilo/ilo_shader.h |
---|
0,0 → 1,148 |
/* |
* Mesa 3-D graphics library |
* |
* Copyright (C) 2012-2013 LunarG, Inc. |
* |
* Permission is hereby granted, free of charge, to any person obtaining a |
* copy of this software and associated documentation files (the "Software"), |
* to deal in the Software without restriction, including without limitation |
* the rights to use, copy, modify, merge, publish, distribute, sublicense, |
* and/or sell copies of the Software, and to permit persons to whom the |
* Software is furnished to do so, subject to the following conditions: |
* |
* The above copyright notice and this permission notice shall be included |
* in all copies or substantial portions of the Software. |
* |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING |
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER |
* DEALINGS IN THE SOFTWARE. |
* |
* Authors: |
* Chia-I Wu <olv@lunarg.com> |
*/ |
#ifndef ILO_SHADER_H |
#define ILO_SHADER_H |
#include "ilo_common.h" |
/* keys accepted by ilo_shader_get_kernel_param() */
enum ilo_kernel_param {
   ILO_KERNEL_INPUT_COUNT,
   ILO_KERNEL_OUTPUT_COUNT,
   ILO_KERNEL_URB_DATA_START_REG,

   ILO_KERNEL_VS_INPUT_INSTANCEID,
   ILO_KERNEL_VS_INPUT_VERTEXID,
   ILO_KERNEL_VS_INPUT_EDGEFLAG,
   ILO_KERNEL_VS_PCB_UCP_SIZE,
   ILO_KERNEL_VS_GEN6_SO,
   ILO_KERNEL_VS_GEN6_SO_START_REG,
   ILO_KERNEL_VS_GEN6_SO_POINT_OFFSET,
   ILO_KERNEL_VS_GEN6_SO_LINE_OFFSET,
   ILO_KERNEL_VS_GEN6_SO_TRI_OFFSET,

   ILO_KERNEL_GS_DISCARD_ADJACENCY,
   ILO_KERNEL_GS_GEN6_SVBI_POST_INC,

   ILO_KERNEL_FS_INPUT_Z,
   ILO_KERNEL_FS_INPUT_W,
   ILO_KERNEL_FS_OUTPUT_Z,
   ILO_KERNEL_FS_USE_KILL,
   ILO_KERNEL_FS_BARYCENTRIC_INTERPOLATIONS,
   ILO_KERNEL_FS_DISPATCH_16_OFFSET,

   ILO_KERNEL_PARAM_COUNT,
};

/* attribute routing computed by ilo_shader_select_kernel_routing() */
struct ilo_kernel_routing {
   uint32_t const_interp_enable;
   uint32_t point_sprite_enable;    /* bitmask over destination slots */
   unsigned source_skip, source_len;

   bool swizzle_enable;
   uint16_t swizzles[16];           /* per destination slot: source slot */
};

struct intel_bo;
struct ilo_context;
struct ilo_rasterizer_state;
struct ilo_shader_cache;
struct ilo_shader_state;
struct ilo_shader_cso;

/* shader cache management */
struct ilo_shader_cache *
ilo_shader_cache_create(void);

void
ilo_shader_cache_destroy(struct ilo_shader_cache *shc);

void
ilo_shader_cache_add(struct ilo_shader_cache *shc,
                     struct ilo_shader_state *shader);

void
ilo_shader_cache_remove(struct ilo_shader_cache *shc,
                        struct ilo_shader_state *shader);

int
ilo_shader_cache_upload(struct ilo_shader_cache *shc,
                        struct intel_bo *bo, unsigned offset,
                        bool incremental);

/* shader state creation and destruction */
struct ilo_shader_state *
ilo_shader_create_vs(const struct ilo_dev_info *dev,
                     const struct pipe_shader_state *state,
                     const struct ilo_context *precompile);

struct ilo_shader_state *
ilo_shader_create_gs(const struct ilo_dev_info *dev,
                     const struct pipe_shader_state *state,
                     const struct ilo_context *precompile);

struct ilo_shader_state *
ilo_shader_create_fs(const struct ilo_dev_info *dev,
                     const struct pipe_shader_state *state,
                     const struct ilo_context *precompile);

struct ilo_shader_state *
ilo_shader_create_cs(const struct ilo_dev_info *dev,
                     const struct pipe_compute_state *state,
                     const struct ilo_context *precompile);

void
ilo_shader_destroy(struct ilo_shader_state *shader);

int
ilo_shader_get_type(const struct ilo_shader_state *shader);

/* kernel selection and queries */
bool
ilo_shader_select_kernel(struct ilo_shader_state *shader,
                         const struct ilo_context *ilo,
                         uint32_t dirty);

bool
ilo_shader_select_kernel_routing(struct ilo_shader_state *shader,
                                 const struct ilo_shader_state *source,
                                 const struct ilo_rasterizer_state *rasterizer);

uint32_t
ilo_shader_get_kernel_offset(const struct ilo_shader_state *shader);

int
ilo_shader_get_kernel_param(const struct ilo_shader_state *shader,
                            enum ilo_kernel_param param);

const struct ilo_shader_cso *
ilo_shader_get_kernel_cso(const struct ilo_shader_state *shader);

const struct pipe_stream_output_info *
ilo_shader_get_kernel_so_info(const struct ilo_shader_state *shader);

const struct ilo_kernel_routing *
ilo_shader_get_kernel_routing(const struct ilo_shader_state *shader);

#endif /* ILO_SHADER_H */
/contrib/sdk/sources/Mesa/src/gallium/drivers/ilo/ilo_state.c |
---|
0,0 → 1,1449 |
/* |
* Mesa 3-D graphics library |
* |
* Copyright (C) 2012-2013 LunarG, Inc. |
* |
* Permission is hereby granted, free of charge, to any person obtaining a |
* copy of this software and associated documentation files (the "Software"), |
* to deal in the Software without restriction, including without limitation |
* the rights to use, copy, modify, merge, publish, distribute, sublicense, |
* and/or sell copies of the Software, and to permit persons to whom the |
* Software is furnished to do so, subject to the following conditions: |
* |
* The above copyright notice and this permission notice shall be included |
* in all copies or substantial portions of the Software. |
* |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING |
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER |
* DEALINGS IN THE SOFTWARE. |
* |
* Authors: |
* Chia-I Wu <olv@lunarg.com> |
*/ |
#include "util/u_framebuffer.h" |
#include "util/u_helpers.h" |
#include "util/u_upload_mgr.h" |
#include "ilo_context.h" |
#include "ilo_resource.h" |
#include "ilo_shader.h" |
#include "ilo_state.h" |
/*
 * Select the kernel variant for each bound shader stage (VS/GS/FS), and mark
 * the stage dirty when a new kernel gets selected.  Also re-derives FS
 * varying routing (SBE) whenever its inputs may have changed.
 */
static void
finalize_shader_states(struct ilo_context *ilo)
{
   unsigned type;

   for (type = 0; type < PIPE_SHADER_TYPES; type++) {
      struct ilo_shader_state *shader;
      uint32_t state;

      switch (type) {
      case PIPE_SHADER_VERTEX:
         shader = ilo->vs;
         state = ILO_DIRTY_VS;
         break;
      case PIPE_SHADER_GEOMETRY:
         shader = ilo->gs;
         state = ILO_DIRTY_GS;
         break;
      case PIPE_SHADER_FRAGMENT:
         shader = ilo->fs;
         state = ILO_DIRTY_FS;
         break;
      default:
         /* other stages (e.g. compute) are not finalized here */
         shader = NULL;
         state = 0;
         break;
      }

      if (!shader)
         continue;

      /* compile if the shader or the states it depends on changed */
      if (ilo->dirty & state) {
         /* shader itself is dirty: force a full reselect */
         ilo_shader_select_kernel(shader, ilo, ILO_DIRTY_ALL);
      }
      else if (ilo_shader_select_kernel(shader, ilo, ilo->dirty)) {
         /* mark the state dirty if a new kernel is selected */
         ilo->dirty |= state;
      }

      /* need to setup SBE for FS */
      if (type == PIPE_SHADER_FRAGMENT && ilo->dirty &
            (state | ILO_DIRTY_GS | ILO_DIRTY_VS | ILO_DIRTY_RASTERIZER)) {
         /* FS inputs come from GS when present, otherwise from VS */
         if (ilo_shader_select_kernel_routing(shader,
               (ilo->gs) ? ilo->gs : ilo->vs, ilo->rasterizer))
            ilo->dirty |= state;
      }
   }
}
/*
 * Upload user-memory constant buffers into GPU-visible resources and build
 * their SURFACE_STATE, so that draw-time code only sees real resources.
 */
static void
finalize_constant_buffers(struct ilo_context *ilo)
{
   int sh;

   if (!(ilo->dirty & ILO_DIRTY_CBUF))
      return;

   /* TODO push constants? */
   for (sh = 0; sh < PIPE_SHADER_TYPES; sh++) {
      unsigned enabled_mask = ilo->cbuf[sh].enabled_mask;

      while (enabled_mask) {
         struct ilo_cbuf_cso *cbuf;
         int i;

         i = u_bit_scan(&enabled_mask);
         cbuf = &ilo->cbuf[sh].cso[i];

         /* upload user buffer */
         if (cbuf->user_buffer) {
            /* constants are viewed as vec4s */
            const enum pipe_format elem_format =
               PIPE_FORMAT_R32G32B32A32_FLOAT;
            unsigned offset;

            /* NOTE(review): u_upload_data return not checked — assumes the
             * uploader always succeeds */
            u_upload_data(ilo->uploader, 0, cbuf->user_buffer_size,
                  cbuf->user_buffer, &offset, &cbuf->resource);

            ilo_gpe_init_view_surface_for_buffer(ilo->dev,
                  ilo_buffer(cbuf->resource),
                  offset, cbuf->user_buffer_size,
                  util_format_get_blocksize(elem_format), elem_format,
                  false, false, &cbuf->surface);

            /* the slot is now backed by cbuf->resource */
            cbuf->user_buffer = NULL;
            cbuf->user_buffer_size = 0;
         }
      }
   }
}
/*
 * Make sure the index buffer is a real, properly aligned HW resource.  User
 * index buffers, and resource offsets that are not aligned to the index
 * size, are handled by uploading just the range needed for this draw.
 */
static void
finalize_index_buffer(struct ilo_context *ilo)
{
   const struct pipe_resource *current_hw_res = ilo->ib.hw_resource;
   const bool need_upload = (ilo->draw->indexed &&
         (ilo->ib.user_buffer || ilo->ib.offset % ilo->ib.index_size));

   if (!(ilo->dirty & ILO_DIRTY_IB) && !need_upload)
      return;

   if (need_upload) {
      /* upload only the [start, start + count) index range */
      const unsigned offset = ilo->ib.index_size * ilo->draw->start;
      const unsigned size = ilo->ib.index_size * ilo->draw->count;
      unsigned hw_offset;

      if (ilo->ib.user_buffer) {
         u_upload_data(ilo->uploader, 0, size,
               ilo->ib.user_buffer + offset, &hw_offset, &ilo->ib.hw_resource);
      }
      else {
         u_upload_buffer(ilo->uploader, 0, ilo->ib.offset + offset, size,
               ilo->ib.buffer, &hw_offset, &ilo->ib.hw_resource);
      }

      /* the HW offset should be aligned */
      assert(hw_offset % ilo->ib.index_size == 0);
      ilo->ib.draw_start_offset = hw_offset / ilo->ib.index_size;

      /*
       * INDEX[ilo->draw->start] in the original buffer is INDEX[0] in the HW
       * resource
       */
      ilo->ib.draw_start_offset -= ilo->draw->start;
   }
   else {
      pipe_resource_reference(&ilo->ib.hw_resource, ilo->ib.buffer);

      /* note that index size may be zero when the draw is not indexed */
      if (ilo->draw->indexed)
         ilo->ib.draw_start_offset = ilo->ib.offset / ilo->ib.index_size;
      else
         ilo->ib.draw_start_offset = 0;
   }

   /* treat the IB as clean if the HW states do not change */
   if (ilo->ib.hw_resource == current_hw_res &&
       ilo->ib.hw_index_size == ilo->ib.index_size)
      ilo->dirty &= ~ILO_DIRTY_IB;
   else
      ilo->ib.hw_index_size = ilo->ib.index_size;
}
/**
 * Finalize states.  Some states depend on other states and are
 * incomplete/invalid until finalized.
 *
 * Called before a draw with the current pipe_draw_info; the finalize_*
 * helpers read ilo->draw.
 */
void
ilo_finalize_3d_states(struct ilo_context *ilo,
                       const struct pipe_draw_info *draw)
{
   ilo->draw = draw;

   finalize_shader_states(ilo);
   finalize_constant_buffers(ilo);
   finalize_index_buffer(ilo);

   /* unmap any buffers the uploads above mapped */
   u_upload_unmap(ilo->uploader);
}
static void * |
ilo_create_blend_state(struct pipe_context *pipe, |
const struct pipe_blend_state *state) |
{ |
struct ilo_context *ilo = ilo_context(pipe); |
struct ilo_blend_state *blend; |
blend = MALLOC_STRUCT(ilo_blend_state); |
assert(blend); |
ilo_gpe_init_blend(ilo->dev, state, blend); |
return blend; |
} |
/* Bind a blend CSO (may be NULL) and mark blend state dirty. */
static void
ilo_bind_blend_state(struct pipe_context *pipe, void *state)
{
   struct ilo_context *ilo = ilo_context(pipe);

   ilo->blend = state;
   ilo->dirty |= ILO_DIRTY_BLEND;
}
/* Free a blend CSO; the context no longer references it at this point. */
static void
ilo_delete_blend_state(struct pipe_context *pipe, void *state)
{
   FREE(state);
}
static void * |
ilo_create_sampler_state(struct pipe_context *pipe, |
const struct pipe_sampler_state *state) |
{ |
struct ilo_context *ilo = ilo_context(pipe); |
struct ilo_sampler_cso *sampler; |
sampler = MALLOC_STRUCT(ilo_sampler_cso); |
assert(sampler); |
ilo_gpe_init_sampler_cso(ilo->dev, state, sampler); |
return sampler; |
} |
/*
 * Common sampler binding helper.  For non-compute stages the whole table is
 * rewritten (slots outside [start, start+count) are unbound); for compute
 * only the given range is updated and the bound count is trimmed.
 */
static void
ilo_bind_sampler_states(struct pipe_context *pipe, unsigned shader,
                        unsigned start, unsigned count, void **samplers)
{
   struct ilo_context *ilo = ilo_context(pipe);
   struct ilo_sampler_state *dst = &ilo->sampler[shader];
   unsigned i;

   assert(start + count <= Elements(dst->cso));

   if (likely(shader != PIPE_SHADER_COMPUTE)) {
      /* NULL means "unbind everything" */
      if (!samplers) {
         start = 0;
         count = 0;
      }

      /* samplers not in range are also unbound */
      for (i = 0; i < start; i++)
         dst->cso[i] = NULL;
      for (; i < start + count; i++)
         dst->cso[i] = samplers[i - start];
      for (; i < dst->count; i++)
         dst->cso[i] = NULL;

      dst->count = start + count;
      return;
   }

   /* compute: update only [start, start+count) */
   if (samplers) {
      for (i = 0; i < count; i++)
         dst->cso[start + i] = samplers[i];
   }
   else {
      for (i = 0; i < count; i++)
         dst->cso[start + i] = NULL;
   }

   /* trim trailing NULL slots so dst->count is the last bound slot + 1 */
   if (dst->count <= start + count) {
      if (samplers)
         count += start;
      else
         count = start;

      while (count > 0 && !dst->cso[count - 1])
         count--;

      dst->count = count;
   }
}
static void |
ilo_bind_fragment_sampler_states(struct pipe_context *pipe, |
unsigned num_samplers, |
void **samplers) |
{ |
struct ilo_context *ilo = ilo_context(pipe); |
ilo_bind_sampler_states(pipe, PIPE_SHADER_FRAGMENT, |
0, num_samplers, samplers); |
ilo->dirty |= ILO_DIRTY_SAMPLER_FS; |
} |
static void |
ilo_bind_vertex_sampler_states(struct pipe_context *pipe, |
unsigned num_samplers, |
void **samplers) |
{ |
struct ilo_context *ilo = ilo_context(pipe); |
ilo_bind_sampler_states(pipe, PIPE_SHADER_VERTEX, |
0, num_samplers, samplers); |
ilo->dirty |= ILO_DIRTY_SAMPLER_VS; |
} |
static void |
ilo_bind_geometry_sampler_states(struct pipe_context *pipe, |
unsigned num_samplers, |
void **samplers) |
{ |
struct ilo_context *ilo = ilo_context(pipe); |
ilo_bind_sampler_states(pipe, PIPE_SHADER_GEOMETRY, |
0, num_samplers, samplers); |
ilo->dirty |= ILO_DIRTY_SAMPLER_GS; |
} |
static void |
ilo_bind_compute_sampler_states(struct pipe_context *pipe, |
unsigned start_slot, |
unsigned num_samplers, |
void **samplers) |
{ |
struct ilo_context *ilo = ilo_context(pipe); |
ilo_bind_sampler_states(pipe, PIPE_SHADER_COMPUTE, |
start_slot, num_samplers, samplers); |
ilo->dirty |= ILO_DIRTY_SAMPLER_CS; |
} |
/* Free a sampler CSO. */
static void
ilo_delete_sampler_state(struct pipe_context *pipe, void *state)
{
   FREE(state);
}
static void * |
ilo_create_rasterizer_state(struct pipe_context *pipe, |
const struct pipe_rasterizer_state *state) |
{ |
struct ilo_context *ilo = ilo_context(pipe); |
struct ilo_rasterizer_state *rast; |
rast = MALLOC_STRUCT(ilo_rasterizer_state); |
assert(rast); |
rast->state = *state; |
ilo_gpe_init_rasterizer(ilo->dev, state, rast); |
return rast; |
} |
/* Bind a rasterizer CSO (may be NULL) and mark rasterizer state dirty. */
static void
ilo_bind_rasterizer_state(struct pipe_context *pipe, void *state)
{
   struct ilo_context *ilo = ilo_context(pipe);

   ilo->rasterizer = state;
   ilo->dirty |= ILO_DIRTY_RASTERIZER;
}
/* Free a rasterizer CSO. */
static void
ilo_delete_rasterizer_state(struct pipe_context *pipe, void *state)
{
   FREE(state);
}
static void * |
ilo_create_depth_stencil_alpha_state(struct pipe_context *pipe, |
const struct pipe_depth_stencil_alpha_state *state) |
{ |
struct ilo_context *ilo = ilo_context(pipe); |
struct ilo_dsa_state *dsa; |
dsa = MALLOC_STRUCT(ilo_dsa_state); |
assert(dsa); |
ilo_gpe_init_dsa(ilo->dev, state, dsa); |
return dsa; |
} |
/* Bind a DSA CSO (may be NULL) and mark DSA state dirty. */
static void
ilo_bind_depth_stencil_alpha_state(struct pipe_context *pipe, void *state)
{
   struct ilo_context *ilo = ilo_context(pipe);

   ilo->dsa = state;
   ilo->dirty |= ILO_DIRTY_DSA;
}
/* Free a DSA CSO. */
static void
ilo_delete_depth_stencil_alpha_state(struct pipe_context *pipe, void *state)
{
   FREE(state);
}
static void * |
ilo_create_fs_state(struct pipe_context *pipe, |
const struct pipe_shader_state *state) |
{ |
struct ilo_context *ilo = ilo_context(pipe); |
struct ilo_shader_state *shader; |
shader = ilo_shader_create_fs(ilo->dev, state, ilo); |
assert(shader); |
ilo_shader_cache_add(ilo->shader_cache, shader); |
return shader; |
} |
/* Bind a fragment shader (may be NULL) and mark FS state dirty. */
static void
ilo_bind_fs_state(struct pipe_context *pipe, void *state)
{
   struct ilo_context *ilo = ilo_context(pipe);

   ilo->fs = state;
   ilo->dirty |= ILO_DIRTY_FS;
}
static void |
ilo_delete_fs_state(struct pipe_context *pipe, void *state) |
{ |
struct ilo_context *ilo = ilo_context(pipe); |
struct ilo_shader_state *fs = (struct ilo_shader_state *) state; |
ilo_shader_cache_remove(ilo->shader_cache, fs); |
ilo_shader_destroy(fs); |
} |
static void * |
ilo_create_vs_state(struct pipe_context *pipe, |
const struct pipe_shader_state *state) |
{ |
struct ilo_context *ilo = ilo_context(pipe); |
struct ilo_shader_state *shader; |
shader = ilo_shader_create_vs(ilo->dev, state, ilo); |
assert(shader); |
ilo_shader_cache_add(ilo->shader_cache, shader); |
return shader; |
} |
/* Bind a vertex shader (may be NULL) and mark VS state dirty. */
static void
ilo_bind_vs_state(struct pipe_context *pipe, void *state)
{
   struct ilo_context *ilo = ilo_context(pipe);

   ilo->vs = state;
   ilo->dirty |= ILO_DIRTY_VS;
}
static void |
ilo_delete_vs_state(struct pipe_context *pipe, void *state) |
{ |
struct ilo_context *ilo = ilo_context(pipe); |
struct ilo_shader_state *vs = (struct ilo_shader_state *) state; |
ilo_shader_cache_remove(ilo->shader_cache, vs); |
ilo_shader_destroy(vs); |
} |
static void * |
ilo_create_gs_state(struct pipe_context *pipe, |
const struct pipe_shader_state *state) |
{ |
struct ilo_context *ilo = ilo_context(pipe); |
struct ilo_shader_state *shader; |
shader = ilo_shader_create_gs(ilo->dev, state, ilo); |
assert(shader); |
ilo_shader_cache_add(ilo->shader_cache, shader); |
return shader; |
} |
/* Bind a geometry shader, skipping the no-op rebind util_blitter can do. */
static void
ilo_bind_gs_state(struct pipe_context *pipe, void *state)
{
   struct ilo_context *ilo = ilo_context(pipe);

   /* util_blitter may set this unnecessarily */
   if (ilo->gs == state)
      return;

   ilo->gs = state;
   ilo->dirty |= ILO_DIRTY_GS;
}
static void |
ilo_delete_gs_state(struct pipe_context *pipe, void *state) |
{ |
struct ilo_context *ilo = ilo_context(pipe); |
struct ilo_shader_state *gs = (struct ilo_shader_state *) state; |
ilo_shader_cache_remove(ilo->shader_cache, gs); |
ilo_shader_destroy(gs); |
} |
static void * |
ilo_create_vertex_elements_state(struct pipe_context *pipe, |
unsigned num_elements, |
const struct pipe_vertex_element *elements) |
{ |
struct ilo_context *ilo = ilo_context(pipe); |
struct ilo_ve_state *ve; |
ve = MALLOC_STRUCT(ilo_ve_state); |
assert(ve); |
ilo_gpe_init_ve(ilo->dev, num_elements, elements, ve); |
return ve; |
} |
/* Bind a vertex-elements CSO (may be NULL) and mark VE state dirty. */
static void
ilo_bind_vertex_elements_state(struct pipe_context *pipe, void *state)
{
   struct ilo_context *ilo = ilo_context(pipe);

   ilo->ve = state;
   ilo->dirty |= ILO_DIRTY_VE;
}
/* Free a vertex-elements CSO. */
static void
ilo_delete_vertex_elements_state(struct pipe_context *pipe, void *state)
{
   FREE(state);
}
/* Copy the blend color into the context and mark it dirty. */
static void
ilo_set_blend_color(struct pipe_context *pipe,
                    const struct pipe_blend_color *state)
{
   struct ilo_context *ilo = ilo_context(pipe);

   ilo->blend_color = *state;
   ilo->dirty |= ILO_DIRTY_BLEND_COLOR;
}
static void |
ilo_set_stencil_ref(struct pipe_context *pipe, |
const struct pipe_stencil_ref *state) |
{ |
struct ilo_context *ilo = ilo_context(pipe); |
/* util_blitter may set this unnecessarily */ |
if (!memcpy(&ilo->stencil_ref, state, sizeof(*state))) |
return; |
ilo->stencil_ref = *state; |
ilo->dirty |= ILO_DIRTY_STENCIL_REF; |
} |
/* Set the MSAA sample mask; no-op when the mask is unchanged. */
static void
ilo_set_sample_mask(struct pipe_context *pipe,
                    unsigned sample_mask)
{
   struct ilo_context *ilo = ilo_context(pipe);

   /* util_blitter may set this unnecessarily */
   if (ilo->sample_mask == sample_mask)
      return;

   ilo->sample_mask = sample_mask;
   ilo->dirty |= ILO_DIRTY_SAMPLE_MASK;
}
/* Copy the user clip planes into the context and mark them dirty. */
static void
ilo_set_clip_state(struct pipe_context *pipe,
                   const struct pipe_clip_state *state)
{
   struct ilo_context *ilo = ilo_context(pipe);

   ilo->clip = *state;
   ilo->dirty |= ILO_DIRTY_CLIP;
}
/*
 * Bind (or unbind) one constant buffer slot.  Three cases:
 *  - a resource-backed buffer: build its SURFACE_STATE now;
 *  - a user-memory buffer: remember the pointer, upload is deferred to
 *    finalize_constant_buffers();
 *  - neither (or buf == NULL): the slot is cleared.
 */
static void
ilo_set_constant_buffer(struct pipe_context *pipe,
                        uint shader, uint index,
                        struct pipe_constant_buffer *buf)
{
   struct ilo_context *ilo = ilo_context(pipe);
   struct ilo_cbuf_state *cbuf = &ilo->cbuf[shader];
   /* the interface binds a single buffer; loop kept for symmetry */
   const unsigned count = 1;
   unsigned i;

   assert(shader < Elements(ilo->cbuf));
   assert(index + count <= Elements(ilo->cbuf[shader].cso));

   if (buf) {
      for (i = 0; i < count; i++) {
         struct ilo_cbuf_cso *cso = &cbuf->cso[index + i];

         pipe_resource_reference(&cso->resource, buf[i].buffer);

         if (buf[i].buffer) {
            /* constants are viewed as vec4s */
            const enum pipe_format elem_format =
               PIPE_FORMAT_R32G32B32A32_FLOAT;

            ilo_gpe_init_view_surface_for_buffer(ilo->dev,
                  ilo_buffer(buf[i].buffer),
                  buf[i].buffer_offset, buf[i].buffer_size,
                  util_format_get_blocksize(elem_format), elem_format,
                  false, false, &cso->surface);

            cso->user_buffer = NULL;
            cso->user_buffer_size = 0;

            cbuf->enabled_mask |= 1 << (index + i);
         }
         else if (buf[i].user_buffer) {
            cso->surface.bo = NULL;

            /* buffer_offset does not apply for user buffer */
            cso->user_buffer = buf[i].user_buffer;
            cso->user_buffer_size = buf[i].buffer_size;

            cbuf->enabled_mask |= 1 << (index + i);
         }
         else {
            /* empty pipe_constant_buffer: clear the slot */
            cso->surface.bo = NULL;
            cso->user_buffer = NULL;
            cso->user_buffer_size = 0;

            cbuf->enabled_mask &= ~(1 << (index + i));
         }
      }
   }
   else {
      /* unbind: drop the resource reference and clear the slot */
      for (i = 0; i < count; i++) {
         struct ilo_cbuf_cso *cso = &cbuf->cso[index + i];

         pipe_resource_reference(&cso->resource, NULL);
         cso->surface.bo = NULL;
         cso->user_buffer = NULL;
         cso->user_buffer_size = 0;

         cbuf->enabled_mask &= ~(1 << (index + i));
      }
   }

   ilo->dirty |= ILO_DIRTY_CBUF;
}
static void |
ilo_set_framebuffer_state(struct pipe_context *pipe, |
const struct pipe_framebuffer_state *state) |
{ |
struct ilo_context *ilo = ilo_context(pipe); |
util_copy_framebuffer_state(&ilo->fb.state, state); |
if (state->nr_cbufs) |
ilo->fb.num_samples = state->cbufs[0]->texture->nr_samples; |
else if (state->zsbuf) |
ilo->fb.num_samples = state->zsbuf->texture->nr_samples; |
else |
ilo->fb.num_samples = 1; |
if (!ilo->fb.num_samples) |
ilo->fb.num_samples = 1; |
ilo->dirty |= ILO_DIRTY_FB; |
} |
/* Copy the polygon stipple pattern into the context and mark it dirty. */
static void
ilo_set_polygon_stipple(struct pipe_context *pipe,
                        const struct pipe_poly_stipple *state)
{
   struct ilo_context *ilo = ilo_context(pipe);

   ilo->poly_stipple = *state;
   ilo->dirty |= ILO_DIRTY_POLY_STIPPLE;
}
/* Convert the given scissor rects into HW form and mark scissors dirty. */
static void
ilo_set_scissor_states(struct pipe_context *pipe,
                       unsigned start_slot,
                       unsigned num_scissors,
                       const struct pipe_scissor_state *scissors)
{
   struct ilo_context *ilo = ilo_context(pipe);

   ilo_gpe_set_scissor(ilo->dev, start_slot, num_scissors,
         scissors, &ilo->scissor);

   ilo->dirty |= ILO_DIRTY_SCISSOR;
}
/*
 * Set (or clear, when viewports is NULL) a range of viewports.  Viewport 0
 * is additionally saved in pipe form for util_blitter.
 */
static void
ilo_set_viewport_states(struct pipe_context *pipe,
                        unsigned start_slot,
                        unsigned num_viewports,
                        const struct pipe_viewport_state *viewports)
{
   struct ilo_context *ilo = ilo_context(pipe);

   if (viewports) {
      unsigned i;

      for (i = 0; i < num_viewports; i++) {
         ilo_gpe_set_viewport_cso(ilo->dev, &viewports[i],
               &ilo->viewport.cso[start_slot + i]);
      }

      /* grow the bound count if the range extends past it */
      if (ilo->viewport.count < start_slot + num_viewports)
         ilo->viewport.count = start_slot + num_viewports;

      /* need to save viewport 0 for util_blitter */
      if (!start_slot && num_viewports)
         ilo->viewport.viewport0 = viewports[0];
   }
   else {
      /* shrink the bound count only when the cleared range is the tail */
      if (ilo->viewport.count <= start_slot + num_viewports &&
          ilo->viewport.count > start_slot)
         ilo->viewport.count = start_slot;
   }

   ilo->dirty |= ILO_DIRTY_VIEWPORT;
}
/*
 * Common sampler-view binding helper; mirrors ilo_bind_sampler_states but
 * with reference counting.  Non-compute stages rewrite the whole table;
 * compute updates only the given range and trims the bound count.
 */
static void
ilo_set_sampler_views(struct pipe_context *pipe, unsigned shader,
                      unsigned start, unsigned count,
                      struct pipe_sampler_view **views)
{
   struct ilo_context *ilo = ilo_context(pipe);
   struct ilo_view_state *dst = &ilo->view[shader];
   unsigned i;

   assert(start + count <= Elements(dst->states));

   if (likely(shader != PIPE_SHADER_COMPUTE)) {
      /* NULL means "unbind everything" */
      if (!views) {
         start = 0;
         count = 0;
      }

      /* views not in range are also unbound */
      for (i = 0; i < start; i++)
         pipe_sampler_view_reference(&dst->states[i], NULL);
      for (; i < start + count; i++)
         pipe_sampler_view_reference(&dst->states[i], views[i - start]);
      for (; i < dst->count; i++)
         pipe_sampler_view_reference(&dst->states[i], NULL);

      dst->count = start + count;
      return;
   }

   /* compute: update only [start, start+count) */
   if (views) {
      for (i = 0; i < count; i++)
         pipe_sampler_view_reference(&dst->states[start + i], views[i]);
   }
   else {
      for (i = 0; i < count; i++)
         pipe_sampler_view_reference(&dst->states[start + i], NULL);
   }

   /* trim trailing NULL slots so dst->count is the last bound slot + 1 */
   if (dst->count <= start + count) {
      if (views)
         count += start;
      else
         count = start;

      while (count > 0 && !dst->states[count - 1])
         count--;

      dst->count = count;
   }
}
static void |
ilo_set_fragment_sampler_views(struct pipe_context *pipe, |
unsigned num_views, |
struct pipe_sampler_view **views) |
{ |
struct ilo_context *ilo = ilo_context(pipe); |
ilo_set_sampler_views(pipe, PIPE_SHADER_FRAGMENT, |
0, num_views, views); |
ilo->dirty |= ILO_DIRTY_VIEW_FS; |
} |
static void |
ilo_set_vertex_sampler_views(struct pipe_context *pipe, |
unsigned num_views, |
struct pipe_sampler_view **views) |
{ |
struct ilo_context *ilo = ilo_context(pipe); |
ilo_set_sampler_views(pipe, PIPE_SHADER_VERTEX, |
0, num_views, views); |
ilo->dirty |= ILO_DIRTY_VIEW_VS; |
} |
static void |
ilo_set_geometry_sampler_views(struct pipe_context *pipe, |
unsigned num_views, |
struct pipe_sampler_view **views) |
{ |
struct ilo_context *ilo = ilo_context(pipe); |
ilo_set_sampler_views(pipe, PIPE_SHADER_GEOMETRY, |
0, num_views, views); |
ilo->dirty |= ILO_DIRTY_VIEW_GS; |
} |
static void |
ilo_set_compute_sampler_views(struct pipe_context *pipe, |
unsigned start_slot, unsigned num_views, |
struct pipe_sampler_view **views) |
{ |
struct ilo_context *ilo = ilo_context(pipe); |
ilo_set_sampler_views(pipe, PIPE_SHADER_COMPUTE, |
start_slot, num_views, views); |
ilo->dirty |= ILO_DIRTY_VIEW_CS; |
} |
/*
 * Bind (or unbind, when surfaces is NULL) a range of shader resource
 * surfaces, keeping dst->count at the last bound slot + 1.
 */
static void
ilo_set_shader_resources(struct pipe_context *pipe,
                         unsigned start, unsigned count,
                         struct pipe_surface **surfaces)
{
   struct ilo_context *ilo = ilo_context(pipe);
   struct ilo_resource_state *dst = &ilo->resource;
   unsigned i;

   assert(start + count <= Elements(dst->states));

   if (surfaces) {
      for (i = 0; i < count; i++)
         pipe_surface_reference(&dst->states[start + i], surfaces[i]);
   }
   else {
      for (i = 0; i < count; i++)
         pipe_surface_reference(&dst->states[start + i], NULL);
   }

   /* trim trailing NULL slots */
   if (dst->count <= start + count) {
      if (surfaces)
         count += start;
      else
         count = start;

      while (count > 0 && !dst->states[count - 1])
         count--;

      dst->count = count;
   }

   ilo->dirty |= ILO_DIRTY_RESOURCE;
}
static void |
ilo_set_vertex_buffers(struct pipe_context *pipe, |
unsigned start_slot, unsigned num_buffers, |
const struct pipe_vertex_buffer *buffers) |
{ |
struct ilo_context *ilo = ilo_context(pipe); |
unsigned i; |
/* no PIPE_CAP_USER_VERTEX_BUFFERS */ |
if (buffers) { |
for (i = 0; i < num_buffers; i++) |
assert(!buffers[i].user_buffer); |
} |
util_set_vertex_buffers_mask(ilo->vb.states, |
&ilo->vb.enabled_mask, buffers, start_slot, num_buffers); |
ilo->dirty |= ILO_DIRTY_VB; |
} |
static void |
ilo_set_index_buffer(struct pipe_context *pipe, |
const struct pipe_index_buffer *state) |
{ |
struct ilo_context *ilo = ilo_context(pipe); |
if (state) { |
pipe_resource_reference(&ilo->ib.buffer, state->buffer); |
ilo->ib.user_buffer = state->user_buffer; |
ilo->ib.offset = state->offset; |
ilo->ib.index_size = state->index_size; |
} |
else { |
pipe_resource_reference(&ilo->ib.buffer, NULL); |
ilo->ib.user_buffer = NULL; |
ilo->ib.offset = 0; |
ilo->ib.index_size = 0; |
} |
ilo->dirty |= ILO_DIRTY_IB; |
} |
static struct pipe_stream_output_target * |
ilo_create_stream_output_target(struct pipe_context *pipe, |
struct pipe_resource *res, |
unsigned buffer_offset, |
unsigned buffer_size) |
{ |
struct pipe_stream_output_target *target; |
target = MALLOC_STRUCT(pipe_stream_output_target); |
assert(target); |
pipe_reference_init(&target->reference, 1); |
target->buffer = NULL; |
pipe_resource_reference(&target->buffer, res); |
target->context = pipe; |
target->buffer_offset = buffer_offset; |
target->buffer_size = buffer_size; |
return target; |
} |
/*
 * Bind the stream-output targets, unbinding any previously bound targets
 * past the new count.
 */
static void
ilo_set_stream_output_targets(struct pipe_context *pipe,
                              unsigned num_targets,
                              struct pipe_stream_output_target **targets,
                              unsigned append_bitmask)
{
   struct ilo_context *ilo = ilo_context(pipe);
   unsigned i;

   if (!targets)
      num_targets = 0;

   /* util_blitter may set this unnecessarily */
   if (!ilo->so.count && !num_targets)
      return;

   for (i = 0; i < num_targets; i++)
      pipe_so_target_reference(&ilo->so.states[i], targets[i]);
   /* release targets beyond the new count */
   for (; i < ilo->so.count; i++)
      pipe_so_target_reference(&ilo->so.states[i], NULL);

   ilo->so.count = num_targets;
   ilo->so.append_bitmask = append_bitmask;

   ilo->so.enabled = (ilo->so.count > 0);

   ilo->dirty |= ILO_DIRTY_SO;
}
static void |
ilo_stream_output_target_destroy(struct pipe_context *pipe, |
struct pipe_stream_output_target *target) |
{ |
pipe_resource_reference(&target->buffer, NULL); |
FREE(target); |
} |
/*
 * Create a sampler view and its SURFACE_STATE.  Buffers and textures take
 * different paths: buffers are described by an element range, textures by
 * mip level and layer ranges.
 */
static struct pipe_sampler_view *
ilo_create_sampler_view(struct pipe_context *pipe,
                        struct pipe_resource *res,
                        const struct pipe_sampler_view *templ)
{
   struct ilo_context *ilo = ilo_context(pipe);
   struct ilo_view_cso *view;

   view = MALLOC_STRUCT(ilo_view_cso);
   assert(view);

   view->base = *templ;
   pipe_reference_init(&view->base.reference, 1);
   view->base.texture = NULL;
   pipe_resource_reference(&view->base.texture, res);
   view->base.context = pipe;

   if (res->target == PIPE_BUFFER) {
      const unsigned elem_size = util_format_get_blocksize(templ->format);
      const unsigned first_elem = templ->u.buf.first_element;
      /* last_element is inclusive */
      const unsigned num_elems = templ->u.buf.last_element - first_elem + 1;

      ilo_gpe_init_view_surface_for_buffer(ilo->dev, ilo_buffer(res),
            first_elem * elem_size, num_elems * elem_size,
            elem_size, templ->format, false, false, &view->surface);
   }
   else {
      struct ilo_texture *tex = ilo_texture(res);

      /* warn about degraded performance because of a missing binding flag */
      if (tex->tiling == INTEL_TILING_NONE &&
          !(tex->base.bind & PIPE_BIND_SAMPLER_VIEW)) {
         ilo_warn("creating sampler view for a resource "
                  "not created for sampling\n");
      }

      /* level/layer ranges are inclusive in the template */
      ilo_gpe_init_view_surface_for_texture(ilo->dev, tex,
            templ->format,
            templ->u.tex.first_level,
            templ->u.tex.last_level - templ->u.tex.first_level + 1,
            templ->u.tex.first_layer,
            templ->u.tex.last_layer - templ->u.tex.first_layer + 1,
            false, false, &view->surface);
   }

   return &view->base;
}
static void |
ilo_sampler_view_destroy(struct pipe_context *pipe, |
struct pipe_sampler_view *view) |
{ |
pipe_resource_reference(&view->texture, NULL); |
FREE(view); |
} |
/*
 * Create a render-target or depth/stencil surface CSO.  Which union member
 * (u.rt vs u.zs) is initialized depends on the template format.
 */
static struct pipe_surface *
ilo_create_surface(struct pipe_context *pipe,
                   struct pipe_resource *res,
                   const struct pipe_surface *templ)
{
   struct ilo_context *ilo = ilo_context(pipe);
   struct ilo_surface_cso *surf;

   surf = MALLOC_STRUCT(ilo_surface_cso);
   assert(surf);

   surf->base = *templ;
   pipe_reference_init(&surf->base.reference, 1);
   surf->base.texture = NULL;
   pipe_resource_reference(&surf->base.texture, res);

   surf->base.context = pipe;
   /* surface dimensions are those of the selected mip level */
   surf->base.width = u_minify(res->width0, templ->u.tex.level);
   surf->base.height = u_minify(res->height0, templ->u.tex.level);

   surf->is_rt = !util_format_is_depth_or_stencil(templ->format);

   if (surf->is_rt) {
      /* relax this? */
      assert(res->target != PIPE_BUFFER);

      /*
       * classic i965 sets render_cache_rw for constant buffers and sol
       * surfaces but not render buffers.  Why?
       */
      ilo_gpe_init_view_surface_for_texture(ilo->dev, ilo_texture(res),
            templ->format, templ->u.tex.level, 1,
            templ->u.tex.first_layer,
            templ->u.tex.last_layer - templ->u.tex.first_layer + 1,
            true, true, &surf->u.rt);
   }
   else {
      assert(res->target != PIPE_BUFFER);

      ilo_gpe_init_zs_surface(ilo->dev, ilo_texture(res),
            templ->format, templ->u.tex.level,
            templ->u.tex.first_layer,
            templ->u.tex.last_layer - templ->u.tex.first_layer + 1,
            &surf->u.zs);
   }

   return &surf->base;
}
static void |
ilo_surface_destroy(struct pipe_context *pipe, |
struct pipe_surface *surface) |
{ |
pipe_resource_reference(&surface->texture, NULL); |
FREE(surface); |
} |
static void * |
ilo_create_compute_state(struct pipe_context *pipe, |
const struct pipe_compute_state *state) |
{ |
struct ilo_context *ilo = ilo_context(pipe); |
struct ilo_shader_state *shader; |
shader = ilo_shader_create_cs(ilo->dev, state, ilo); |
assert(shader); |
ilo_shader_cache_add(ilo->shader_cache, shader); |
return shader; |
} |
/* Bind a compute shader (may be NULL) and mark CS state dirty. */
static void
ilo_bind_compute_state(struct pipe_context *pipe, void *state)
{
   struct ilo_context *ilo = ilo_context(pipe);

   ilo->cs = state;
   ilo->dirty |= ILO_DIRTY_CS;
}
static void |
ilo_delete_compute_state(struct pipe_context *pipe, void *state) |
{ |
struct ilo_context *ilo = ilo_context(pipe); |
struct ilo_shader_state *cs = (struct ilo_shader_state *) state; |
ilo_shader_cache_remove(ilo->shader_cache, cs); |
ilo_shader_destroy(cs); |
} |
/*
 * Bind (or unbind, when surfaces is NULL) a range of compute resource
 * surfaces, keeping dst->count at the last bound slot + 1.
 */
static void
ilo_set_compute_resources(struct pipe_context *pipe,
                          unsigned start, unsigned count,
                          struct pipe_surface **surfaces)
{
   struct ilo_context *ilo = ilo_context(pipe);
   struct ilo_resource_state *dst = &ilo->cs_resource;
   unsigned i;

   assert(start + count <= Elements(dst->states));

   if (surfaces) {
      for (i = 0; i < count; i++)
         pipe_surface_reference(&dst->states[start + i], surfaces[i]);
   }
   else {
      for (i = 0; i < count; i++)
         pipe_surface_reference(&dst->states[start + i], NULL);
   }

   /* trim trailing NULL slots */
   if (dst->count <= start + count) {
      if (surfaces)
         count += start;
      else
         count = start;

      while (count > 0 && !dst->states[count - 1])
         count--;

      dst->count = count;
   }

   ilo->dirty |= ILO_DIRTY_CS_RESOURCE;
}
/*
 * Bind (or unbind, when resources is NULL) a range of global bindings for
 * compute.  NOTE(review): the "handles" out-parameter is ignored — callers
 * presumably expect GPU addresses written back; confirm against the
 * compute launch path.
 */
static void
ilo_set_global_binding(struct pipe_context *pipe,
                       unsigned start, unsigned count,
                       struct pipe_resource **resources,
                       uint32_t **handles)
{
   struct ilo_context *ilo = ilo_context(pipe);
   struct ilo_global_binding *dst = &ilo->global_binding;
   unsigned i;

   assert(start + count <= Elements(dst->resources));

   if (resources) {
      for (i = 0; i < count; i++)
         pipe_resource_reference(&dst->resources[start + i], resources[i]);
   }
   else {
      for (i = 0; i < count; i++)
         pipe_resource_reference(&dst->resources[start + i], NULL);
   }

   /* trim trailing NULL slots */
   if (dst->count <= start + count) {
      if (resources)
         count += start;
      else
         count = start;

      while (count > 0 && !dst->resources[count - 1])
         count--;

      dst->count = count;
   }

   ilo->dirty |= ILO_DIRTY_GLOBAL_BINDING;
}
/**
 * Initialize state-related functions.
 *
 * Hooks every pipe_context state entry point up to the ilo implementations
 * above.
 */
void
ilo_init_state_functions(struct ilo_context *ilo)
{
   /* dirty flags are tracked in a 32-bit mask */
   STATIC_ASSERT(ILO_STATE_COUNT <= 32);

   ilo->base.create_blend_state = ilo_create_blend_state;
   ilo->base.bind_blend_state = ilo_bind_blend_state;
   ilo->base.delete_blend_state = ilo_delete_blend_state;
   ilo->base.create_sampler_state = ilo_create_sampler_state;
   ilo->base.bind_fragment_sampler_states = ilo_bind_fragment_sampler_states;
   ilo->base.bind_vertex_sampler_states = ilo_bind_vertex_sampler_states;
   ilo->base.bind_geometry_sampler_states = ilo_bind_geometry_sampler_states;
   ilo->base.bind_compute_sampler_states = ilo_bind_compute_sampler_states;
   ilo->base.delete_sampler_state = ilo_delete_sampler_state;
   ilo->base.create_rasterizer_state = ilo_create_rasterizer_state;
   ilo->base.bind_rasterizer_state = ilo_bind_rasterizer_state;
   ilo->base.delete_rasterizer_state = ilo_delete_rasterizer_state;
   ilo->base.create_depth_stencil_alpha_state = ilo_create_depth_stencil_alpha_state;
   ilo->base.bind_depth_stencil_alpha_state = ilo_bind_depth_stencil_alpha_state;
   ilo->base.delete_depth_stencil_alpha_state = ilo_delete_depth_stencil_alpha_state;
   ilo->base.create_fs_state = ilo_create_fs_state;
   ilo->base.bind_fs_state = ilo_bind_fs_state;
   ilo->base.delete_fs_state = ilo_delete_fs_state;
   ilo->base.create_vs_state = ilo_create_vs_state;
   ilo->base.bind_vs_state = ilo_bind_vs_state;
   ilo->base.delete_vs_state = ilo_delete_vs_state;
   ilo->base.create_gs_state = ilo_create_gs_state;
   ilo->base.bind_gs_state = ilo_bind_gs_state;
   ilo->base.delete_gs_state = ilo_delete_gs_state;
   ilo->base.create_vertex_elements_state = ilo_create_vertex_elements_state;
   ilo->base.bind_vertex_elements_state = ilo_bind_vertex_elements_state;
   ilo->base.delete_vertex_elements_state = ilo_delete_vertex_elements_state;
   ilo->base.set_blend_color = ilo_set_blend_color;
   ilo->base.set_stencil_ref = ilo_set_stencil_ref;
   ilo->base.set_sample_mask = ilo_set_sample_mask;
   ilo->base.set_clip_state = ilo_set_clip_state;
   ilo->base.set_constant_buffer = ilo_set_constant_buffer;
   ilo->base.set_framebuffer_state = ilo_set_framebuffer_state;
   ilo->base.set_polygon_stipple = ilo_set_polygon_stipple;
   ilo->base.set_scissor_states = ilo_set_scissor_states;
   ilo->base.set_viewport_states = ilo_set_viewport_states;
   ilo->base.set_fragment_sampler_views = ilo_set_fragment_sampler_views;
   ilo->base.set_vertex_sampler_views = ilo_set_vertex_sampler_views;
   ilo->base.set_geometry_sampler_views = ilo_set_geometry_sampler_views;
   ilo->base.set_compute_sampler_views = ilo_set_compute_sampler_views;
   ilo->base.set_shader_resources = ilo_set_shader_resources;
   ilo->base.set_vertex_buffers = ilo_set_vertex_buffers;
   ilo->base.set_index_buffer = ilo_set_index_buffer;
   ilo->base.create_stream_output_target = ilo_create_stream_output_target;
   ilo->base.stream_output_target_destroy = ilo_stream_output_target_destroy;
   ilo->base.set_stream_output_targets = ilo_set_stream_output_targets;
   ilo->base.create_sampler_view = ilo_create_sampler_view;
   ilo->base.sampler_view_destroy = ilo_sampler_view_destroy;
   ilo->base.create_surface = ilo_create_surface;
   ilo->base.surface_destroy = ilo_surface_destroy;
   ilo->base.create_compute_state = ilo_create_compute_state;
   ilo->base.bind_compute_state = ilo_bind_compute_state;
   ilo->base.delete_compute_state = ilo_delete_compute_state;
   ilo->base.set_compute_resources = ilo_set_compute_resources;
   ilo->base.set_global_binding = ilo_set_global_binding;
}
void |
ilo_init_states(struct ilo_context *ilo) |
{ |
ilo_gpe_set_scissor_null(ilo->dev, &ilo->scissor); |
ilo_gpe_init_zs_surface(ilo->dev, NULL, |
PIPE_FORMAT_NONE, 0, 0, 1, &ilo->fb.null_zs); |
ilo->dirty = ILO_DIRTY_ALL; |
} |
void |
ilo_cleanup_states(struct ilo_context *ilo) |
{ |
unsigned i, sh; |
for (i = 0; i < Elements(ilo->vb.states); i++) { |
if (ilo->vb.enabled_mask & (1 << i)) |
pipe_resource_reference(&ilo->vb.states[i].buffer, NULL); |
} |
pipe_resource_reference(&ilo->ib.buffer, NULL); |
pipe_resource_reference(&ilo->ib.hw_resource, NULL); |
for (i = 0; i < ilo->so.count; i++) |
pipe_so_target_reference(&ilo->so.states[i], NULL); |
for (sh = 0; sh < PIPE_SHADER_TYPES; sh++) { |
for (i = 0; i < ilo->view[sh].count; i++) { |
struct pipe_sampler_view *view = ilo->view[sh].states[i]; |
pipe_sampler_view_reference(&view, NULL); |
} |
for (i = 0; i < Elements(ilo->cbuf[sh].cso); i++) { |
struct ilo_cbuf_cso *cbuf = &ilo->cbuf[sh].cso[i]; |
pipe_resource_reference(&cbuf->resource, NULL); |
} |
} |
for (i = 0; i < ilo->resource.count; i++) |
pipe_surface_reference(&ilo->resource.states[i], NULL); |
for (i = 0; i < ilo->fb.state.nr_cbufs; i++) |
pipe_surface_reference(&ilo->fb.state.cbufs[i], NULL); |
if (ilo->fb.state.zsbuf) |
pipe_surface_reference(&ilo->fb.state.zsbuf, NULL); |
for (i = 0; i < ilo->cs_resource.count; i++) |
pipe_surface_reference(&ilo->cs_resource.states[i], NULL); |
for (i = 0; i < ilo->global_binding.count; i++) |
pipe_resource_reference(&ilo->global_binding.resources[i], NULL); |
} |
/** |
* Mark all states that have the resource dirty. |
*/ |
void |
ilo_mark_states_with_resource_dirty(struct ilo_context *ilo, |
const struct pipe_resource *res) |
{ |
uint32_t states = 0; |
unsigned sh, i; |
if (res->target == PIPE_BUFFER) { |
uint32_t vb_mask = ilo->vb.enabled_mask; |
while (vb_mask) { |
const unsigned idx = u_bit_scan(&vb_mask); |
if (ilo->vb.states[idx].buffer == res) { |
states |= ILO_DIRTY_VB; |
break; |
} |
} |
if (ilo->ib.buffer == res) { |
states |= ILO_DIRTY_IB; |
/* |
* finalize_index_buffer() has an optimization that clears |
* ILO_DIRTY_IB when the HW states do not change. However, it fails |
* to flush the VF cache when the HW states do not change, but the |
* contents of the IB has changed. Here, we set the index size to an |
* invalid value to avoid the optimization. |
*/ |
ilo->ib.hw_index_size = 0; |
} |
for (i = 0; i < ilo->so.count; i++) { |
if (ilo->so.states[i]->buffer == res) { |
states |= ILO_DIRTY_SO; |
break; |
} |
} |
} |
for (sh = 0; sh < PIPE_SHADER_TYPES; sh++) { |
for (i = 0; i < ilo->view[sh].count; i++) { |
struct pipe_sampler_view *view = ilo->view[sh].states[i]; |
if (view->texture == res) { |
static const unsigned view_dirty_bits[PIPE_SHADER_TYPES] = { |
[PIPE_SHADER_VERTEX] = ILO_DIRTY_VIEW_VS, |
[PIPE_SHADER_FRAGMENT] = ILO_DIRTY_VIEW_FS, |
[PIPE_SHADER_GEOMETRY] = ILO_DIRTY_VIEW_GS, |
[PIPE_SHADER_COMPUTE] = ILO_DIRTY_VIEW_CS, |
}; |
states |= view_dirty_bits[sh]; |
break; |
} |
} |
if (res->target == PIPE_BUFFER) { |
for (i = 0; i < Elements(ilo->cbuf[sh].cso); i++) { |
struct ilo_cbuf_cso *cbuf = &ilo->cbuf[sh].cso[i]; |
if (cbuf->resource == res) { |
states |= ILO_DIRTY_CBUF; |
break; |
} |
} |
} |
} |
for (i = 0; i < ilo->resource.count; i++) { |
if (ilo->resource.states[i]->texture == res) { |
states |= ILO_DIRTY_RESOURCE; |
break; |
} |
} |
/* for now? */ |
if (res->target != PIPE_BUFFER) { |
for (i = 0; i < ilo->fb.state.nr_cbufs; i++) { |
if (ilo->fb.state.cbufs[i]->texture == res) { |
states |= ILO_DIRTY_FB; |
break; |
} |
} |
if (ilo->fb.state.zsbuf && ilo->fb.state.zsbuf->texture == res) |
states |= ILO_DIRTY_FB; |
} |
for (i = 0; i < ilo->cs_resource.count; i++) { |
pipe_surface_reference(&ilo->cs_resource.states[i], NULL); |
if (ilo->cs_resource.states[i]->texture == res) { |
states |= ILO_DIRTY_CS_RESOURCE; |
break; |
} |
} |
for (i = 0; i < ilo->global_binding.count; i++) { |
if (ilo->global_binding.resources[i] == res) { |
states |= ILO_DIRTY_GLOBAL_BINDING; |
break; |
} |
} |
ilo->dirty |= states; |
} |
/contrib/sdk/sources/Mesa/src/gallium/drivers/ilo/ilo_state.h |
---|
0,0 → 1,139 |
/* |
* Mesa 3-D graphics library |
* |
* Copyright (C) 2012-2013 LunarG, Inc. |
* |
* Permission is hereby granted, free of charge, to any person obtaining a |
* copy of this software and associated documentation files (the "Software"), |
* to deal in the Software without restriction, including without limitation |
* the rights to use, copy, modify, merge, publish, distribute, sublicense, |
* and/or sell copies of the Software, and to permit persons to whom the |
* Software is furnished to do so, subject to the following conditions: |
* |
* The above copyright notice and this permission notice shall be included |
* in all copies or substantial portions of the Software. |
* |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING |
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER |
* DEALINGS IN THE SOFTWARE. |
* |
* Authors: |
* Chia-I Wu <olv@lunarg.com> |
*/ |
#ifndef ILO_STATE_H |
#define ILO_STATE_H |
#include "ilo_common.h" |
/** |
* States that we track. |
* |
* XXX Do we want to count each sampler or vertex buffer as a state? If that |
* is the case, there are simply not enough bits. |
* |
* XXX We want to treat primitive type and depth clear value as states, but |
* there are not enough bits. |
*/ |
enum ilo_state {
   /* vertex fetch */
   ILO_STATE_VB,
   ILO_STATE_VE,
   ILO_STATE_IB,
   /* shader stages and stream output */
   ILO_STATE_VS,
   ILO_STATE_GS,
   ILO_STATE_SO,
   /* fixed-function raster pipeline */
   ILO_STATE_CLIP,
   ILO_STATE_VIEWPORT,
   ILO_STATE_SCISSOR,
   ILO_STATE_RASTERIZER,
   ILO_STATE_POLY_STIPPLE,
   ILO_STATE_SAMPLE_MASK,
   ILO_STATE_FS,
   /* output merger */
   ILO_STATE_DSA,
   ILO_STATE_STENCIL_REF,
   ILO_STATE_BLEND,
   ILO_STATE_BLEND_COLOR,
   ILO_STATE_FB,
   /* per-stage samplers and sampler views */
   ILO_STATE_SAMPLER_VS,
   ILO_STATE_SAMPLER_GS,
   ILO_STATE_SAMPLER_FS,
   ILO_STATE_SAMPLER_CS,
   ILO_STATE_VIEW_VS,
   ILO_STATE_VIEW_GS,
   ILO_STATE_VIEW_FS,
   ILO_STATE_VIEW_CS,
   /* buffers and compute bindings */
   ILO_STATE_CBUF,
   ILO_STATE_RESOURCE,
   ILO_STATE_CS,
   ILO_STATE_CS_RESOURCE,
   ILO_STATE_GLOBAL_BINDING,

   /* number of tracked states; must stay last */
   ILO_STATE_COUNT,
};
/** |
* Dirty flags of the states. |
*/ |
enum ilo_dirty_flags {
   /* one bit per ilo_state; the enum values above are the bit positions */
   ILO_DIRTY_VB             = 1 << ILO_STATE_VB,
   ILO_DIRTY_VE             = 1 << ILO_STATE_VE,
   ILO_DIRTY_IB             = 1 << ILO_STATE_IB,
   ILO_DIRTY_VS             = 1 << ILO_STATE_VS,
   ILO_DIRTY_GS             = 1 << ILO_STATE_GS,
   ILO_DIRTY_SO             = 1 << ILO_STATE_SO,
   ILO_DIRTY_CLIP           = 1 << ILO_STATE_CLIP,
   ILO_DIRTY_VIEWPORT       = 1 << ILO_STATE_VIEWPORT,
   ILO_DIRTY_SCISSOR        = 1 << ILO_STATE_SCISSOR,
   ILO_DIRTY_RASTERIZER     = 1 << ILO_STATE_RASTERIZER,
   ILO_DIRTY_POLY_STIPPLE   = 1 << ILO_STATE_POLY_STIPPLE,
   ILO_DIRTY_SAMPLE_MASK    = 1 << ILO_STATE_SAMPLE_MASK,
   ILO_DIRTY_FS             = 1 << ILO_STATE_FS,
   ILO_DIRTY_DSA            = 1 << ILO_STATE_DSA,
   ILO_DIRTY_STENCIL_REF    = 1 << ILO_STATE_STENCIL_REF,
   ILO_DIRTY_BLEND          = 1 << ILO_STATE_BLEND,
   ILO_DIRTY_BLEND_COLOR    = 1 << ILO_STATE_BLEND_COLOR,
   ILO_DIRTY_FB             = 1 << ILO_STATE_FB,
   ILO_DIRTY_SAMPLER_VS     = 1 << ILO_STATE_SAMPLER_VS,
   ILO_DIRTY_SAMPLER_GS     = 1 << ILO_STATE_SAMPLER_GS,
   ILO_DIRTY_SAMPLER_FS     = 1 << ILO_STATE_SAMPLER_FS,
   ILO_DIRTY_SAMPLER_CS     = 1 << ILO_STATE_SAMPLER_CS,
   ILO_DIRTY_VIEW_VS        = 1 << ILO_STATE_VIEW_VS,
   ILO_DIRTY_VIEW_GS        = 1 << ILO_STATE_VIEW_GS,
   ILO_DIRTY_VIEW_FS        = 1 << ILO_STATE_VIEW_FS,
   ILO_DIRTY_VIEW_CS        = 1 << ILO_STATE_VIEW_CS,
   ILO_DIRTY_CBUF           = 1 << ILO_STATE_CBUF,
   ILO_DIRTY_RESOURCE       = 1 << ILO_STATE_RESOURCE,
   ILO_DIRTY_CS             = 1 << ILO_STATE_CS,
   ILO_DIRTY_CS_RESOURCE    = 1 << ILO_STATE_CS_RESOURCE,
   ILO_DIRTY_GLOBAL_BINDING = 1 << ILO_STATE_GLOBAL_BINDING,
   /* all bits set: used right after context creation */
   ILO_DIRTY_ALL            = 0xffffffff,
};
struct pipe_draw_info;
struct pipe_resource;
struct ilo_context;

/* hook the state setters into the pipe context vtable */
void
ilo_init_state_functions(struct ilo_context *ilo);

/* set up the initial states and mark everything dirty */
void
ilo_init_states(struct ilo_context *ilo);

/* release all resource references held by the states */
void
ilo_cleanup_states(struct ilo_context *ilo);

/* finalize the states before a draw call */
void
ilo_finalize_3d_states(struct ilo_context *ilo,
                       const struct pipe_draw_info *draw);

/* flag states referencing \p res dirty (e.g., after its bo is replaced) */
void
ilo_mark_states_with_resource_dirty(struct ilo_context *ilo,
                                    const struct pipe_resource *res);

#endif /* ILO_STATE_H */
/contrib/sdk/sources/Mesa/src/gallium/drivers/ilo/ilo_transfer.c |
---|
0,0 → 1,1050 |
/* |
* Mesa 3-D graphics library |
* |
* Copyright (C) 2012-2013 LunarG, Inc. |
* |
* Permission is hereby granted, free of charge, to any person obtaining a |
* copy of this software and associated documentation files (the "Software"), |
* to deal in the Software without restriction, including without limitation |
* the rights to use, copy, modify, merge, publish, distribute, sublicense, |
* and/or sell copies of the Software, and to permit persons to whom the |
* Software is furnished to do so, subject to the following conditions: |
* |
* The above copyright notice and this permission notice shall be included |
* in all copies or substantial portions of the Software. |
* |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING |
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER |
* DEALINGS IN THE SOFTWARE. |
* |
* Authors: |
* Chia-I Wu <olv@lunarg.com> |
*/ |
#include "util/u_surface.h" |
#include "util/u_transfer.h" |
#include "util/u_format_etc.h" |
#include "ilo_cp.h" |
#include "ilo_context.h" |
#include "ilo_resource.h" |
#include "ilo_state.h" |
#include "ilo_transfer.h" |
static bool |
is_bo_busy(struct ilo_context *ilo, struct intel_bo *bo, bool *need_flush) |
{ |
const bool referenced = intel_bo_references(ilo->cp->bo, bo); |
if (need_flush) |
*need_flush = referenced; |
if (referenced) |
return true; |
return intel_bo_is_busy(bo); |
} |
static bool |
map_bo_for_transfer(struct ilo_context *ilo, struct intel_bo *bo, |
const struct ilo_transfer *xfer) |
{ |
int err; |
switch (xfer->method) { |
case ILO_TRANSFER_MAP_CPU: |
err = intel_bo_map(bo, (xfer->base.usage & PIPE_TRANSFER_WRITE)); |
break; |
case ILO_TRANSFER_MAP_GTT: |
err = intel_bo_map_gtt(bo); |
break; |
case ILO_TRANSFER_MAP_UNSYNC: |
err = intel_bo_map_unsynchronized(bo); |
break; |
default: |
assert(!"unknown mapping method"); |
err = -1; |
break; |
} |
return !err; |
} |
/** |
* Choose the best mapping method, depending on the transfer usage and whether |
* the bo is busy. |
*/ |
/**
 * Choose the best mapping method, depending on the transfer usage and whether
 * the bo is busy.
 *
 * Returns false only when the transfer would stall and the caller asked for
 * PIPE_TRANSFER_DONTBLOCK.
 */
static bool
choose_transfer_method(struct ilo_context *ilo, struct ilo_transfer *xfer)
{
   struct pipe_resource *res = xfer->base.resource;
   const unsigned usage = xfer->base.usage;
   /* prefer map() when there is the last-level cache */
   const bool prefer_cpu =
      (ilo->dev->has_llc || (usage & PIPE_TRANSFER_READ));
   struct ilo_texture *tex;
   struct ilo_buffer *buf;
   struct intel_bo *bo;
   bool tiled, need_flush;

   /* pick the bo and tiling from either the buffer or the texture */
   if (res->target == PIPE_BUFFER) {
      tex = NULL;
      buf = ilo_buffer(res);
      bo = buf->bo;
      tiled = false;
   }
   else {
      buf = NULL;
      tex = ilo_texture(res);
      bo = tex->bo;
      tiled = (tex->tiling != INTEL_TILING_NONE);
   }

   /* choose between mapping through CPU or GTT */
   if (usage & PIPE_TRANSFER_MAP_DIRECTLY) {
      /* we do not want fencing */
      if (tiled || prefer_cpu)
         xfer->method = ILO_TRANSFER_MAP_CPU;
      else
         xfer->method = ILO_TRANSFER_MAP_GTT;
   }
   else {
      if (!tiled && prefer_cpu)
         xfer->method = ILO_TRANSFER_MAP_CPU;
      else
         xfer->method = ILO_TRANSFER_MAP_GTT;
   }

   /* see if we can avoid stalling */
   if (is_bo_busy(ilo, bo, &need_flush)) {
      bool will_stall = true;

      if (usage & PIPE_TRANSFER_MAP_DIRECTLY) {
         /* nothing we can do */
      }
      else if (usage & PIPE_TRANSFER_UNSYNCHRONIZED) {
         /* unsynchronized gtt mapping does not stall */
         xfer->method = ILO_TRANSFER_MAP_UNSYNC;
         will_stall = false;
      }
      else if (usage & PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE) {
         /* discard old bo and allocate a new one for mapping */
         if ((tex && ilo_texture_alloc_bo(tex)) ||
             (buf && ilo_buffer_alloc_bo(buf))) {
            /* states referencing the old bo must be re-emitted */
            ilo_mark_states_with_resource_dirty(ilo, res);
            will_stall = false;
         }
      }
      else if (usage & PIPE_TRANSFER_FLUSH_EXPLICIT) {
         /*
          * We could allocate and return a system buffer here.  When a region of
          * the buffer is explicitly flushed, we pwrite() the region to a
          * temporary bo and emit pipelined copy blit.
          *
          * For now, do nothing.
          */
      }
      else if (usage & PIPE_TRANSFER_DISCARD_RANGE) {
         /*
          * We could allocate a temporary bo for mapping, and emit pipelined copy
          * blit upon unmapping.
          *
          * For now, do nothing.
          */
      }

      if (will_stall) {
         if (usage & PIPE_TRANSFER_DONTBLOCK)
            return false;

         /* flush to make bo busy (so that map() stalls as it should be) */
         if (need_flush)
            ilo_cp_flush(ilo->cp);
      }
   }

   /* formats needing a software staging buffer override the choice above */
   if (tex && !(usage & PIPE_TRANSFER_MAP_DIRECTLY)) {
      /* separate stencil or stencil-only needs interleaving/de-interleaving */
      if (tex->separate_s8 || tex->bo_format == PIPE_FORMAT_S8_UINT)
         xfer->method = ILO_TRANSFER_MAP_SW_ZS;
      /* need to convert on-the-fly */
      else if (tex->bo_format != tex->base.format)
         xfer->method = ILO_TRANSFER_MAP_SW_CONVERT;
   }

   return true;
}
static void |
tex_get_box_origin(const struct ilo_texture *tex, |
unsigned level, unsigned slice, |
const struct pipe_box *box, |
unsigned *mem_x, unsigned *mem_y) |
{ |
unsigned x, y; |
x = tex->slice_offsets[level][slice + box->z].x + box->x; |
y = tex->slice_offsets[level][slice + box->z].y + box->y; |
assert(x % tex->block_width == 0 && y % tex->block_height == 0); |
*mem_x = x / tex->block_width * tex->bo_cpp; |
*mem_y = y / tex->block_height; |
} |
static unsigned |
tex_get_box_offset(const struct ilo_texture *tex, unsigned level, |
const struct pipe_box *box) |
{ |
unsigned mem_x, mem_y; |
tex_get_box_origin(tex, level, 0, box, &mem_x, &mem_y); |
return mem_y * tex->bo_stride + mem_x; |
} |
static unsigned |
tex_get_slice_stride(const struct ilo_texture *tex, unsigned level) |
{ |
unsigned qpitch; |
/* there is no 3D array texture */ |
assert(tex->base.array_size == 1 || tex->base.depth0 == 1); |
if (tex->base.array_size == 1) { |
/* non-array, non-3D */ |
if (tex->base.depth0 == 1) |
return 0; |
/* only the first level has a fixed slice stride */ |
if (level > 0) { |
assert(!"no slice stride for 3D texture with level > 0"); |
return 0; |
} |
} |
qpitch = tex->slice_offsets[level][1].y - tex->slice_offsets[level][0].y; |
assert(qpitch % tex->block_height == 0); |
return (qpitch / tex->block_height) * tex->bo_stride; |
} |
static unsigned
tex_tile_x_swizzle(unsigned addr)
{
   /*
    * From the Ivy Bridge PRM, volume 1 part 2, page 24:
    *
    *   "As shown in the tiling algorithm, the new address bit[6] should be:
    *
    *      Address bit[6] <= TiledAddr bit[6] XOR
    *                        TiledAddr bit[9] XOR
    *                        TiledAddr bit[10]"
    */
   const unsigned bit6 = (addr ^ (addr >> 3) ^ (addr >> 4)) & 0x40;

   /* replace bit 6, leave every other bit untouched */
   return (addr & ~0x40u) | bit6;
}
static unsigned
tex_tile_y_swizzle(unsigned addr)
{
   /*
    * From the Ivy Bridge PRM, volume 1 part 2, page 24:
    *
    *   "As shown in the tiling algorithm, The new address bit[6] becomes:
    *
    *      Address bit[6] <= TiledAddr bit[6] XOR
    *                        TiledAddr bit[9]"
    */
   const unsigned bit6 = (addr ^ (addr >> 3)) & 0x40;

   /* replace bit 6, leave every other bit untouched */
   return (addr & ~0x40u) | bit6;
}
static unsigned
tex_tile_x_offset(unsigned mem_x, unsigned mem_y,
                  unsigned tiles_per_row, bool swizzle)
{
   /*
    * From the Sandy Bridge PRM, volume 1 part 2, page 21, we know that a
    * X-major tile has 8 rows and 32 OWord columns (512 bytes).  Tiles in the
    * tiled region are numbered in row-major order, starting from zero.  The
    * tile number can thus be calculated as follows:
    *
    *    tile = (mem_y / 8) * tiles_per_row + (mem_x / 512)
    *
    * OWords in that tile are also numbered in row-major order, starting from
    * zero.  The OWord number can thus be calculated as follows:
    *
    *    oword = (mem_y % 8) * 32 + ((mem_x % 512) / 16)
    *
    * and the tiled offset is
    *
    *    offset = tile * 4096 + oword * 16 + (mem_x % 16)
    *           = tile * 4096 + (mem_y % 8) * 512 + (mem_x % 512)
    */
   const unsigned tile = (mem_y / 8) * tiles_per_row + (mem_x / 512);
   const unsigned offset = tile * 4096 + (mem_y % 8) * 512 + (mem_x % 512);

   return (swizzle) ? tex_tile_x_swizzle(offset) : offset;
}
static unsigned
tex_tile_y_offset(unsigned mem_x, unsigned mem_y,
                  unsigned tiles_per_row, bool swizzle)
{
   /*
    * From the Sandy Bridge PRM, volume 1 part 2, page 22, we know that a
    * Y-major tile has 32 rows and 8 OWord columns (128 bytes).  Tiles in the
    * tiled region are numbered in row-major order, starting from zero.  The
    * tile number can thus be calculated as follows:
    *
    *    tile = (mem_y / 32) * tiles_per_row + (mem_x / 128)
    *
    * OWords in that tile are numbered in column-major order, starting from
    * zero.  The OWord number can thus be calculated as follows:
    *
    *    oword = ((mem_x % 128) / 16) * 32 + (mem_y % 32)
    *
    * and the tiled offset is
    *
    *    offset = tile * 4096 + oword * 16 + (mem_x % 16)
    */
   const unsigned tile = (mem_y / 32) * tiles_per_row + (mem_x / 128);
   const unsigned oword = ((mem_x % 128) / 16) * 32 + (mem_y % 32);
   const unsigned offset = tile * 4096 + oword * 16 + (mem_x % 16);

   return (swizzle) ? tex_tile_y_swizzle(offset) : offset;
}
static unsigned
tex_tile_w_offset(unsigned mem_x, unsigned mem_y,
                  unsigned tiles_per_row, bool swizzle)
{
   /*
    * From the Sandy Bridge PRM, volume 1 part 2, page 23, we know that a
    * W-major tile has 8 8x8-block rows and 8 8x8-block columns.  Tiles in the
    * tiled region are numbered in row-major order, starting from zero.  The
    * tile number can thus be calculated as follows:
    *
    *    tile = (mem_y / 64) * tiles_per_row + (mem_x / 64)
    *
    * 8x8-blocks in that tile are numbered in column-major order, starting
    * from zero.  The 8x8-block number can thus be calculated as follows:
    *
    *    blk8 = ((mem_x % 64) / 8) * 8 + ((mem_y % 64) / 8)
    *
    * Each 8x8-block is divided into 4 4x4-blocks, in row-major order.  Each
    * 4x4-block is further divided into 4 2x2-blocks, also in row-major order.
    * We have
    *
    *    blk4 = (((mem_y % 64) / 4) & 1) * 2 + (((mem_x % 64) / 4) & 1)
    *    blk2 = (((mem_y % 64) / 2) & 1) * 2 + (((mem_x % 64) / 2) & 1)
    *    blk1 = (((mem_y % 64)    ) & 1) * 2 + (((mem_x % 64)    ) & 1)
    *
    * and the tiled offset is
    *
    *    offset = tile * 4096 + blk8 * 64 + blk4 * 16 + blk2 * 4 + blk1
    */
   const unsigned tile = (mem_y / 64) * tiles_per_row + (mem_x / 64);
   const unsigned blk8 = ((mem_x / 8) % 8) * 8 + ((mem_y / 8) % 8);
   const unsigned blk4 = ((mem_y / 4) & 1) * 2 + ((mem_x / 4) & 1);
   const unsigned blk2 = ((mem_y / 2) & 1) * 2 + ((mem_x / 2) & 1);
   const unsigned blk1 = (mem_y & 1) * 2 + (mem_x & 1);
   const unsigned offset = tile * 4096 + blk8 * 64 + blk4 * 16 + blk2 * 4 + blk1;

   /* W-tiled surfaces use the Y-tile address swizzle */
   return (swizzle) ? tex_tile_y_swizzle(offset) : offset;
}
static unsigned
tex_tile_none_offset(unsigned mem_x, unsigned mem_y,
                     unsigned tiles_per_row, bool swizzle)
{
   /* linear layout: tiles_per_row holds the bo stride; swizzle is ignored */
   const unsigned row_offset = mem_y * tiles_per_row;

   return row_offset + mem_x;
}
typedef unsigned (*tex_tile_offset_func)(unsigned mem_x, unsigned mem_y, |
unsigned tiles_per_row, |
bool swizzle); |
static tex_tile_offset_func |
tex_tile_choose_offset_func(const struct ilo_texture *tex, |
unsigned *tiles_per_row) |
{ |
switch (tex->tiling) { |
case INTEL_TILING_X: |
*tiles_per_row = tex->bo_stride / 512; |
return tex_tile_x_offset; |
case INTEL_TILING_Y: |
*tiles_per_row = tex->bo_stride / 128; |
return tex_tile_y_offset; |
case INTEL_TILING_NONE: |
default: |
/* W-tiling */ |
if (tex->bo_format == PIPE_FORMAT_S8_UINT) { |
*tiles_per_row = tex->bo_stride / 64; |
return tex_tile_w_offset; |
} |
else { |
*tiles_per_row = tex->bo_stride; |
return tex_tile_none_offset; |
} |
} |
} |
/**
 * Read a depth/stencil (or stencil-only) box from the (tiled) bo into the
 * system staging buffer, de-tiling texel by texel.
 *
 * When the texture has a separate stencil bo, the depth and stencil bytes
 * are interleaved into the packed pipe format in the staging buffer.
 */
static void
tex_staging_sys_zs_read(struct ilo_context *ilo,
                        struct ilo_texture *tex,
                        const struct ilo_transfer *xfer)
{
   const bool swizzle = ilo->dev->has_address_swizzling;
   const struct pipe_box *box = &xfer->base.box;
   const uint8_t *src = intel_bo_get_virtual(tex->bo);
   tex_tile_offset_func tile_offset;
   unsigned tiles_per_row;
   int slice;

   tile_offset = tex_tile_choose_offset_func(tex, &tiles_per_row);

   /* per-texel addressing below requires 1x1 format blocks */
   assert(tex->block_width == 1 && tex->block_height == 1);

   if (tex->separate_s8) {
      struct ilo_texture *s8_tex = tex->separate_s8;
      const uint8_t *s8_src = intel_bo_get_virtual(s8_tex->bo);
      tex_tile_offset_func s8_tile_offset;
      unsigned s8_tiles_per_row;
      /* layout of the packed destination texel */
      int dst_cpp, dst_s8_pos, src_cpp_used;

      s8_tile_offset = tex_tile_choose_offset_func(s8_tex, &s8_tiles_per_row);

      if (tex->base.format == PIPE_FORMAT_Z24_UNORM_S8_UINT) {
         assert(tex->bo_format == PIPE_FORMAT_Z24X8_UNORM);
         /* 3 depth bytes + stencil byte at offset 3 */
         dst_cpp = 4;
         dst_s8_pos = 3;
         src_cpp_used = 3;
      }
      else {
         assert(tex->base.format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT);
         assert(tex->bo_format == PIPE_FORMAT_Z32_FLOAT);
         /* 4 depth bytes + stencil byte at offset 4 (3 bytes padding) */
         dst_cpp = 8;
         dst_s8_pos = 4;
         src_cpp_used = 4;
      }

      for (slice = 0; slice < box->depth; slice++) {
         unsigned mem_x, mem_y, s8_mem_x, s8_mem_y;
         uint8_t *dst;
         int i, j;

         tex_get_box_origin(tex, xfer->base.level, slice,
                            box, &mem_x, &mem_y);
         tex_get_box_origin(s8_tex, xfer->base.level, slice,
                            box, &s8_mem_x, &s8_mem_y);

         dst = xfer->staging_sys + xfer->base.layer_stride * slice;

         for (i = 0; i < box->height; i++) {
            unsigned x = mem_x, s8_x = s8_mem_x;
            uint8_t *d = dst;

            for (j = 0; j < box->width; j++) {
               /* de-tile both the depth and the stencil addresses */
               const unsigned offset =
                  tile_offset(x, mem_y, tiles_per_row, swizzle);
               const unsigned s8_offset =
                  s8_tile_offset(s8_x, s8_mem_y, s8_tiles_per_row, swizzle);

               /* interleave depth bytes and the stencil byte */
               memcpy(d, src + offset, src_cpp_used);
               d[dst_s8_pos] = s8_src[s8_offset];

               d += dst_cpp;
               x += tex->bo_cpp;
               s8_x++;
            }

            dst += xfer->base.stride;
            mem_y++;
            s8_mem_y++;
         }
      }
   }
   else {
      /* stencil-only: one byte per texel, W-tiled */
      assert(tex->bo_format == PIPE_FORMAT_S8_UINT);

      for (slice = 0; slice < box->depth; slice++) {
         unsigned mem_x, mem_y;
         uint8_t *dst;
         int i, j;

         tex_get_box_origin(tex, xfer->base.level, slice,
                            box, &mem_x, &mem_y);

         dst = xfer->staging_sys + xfer->base.layer_stride * slice;

         for (i = 0; i < box->height; i++) {
            unsigned x = mem_x;
            uint8_t *d = dst;

            for (j = 0; j < box->width; j++) {
               const unsigned offset =
                  tile_offset(x, mem_y, tiles_per_row, swizzle);

               *d = src[offset];

               d++;
               x++;
            }

            dst += xfer->base.stride;
            mem_y++;
         }
      }
   }
}
/**
 * Write the system staging buffer back into the (tiled) depth/stencil bo(s),
 * tiling texel by texel.  Mirror of tex_staging_sys_zs_read().
 *
 * When the texture has a separate stencil bo, the packed staging texels are
 * split into depth bytes (to tex->bo) and the stencil byte (to the s8 bo).
 */
static void
tex_staging_sys_zs_write(struct ilo_context *ilo,
                         struct ilo_texture *tex,
                         const struct ilo_transfer *xfer)
{
   const bool swizzle = ilo->dev->has_address_swizzling;
   const struct pipe_box *box = &xfer->base.box;
   uint8_t *dst = intel_bo_get_virtual(tex->bo);
   tex_tile_offset_func tile_offset;
   unsigned tiles_per_row;
   int slice;

   tile_offset = tex_tile_choose_offset_func(tex, &tiles_per_row);

   /* per-texel addressing below requires 1x1 format blocks */
   assert(tex->block_width == 1 && tex->block_height == 1);

   if (tex->separate_s8) {
      struct ilo_texture *s8_tex = tex->separate_s8;
      uint8_t *s8_dst = intel_bo_get_virtual(s8_tex->bo);
      tex_tile_offset_func s8_tile_offset;
      unsigned s8_tiles_per_row;
      /* layout of the packed source texel */
      int src_cpp, src_s8_pos, dst_cpp_used;

      s8_tile_offset = tex_tile_choose_offset_func(s8_tex, &s8_tiles_per_row);

      if (tex->base.format == PIPE_FORMAT_Z24_UNORM_S8_UINT) {
         assert(tex->bo_format == PIPE_FORMAT_Z24X8_UNORM);
         /* 3 depth bytes + stencil byte at offset 3 */
         src_cpp = 4;
         src_s8_pos = 3;
         dst_cpp_used = 3;
      }
      else {
         assert(tex->base.format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT);
         assert(tex->bo_format == PIPE_FORMAT_Z32_FLOAT);
         /* 4 depth bytes + stencil byte at offset 4 (3 bytes padding) */
         src_cpp = 8;
         src_s8_pos = 4;
         dst_cpp_used = 4;
      }

      for (slice = 0; slice < box->depth; slice++) {
         unsigned mem_x, mem_y, s8_mem_x, s8_mem_y;
         const uint8_t *src;
         int i, j;

         tex_get_box_origin(tex, xfer->base.level, slice,
                            box, &mem_x, &mem_y);
         tex_get_box_origin(s8_tex, xfer->base.level, slice,
                            box, &s8_mem_x, &s8_mem_y);

         src = xfer->staging_sys + xfer->base.layer_stride * slice;

         for (i = 0; i < box->height; i++) {
            unsigned x = mem_x, s8_x = s8_mem_x;
            const uint8_t *s = src;

            for (j = 0; j < box->width; j++) {
               /* tile both the depth and the stencil addresses */
               const unsigned offset =
                  tile_offset(x, mem_y, tiles_per_row, swizzle);
               const unsigned s8_offset =
                  s8_tile_offset(s8_x, s8_mem_y, s8_tiles_per_row, swizzle);

               /* split depth bytes and the stencil byte */
               memcpy(dst + offset, s, dst_cpp_used);
               s8_dst[s8_offset] = s[src_s8_pos];

               s += src_cpp;
               x += tex->bo_cpp;
               s8_x++;
            }

            src += xfer->base.stride;
            mem_y++;
            s8_mem_y++;
         }
      }
   }
   else {
      /* stencil-only: one byte per texel, W-tiled */
      assert(tex->bo_format == PIPE_FORMAT_S8_UINT);

      for (slice = 0; slice < box->depth; slice++) {
         unsigned mem_x, mem_y;
         const uint8_t *src;
         int i, j;

         tex_get_box_origin(tex, xfer->base.level, slice,
                            box, &mem_x, &mem_y);

         src = xfer->staging_sys + xfer->base.layer_stride * slice;

         for (i = 0; i < box->height; i++) {
            unsigned x = mem_x;
            const uint8_t *s = src;

            for (j = 0; j < box->width; j++) {
               const unsigned offset =
                  tile_offset(x, mem_y, tiles_per_row, swizzle);

               dst[offset] = *s;

               s++;
               x++;
            }

            src += xfer->base.stride;
            mem_y++;
         }
      }
   }
}
/**
 * Write the system staging buffer back to the bo, converting from the pipe
 * format (tex->base.format) to the bo format (tex->bo_format) on the way.
 *
 * Currently only ETC1 -> RGBX8888 unpacking is supported.
 */
static void
tex_staging_sys_convert_write(struct ilo_context *ilo,
                              struct ilo_texture *tex,
                              const struct ilo_transfer *xfer)
{
   const struct pipe_box *box = &xfer->base.box;
   unsigned dst_slice_stride;
   void *dst;
   int slice;

   dst = intel_bo_get_virtual(tex->bo);
   dst += tex_get_box_offset(tex, xfer->base.level, box);

   /* slice stride is not always available */
   if (box->depth > 1)
      dst_slice_stride = tex_get_slice_stride(tex, xfer->base.level);
   else
      dst_slice_stride = 0;

   /* same format: plain copy, no conversion needed */
   if (unlikely(tex->bo_format == tex->base.format)) {
      util_copy_box(dst, tex->bo_format, tex->bo_stride, dst_slice_stride,
                    0, 0, 0, box->width, box->height, box->depth,
                    xfer->staging_sys, xfer->base.stride, xfer->base.layer_stride,
                    0, 0, 0);
      return;
   }

   switch (tex->base.format) {
   case PIPE_FORMAT_ETC1_RGB8:
      assert(tex->bo_format == PIPE_FORMAT_R8G8B8X8_UNORM);

      /* decompress one slice at a time */
      for (slice = 0; slice < box->depth; slice++) {
         const void *src =
            xfer->staging_sys + xfer->base.layer_stride * slice;

         util_format_etc1_rgb8_unpack_rgba_8unorm(dst,
               tex->bo_stride, src, xfer->base.stride,
               box->width, box->height);

         dst += dst_slice_stride;
      }
      break;
   default:
      assert(!"unable to convert the staging data");
      break;
   }
}
static bool |
tex_staging_sys_map_bo(const struct ilo_context *ilo, |
const struct ilo_texture *tex, |
bool for_read_back, bool linear_view) |
{ |
const bool prefer_cpu = (ilo->dev->has_llc || for_read_back); |
int err; |
if (prefer_cpu && (tex->tiling == INTEL_TILING_NONE || !linear_view)) |
err = intel_bo_map(tex->bo, !for_read_back); |
else |
err = intel_bo_map_gtt(tex->bo); |
if (!tex->separate_s8) |
return !err; |
err = intel_bo_map(tex->separate_s8->bo, !for_read_back); |
if (err) |
intel_bo_unmap(tex->bo); |
return !err; |
} |
static void |
tex_staging_sys_unmap_bo(const struct ilo_context *ilo, |
const struct ilo_texture *tex) |
{ |
if (tex->separate_s8) |
intel_bo_unmap(tex->separate_s8->bo); |
intel_bo_unmap(tex->bo); |
} |
static void |
tex_staging_sys_unmap(struct ilo_context *ilo, |
struct ilo_texture *tex, |
struct ilo_transfer *xfer) |
{ |
bool success; |
if (!(xfer->base.usage & PIPE_TRANSFER_WRITE)) { |
FREE(xfer->staging_sys); |
return; |
} |
switch (xfer->method) { |
case ILO_TRANSFER_MAP_SW_CONVERT: |
success = tex_staging_sys_map_bo(ilo, tex, false, true); |
if (success) { |
tex_staging_sys_convert_write(ilo, tex, xfer); |
tex_staging_sys_unmap_bo(ilo, tex); |
} |
break; |
case ILO_TRANSFER_MAP_SW_ZS: |
success = tex_staging_sys_map_bo(ilo, tex, false, false); |
if (success) { |
tex_staging_sys_zs_write(ilo, tex, xfer); |
tex_staging_sys_unmap_bo(ilo, tex); |
} |
break; |
default: |
assert(!"unknown mapping method"); |
success = false; |
break; |
} |
if (!success) |
ilo_err("failed to map resource for moving staging data\n"); |
FREE(xfer->staging_sys); |
} |
static bool |
tex_staging_sys_map(struct ilo_context *ilo, |
struct ilo_texture *tex, |
struct ilo_transfer *xfer) |
{ |
const struct pipe_box *box = &xfer->base.box; |
const size_t stride = util_format_get_stride(tex->base.format, box->width); |
const size_t size = |
util_format_get_2d_size(tex->base.format, stride, box->height); |
bool read_back = false, success; |
xfer->staging_sys = MALLOC(size * box->depth); |
if (!xfer->staging_sys) |
return false; |
xfer->base.stride = stride; |
xfer->base.layer_stride = size; |
xfer->ptr = xfer->staging_sys; |
/* see if we need to read the resource back */ |
if (xfer->base.usage & PIPE_TRANSFER_READ) { |
read_back = true; |
} |
else if (xfer->base.usage & PIPE_TRANSFER_WRITE) { |
const unsigned discard_flags = |
(PIPE_TRANSFER_DISCARD_RANGE | PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE); |
if (!(xfer->base.usage & discard_flags)) |
read_back = true; |
} |
if (!read_back) |
return true; |
switch (xfer->method) { |
case ILO_TRANSFER_MAP_SW_CONVERT: |
assert(!"no on-the-fly format conversion for mapping"); |
success = false; |
break; |
case ILO_TRANSFER_MAP_SW_ZS: |
success = tex_staging_sys_map_bo(ilo, tex, true, false); |
if (success) { |
tex_staging_sys_zs_read(ilo, tex, xfer); |
tex_staging_sys_unmap_bo(ilo, tex); |
} |
break; |
default: |
assert(!"unknown mapping method"); |
success = false; |
break; |
} |
return success; |
} |
static void |
tex_direct_unmap(struct ilo_context *ilo, |
struct ilo_texture *tex, |
struct ilo_transfer *xfer) |
{ |
intel_bo_unmap(tex->bo); |
} |
static bool |
tex_direct_map(struct ilo_context *ilo, |
struct ilo_texture *tex, |
struct ilo_transfer *xfer) |
{ |
if (!map_bo_for_transfer(ilo, tex->bo, xfer)) |
return false; |
/* note that stride is for a block row, not a texel row */ |
xfer->base.stride = tex->bo_stride; |
/* slice stride is not always available */ |
if (xfer->base.box.depth > 1) |
xfer->base.layer_stride = tex_get_slice_stride(tex, xfer->base.level); |
else |
xfer->base.layer_stride = 0; |
xfer->ptr = intel_bo_get_virtual(tex->bo); |
xfer->ptr += tex_get_box_offset(tex, xfer->base.level, &xfer->base.box); |
return true; |
} |
static bool |
tex_map(struct ilo_context *ilo, struct ilo_transfer *xfer) |
{ |
struct ilo_texture *tex = ilo_texture(xfer->base.resource); |
bool success; |
if (!choose_transfer_method(ilo, xfer)) |
return false; |
switch (xfer->method) { |
case ILO_TRANSFER_MAP_CPU: |
case ILO_TRANSFER_MAP_GTT: |
case ILO_TRANSFER_MAP_UNSYNC: |
success = tex_direct_map(ilo, tex, xfer); |
break; |
case ILO_TRANSFER_MAP_SW_CONVERT: |
case ILO_TRANSFER_MAP_SW_ZS: |
success = tex_staging_sys_map(ilo, tex, xfer); |
break; |
default: |
assert(!"unknown mapping method"); |
success = false; |
break; |
} |
return success; |
} |
static void |
tex_unmap(struct ilo_context *ilo, struct ilo_transfer *xfer) |
{ |
struct ilo_texture *tex = ilo_texture(xfer->base.resource); |
switch (xfer->method) { |
case ILO_TRANSFER_MAP_CPU: |
case ILO_TRANSFER_MAP_GTT: |
case ILO_TRANSFER_MAP_UNSYNC: |
tex_direct_unmap(ilo, tex, xfer); |
break; |
case ILO_TRANSFER_MAP_SW_CONVERT: |
case ILO_TRANSFER_MAP_SW_ZS: |
tex_staging_sys_unmap(ilo, tex, xfer); |
break; |
default: |
assert(!"unknown mapping method"); |
break; |
} |
} |
static bool |
buf_map(struct ilo_context *ilo, struct ilo_transfer *xfer) |
{ |
struct ilo_buffer *buf = ilo_buffer(xfer->base.resource); |
if (!choose_transfer_method(ilo, xfer)) |
return false; |
if (!map_bo_for_transfer(ilo, buf->bo, xfer)) |
return false; |
assert(xfer->base.level == 0); |
assert(xfer->base.box.y == 0); |
assert(xfer->base.box.z == 0); |
assert(xfer->base.box.height == 1); |
assert(xfer->base.box.depth == 1); |
xfer->base.stride = 0; |
xfer->base.layer_stride = 0; |
xfer->ptr = intel_bo_get_virtual(buf->bo); |
xfer->ptr += xfer->base.box.x; |
return true; |
} |
static void |
buf_unmap(struct ilo_context *ilo, struct ilo_transfer *xfer) |
{ |
struct ilo_buffer *buf = ilo_buffer(xfer->base.resource); |
intel_bo_unmap(buf->bo); |
} |
static void |
buf_pwrite(struct ilo_context *ilo, struct ilo_buffer *buf, |
unsigned usage, int offset, int size, const void *data) |
{ |
bool need_flush; |
/* see if we can avoid stalling */ |
if (is_bo_busy(ilo, buf->bo, &need_flush)) { |
bool will_stall = true; |
if (usage & PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE) { |
/* old data not needed so discard the old bo to avoid stalling */ |
if (ilo_buffer_alloc_bo(buf)) { |
ilo_mark_states_with_resource_dirty(ilo, &buf->base); |
will_stall = false; |
} |
} |
else { |
/* |
* We could allocate a temporary bo to hold the data and emit |
* pipelined copy blit to move them to buf->bo. But for now, do |
* nothing. |
*/ |
} |
/* flush to make bo busy (so that pwrite() stalls as it should be) */ |
if (will_stall && need_flush) |
ilo_cp_flush(ilo->cp); |
} |
intel_bo_pwrite(buf->bo, offset, size, data); |
} |
/*
 * pipe_context::transfer_flush_region hook.  Intentionally a no-op: this
 * driver performs no explicit per-region flushing here.
 */
static void
ilo_transfer_flush_region(struct pipe_context *pipe,
                          struct pipe_transfer *transfer,
                          const struct pipe_box *box)
{
   /* nothing to do */
}
static void |
ilo_transfer_unmap(struct pipe_context *pipe, |
struct pipe_transfer *transfer) |
{ |
struct ilo_context *ilo = ilo_context(pipe); |
struct ilo_transfer *xfer = ilo_transfer(transfer); |
if (xfer->base.resource->target == PIPE_BUFFER) |
buf_unmap(ilo, xfer); |
else |
tex_unmap(ilo, xfer); |
pipe_resource_reference(&xfer->base.resource, NULL); |
util_slab_free(&ilo->transfer_mempool, xfer); |
} |
static void * |
ilo_transfer_map(struct pipe_context *pipe, |
struct pipe_resource *res, |
unsigned level, |
unsigned usage, |
const struct pipe_box *box, |
struct pipe_transfer **transfer) |
{ |
struct ilo_context *ilo = ilo_context(pipe); |
struct ilo_transfer *xfer; |
bool success; |
xfer = util_slab_alloc(&ilo->transfer_mempool); |
if (!xfer) { |
*transfer = NULL; |
return NULL; |
} |
xfer->base.resource = NULL; |
pipe_resource_reference(&xfer->base.resource, res); |
xfer->base.level = level; |
xfer->base.usage = usage; |
xfer->base.box = *box; |
if (res->target == PIPE_BUFFER) |
success = buf_map(ilo, xfer); |
else |
success = tex_map(ilo, xfer); |
if (!success) { |
pipe_resource_reference(&xfer->base.resource, NULL); |
FREE(xfer); |
*transfer = NULL; |
return NULL; |
} |
*transfer = &xfer->base; |
return xfer->ptr; |
} |
static void |
ilo_transfer_inline_write(struct pipe_context *pipe, |
struct pipe_resource *res, |
unsigned level, |
unsigned usage, |
const struct pipe_box *box, |
const void *data, |
unsigned stride, |
unsigned layer_stride) |
{ |
if (likely(res->target == PIPE_BUFFER) && |
!(usage & PIPE_TRANSFER_UNSYNCHRONIZED)) { |
/* they should specify just an offset and a size */ |
assert(level == 0); |
assert(box->y == 0); |
assert(box->z == 0); |
assert(box->height == 1); |
assert(box->depth == 1); |
buf_pwrite(ilo_context(pipe), ilo_buffer(res), |
usage, box->x, box->width, data); |
} |
else { |
u_default_transfer_inline_write(pipe, res, |
level, usage, box, data, stride, layer_stride); |
} |
} |
/** |
* Initialize transfer-related functions. |
*/ |
void |
ilo_init_transfer_functions(struct ilo_context *ilo) |
{ |
ilo->base.transfer_map = ilo_transfer_map; |
ilo->base.transfer_flush_region = ilo_transfer_flush_region; |
ilo->base.transfer_unmap = ilo_transfer_unmap; |
ilo->base.transfer_inline_write = ilo_transfer_inline_write; |
} |
/contrib/sdk/sources/Mesa/src/gallium/drivers/ilo/ilo_transfer.h |
---|
0,0 → 1,66 |
/* |
* Mesa 3-D graphics library |
* |
* Copyright (C) 2012-2013 LunarG, Inc. |
* |
* Permission is hereby granted, free of charge, to any person obtaining a |
* copy of this software and associated documentation files (the "Software"), |
* to deal in the Software without restriction, including without limitation |
* the rights to use, copy, modify, merge, publish, distribute, sublicense, |
* and/or sell copies of the Software, and to permit persons to whom the |
* Software is furnished to do so, subject to the following conditions: |
* |
* The above copyright notice and this permission notice shall be included |
* in all copies or substantial portions of the Software. |
* |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING |
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER |
* DEALINGS IN THE SOFTWARE. |
* |
* Authors: |
* Chia-I Wu <olv@lunarg.com> |
*/ |
#ifndef ILO_TRANSFER_H |
#define ILO_TRANSFER_H |
#include "pipe/p_state.h" |
#include "ilo_common.h" |
/*
 * How a resource is mapped for a transfer.  The first three map the bo
 * itself; the SW methods go through a system-memory staging buffer.
 */
enum ilo_transfer_map_method {
   /* map() / map_gtt() / map_unsynchronized() */
   ILO_TRANSFER_MAP_CPU,
   ILO_TRANSFER_MAP_GTT,
   ILO_TRANSFER_MAP_UNSYNC,
   /* use staging system buffer */
   ILO_TRANSFER_MAP_SW_CONVERT,
   ILO_TRANSFER_MAP_SW_ZS,
};
/*
 * Driver-private transfer object, embedding the gallium pipe_transfer as
 * its first member so the two can be cast back and forth.
 */
struct ilo_transfer {
   struct pipe_transfer base;
   /* mapping method selected when the transfer was mapped */
   enum ilo_transfer_map_method method;
   /* CPU-visible pointer returned to the caller of transfer_map() */
   void *ptr;
   /* staging system-memory buffer for the SW mapping methods */
   void *staging_sys;
};
struct ilo_context; |
/*
 * Downcast a pipe_transfer to the driver's ilo_transfer; valid because
 * ilo_transfer embeds pipe_transfer as its first member.
 */
static inline struct ilo_transfer *
ilo_transfer(struct pipe_transfer *transfer)
{
   struct ilo_transfer *xfer = (struct ilo_transfer *) transfer;
   return xfer;
}
void |
ilo_init_transfer_functions(struct ilo_context *ilo); |
#endif /* ILO_TRANSFER_H */ |
/contrib/sdk/sources/Mesa/src/gallium/drivers/ilo/ilo_video.c |
---|
0,0 → 1,65 |
/* |
* Mesa 3-D graphics library |
* |
* Copyright (C) 2012-2013 LunarG, Inc. |
* |
* Permission is hereby granted, free of charge, to any person obtaining a |
* copy of this software and associated documentation files (the "Software"), |
* to deal in the Software without restriction, including without limitation |
* the rights to use, copy, modify, merge, publish, distribute, sublicense, |
* and/or sell copies of the Software, and to permit persons to whom the |
* Software is furnished to do so, subject to the following conditions: |
* |
* The above copyright notice and this permission notice shall be included |
* in all copies or substantial portions of the Software. |
* |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING |
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER |
* DEALINGS IN THE SOFTWARE. |
* |
* Authors: |
* Chia-I Wu <olv@lunarg.com> |
*/ |
#include "vl/vl_decoder.h" |
#include "vl/vl_video_buffer.h" |
#include "ilo_context.h" |
#include "ilo_video.h" |
/* |
* Nothing here. We could make use of the video codec engine someday. |
*/ |
static struct pipe_video_decoder * |
ilo_create_video_decoder(struct pipe_context *pipe, |
enum pipe_video_profile profile, |
enum pipe_video_entrypoint entrypoint, |
enum pipe_video_chroma_format chroma_format, |
unsigned width, unsigned height, unsigned max_references, |
bool expect_chunked_decode) |
{ |
return vl_create_decoder(pipe, profile, entrypoint, chroma_format, |
width, height, max_references, expect_chunked_decode); |
} |
/*
 * pipe_context::create_video_buffer hook: delegate to the generic vl
 * video buffer implementation.
 */
static struct pipe_video_buffer *
ilo_create_video_buffer(struct pipe_context *pipe,
                        const struct pipe_video_buffer *templ)
{
   struct pipe_video_buffer *buf = vl_video_buffer_create(pipe, templ);

   return buf;
}
/** |
* Initialize video-related functions. |
*/ |
void |
ilo_init_video_functions(struct ilo_context *ilo) |
{ |
ilo->base.create_video_decoder = ilo_create_video_decoder; |
ilo->base.create_video_buffer = ilo_create_video_buffer; |
} |
/contrib/sdk/sources/Mesa/src/gallium/drivers/ilo/ilo_video.h |
---|
0,0 → 1,38 |
/* |
* Mesa 3-D graphics library |
* |
* Copyright (C) 2012-2013 LunarG, Inc. |
* |
* Permission is hereby granted, free of charge, to any person obtaining a |
* copy of this software and associated documentation files (the "Software"), |
* to deal in the Software without restriction, including without limitation |
* the rights to use, copy, modify, merge, publish, distribute, sublicense, |
* and/or sell copies of the Software, and to permit persons to whom the |
* Software is furnished to do so, subject to the following conditions: |
* |
* The above copyright notice and this permission notice shall be included |
* in all copies or substantial portions of the Software. |
* |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING |
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER |
* DEALINGS IN THE SOFTWARE. |
* |
* Authors: |
* Chia-I Wu <olv@lunarg.com> |
*/ |
#ifndef ILO_VIDEO_H |
#define ILO_VIDEO_H |
#include "ilo_common.h" |
struct ilo_context; |
void |
ilo_init_video_functions(struct ilo_context *ilo); |
#endif /* ILO_VIDEO_H */ |
/contrib/sdk/sources/Mesa/src/gallium/drivers/ilo/include/brw_defines.h |
---|
0,0 → 1,1728 |
/* |
Copyright (C) Intel Corp. 2006. All Rights Reserved. |
Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to |
develop this 3D driver. |
Permission is hereby granted, free of charge, to any person obtaining |
a copy of this software and associated documentation files (the |
"Software"), to deal in the Software without restriction, including |
without limitation the rights to use, copy, modify, merge, publish, |
distribute, sublicense, and/or sell copies of the Software, and to |
permit persons to whom the Software is furnished to do so, subject to |
the following conditions: |
The above copyright notice and this permission notice (including the |
next paragraph) shall be included in all copies or substantial |
portions of the Software. |
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, |
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF |
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. |
IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE |
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION |
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION |
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. |
**********************************************************************/ |
/* |
* Authors: |
* Keith Whitwell <keith@tungstengraphics.com> |
*/ |
#ifndef BRW_DEFINES_H
#define BRW_DEFINES_H

/*
 * Build a bitmask covering bits [low, high], inclusive.
 *
 * BUGFIX: the shifted literal is now unsigned.  With a signed 1, masks
 * that reach bit 31 (e.g. INTEL_MASK(31, 19) used by the surface-state
 * field masks below) left-shift into the sign bit of a signed int, which
 * is undefined behavior in C.  The helper macros were also moved inside
 * the include guard so they are no longer re-expanded on every inclusion.
 */
#define INTEL_MASK(high, low) (((1u<<((high)-(low)+1))-1)<<(low))
/* Insert/extract a value into/from a field named by the FIELD_SHIFT /
 * FIELD_MASK convention used throughout this header. */
#define SET_FIELD(value, field) (((value) << field ## _SHIFT) & field ## _MASK)
#define GET_FIELD(word, field) (((word) & field ## _MASK) >> field ## _SHIFT)
/* 3D state: |
*/ |
#define PIPE_CONTROL_NOWRITE 0x00 |
#define PIPE_CONTROL_WRITEIMMEDIATE 0x01 |
#define PIPE_CONTROL_WRITEDEPTH 0x02 |
#define PIPE_CONTROL_WRITETIMESTAMP 0x03 |
#define PIPE_CONTROL_GTTWRITE_PROCESS_LOCAL 0x00 |
#define PIPE_CONTROL_GTTWRITE_GLOBAL 0x01 |
#define CMD_3D_PRIM 0x7b00 /* 3DPRIMITIVE */ |
/* DW0 */ |
# define GEN4_3DPRIM_TOPOLOGY_TYPE_SHIFT 10 |
# define GEN4_3DPRIM_VERTEXBUFFER_ACCESS_SEQUENTIAL (0 << 15) |
# define GEN4_3DPRIM_VERTEXBUFFER_ACCESS_RANDOM (1 << 15) |
/* DW1 */ |
# define GEN7_3DPRIM_VERTEXBUFFER_ACCESS_SEQUENTIAL (0 << 8) |
# define GEN7_3DPRIM_VERTEXBUFFER_ACCESS_RANDOM (1 << 8) |
#define _3DPRIM_POINTLIST 0x01 |
#define _3DPRIM_LINELIST 0x02 |
#define _3DPRIM_LINESTRIP 0x03 |
#define _3DPRIM_TRILIST 0x04 |
#define _3DPRIM_TRISTRIP 0x05 |
#define _3DPRIM_TRIFAN 0x06 |
#define _3DPRIM_QUADLIST 0x07 |
#define _3DPRIM_QUADSTRIP 0x08 |
#define _3DPRIM_LINELIST_ADJ 0x09 |
#define _3DPRIM_LINESTRIP_ADJ 0x0A |
#define _3DPRIM_TRILIST_ADJ 0x0B |
#define _3DPRIM_TRISTRIP_ADJ 0x0C |
#define _3DPRIM_TRISTRIP_REVERSE 0x0D |
#define _3DPRIM_POLYGON 0x0E |
#define _3DPRIM_RECTLIST 0x0F |
#define _3DPRIM_LINELOOP 0x10 |
#define _3DPRIM_POINTLIST_BF 0x11 |
#define _3DPRIM_LINESTRIP_CONT 0x12 |
#define _3DPRIM_LINESTRIP_BF 0x13 |
#define _3DPRIM_LINESTRIP_CONT_BF 0x14 |
#define _3DPRIM_TRIFAN_NOSTIPPLE 0x15 |
#define BRW_ANISORATIO_2 0 |
#define BRW_ANISORATIO_4 1 |
#define BRW_ANISORATIO_6 2 |
#define BRW_ANISORATIO_8 3 |
#define BRW_ANISORATIO_10 4 |
#define BRW_ANISORATIO_12 5 |
#define BRW_ANISORATIO_14 6 |
#define BRW_ANISORATIO_16 7 |
#define BRW_BLENDFACTOR_ONE 0x1 |
#define BRW_BLENDFACTOR_SRC_COLOR 0x2 |
#define BRW_BLENDFACTOR_SRC_ALPHA 0x3 |
#define BRW_BLENDFACTOR_DST_ALPHA 0x4 |
#define BRW_BLENDFACTOR_DST_COLOR 0x5 |
#define BRW_BLENDFACTOR_SRC_ALPHA_SATURATE 0x6 |
#define BRW_BLENDFACTOR_CONST_COLOR 0x7 |
#define BRW_BLENDFACTOR_CONST_ALPHA 0x8 |
#define BRW_BLENDFACTOR_SRC1_COLOR 0x9 |
#define BRW_BLENDFACTOR_SRC1_ALPHA 0x0A |
#define BRW_BLENDFACTOR_ZERO 0x11 |
#define BRW_BLENDFACTOR_INV_SRC_COLOR 0x12 |
#define BRW_BLENDFACTOR_INV_SRC_ALPHA 0x13 |
#define BRW_BLENDFACTOR_INV_DST_ALPHA 0x14 |
#define BRW_BLENDFACTOR_INV_DST_COLOR 0x15 |
#define BRW_BLENDFACTOR_INV_CONST_COLOR 0x17 |
#define BRW_BLENDFACTOR_INV_CONST_ALPHA 0x18 |
#define BRW_BLENDFACTOR_INV_SRC1_COLOR 0x19 |
#define BRW_BLENDFACTOR_INV_SRC1_ALPHA 0x1A |
#define BRW_BLENDFUNCTION_ADD 0 |
#define BRW_BLENDFUNCTION_SUBTRACT 1 |
#define BRW_BLENDFUNCTION_REVERSE_SUBTRACT 2 |
#define BRW_BLENDFUNCTION_MIN 3 |
#define BRW_BLENDFUNCTION_MAX 4 |
#define BRW_ALPHATEST_FORMAT_UNORM8 0 |
#define BRW_ALPHATEST_FORMAT_FLOAT32 1 |
#define BRW_CHROMAKEY_KILL_ON_ANY_MATCH 0 |
#define BRW_CHROMAKEY_REPLACE_BLACK 1 |
#define BRW_CLIP_API_OGL 0 |
#define BRW_CLIP_API_DX 1 |
#define BRW_CLIPMODE_NORMAL 0 |
#define BRW_CLIPMODE_CLIP_ALL 1 |
#define BRW_CLIPMODE_CLIP_NON_REJECTED 2 |
#define BRW_CLIPMODE_REJECT_ALL 3 |
#define BRW_CLIPMODE_ACCEPT_ALL 4 |
#define BRW_CLIPMODE_KERNEL_CLIP 5 |
#define BRW_CLIP_NDCSPACE 0 |
#define BRW_CLIP_SCREENSPACE 1 |
#define BRW_COMPAREFUNCTION_ALWAYS 0 |
#define BRW_COMPAREFUNCTION_NEVER 1 |
#define BRW_COMPAREFUNCTION_LESS 2 |
#define BRW_COMPAREFUNCTION_EQUAL 3 |
#define BRW_COMPAREFUNCTION_LEQUAL 4 |
#define BRW_COMPAREFUNCTION_GREATER 5 |
#define BRW_COMPAREFUNCTION_NOTEQUAL 6 |
#define BRW_COMPAREFUNCTION_GEQUAL 7 |
#define BRW_COVERAGE_PIXELS_HALF 0 |
#define BRW_COVERAGE_PIXELS_1 1 |
#define BRW_COVERAGE_PIXELS_2 2 |
#define BRW_COVERAGE_PIXELS_4 3 |
#define BRW_CULLMODE_BOTH 0 |
#define BRW_CULLMODE_NONE 1 |
#define BRW_CULLMODE_FRONT 2 |
#define BRW_CULLMODE_BACK 3 |
#define BRW_DEFAULTCOLOR_R8G8B8A8_UNORM 0 |
#define BRW_DEFAULTCOLOR_R32G32B32A32_FLOAT 1 |
#define BRW_DEPTHFORMAT_D32_FLOAT_S8X24_UINT 0 |
#define BRW_DEPTHFORMAT_D32_FLOAT 1 |
#define BRW_DEPTHFORMAT_D24_UNORM_S8_UINT 2 |
#define BRW_DEPTHFORMAT_D24_UNORM_X8_UINT 3 /* GEN5 */ |
#define BRW_DEPTHFORMAT_D16_UNORM 5 |
#define BRW_FLOATING_POINT_IEEE_754 0 |
#define BRW_FLOATING_POINT_NON_IEEE_754 1 |
#define BRW_FRONTWINDING_CW 0 |
#define BRW_FRONTWINDING_CCW 1 |
#define BRW_SPRITE_POINT_ENABLE 16 |
#define BRW_CUT_INDEX_ENABLE (1 << 10) |
#define BRW_INDEX_BYTE 0 |
#define BRW_INDEX_WORD 1 |
#define BRW_INDEX_DWORD 2 |
#define BRW_LOGICOPFUNCTION_CLEAR 0 |
#define BRW_LOGICOPFUNCTION_NOR 1 |
#define BRW_LOGICOPFUNCTION_AND_INVERTED 2 |
#define BRW_LOGICOPFUNCTION_COPY_INVERTED 3 |
#define BRW_LOGICOPFUNCTION_AND_REVERSE 4 |
#define BRW_LOGICOPFUNCTION_INVERT 5 |
#define BRW_LOGICOPFUNCTION_XOR 6 |
#define BRW_LOGICOPFUNCTION_NAND 7 |
#define BRW_LOGICOPFUNCTION_AND 8 |
#define BRW_LOGICOPFUNCTION_EQUIV 9 |
#define BRW_LOGICOPFUNCTION_NOOP 10 |
#define BRW_LOGICOPFUNCTION_OR_INVERTED 11 |
#define BRW_LOGICOPFUNCTION_COPY 12 |
#define BRW_LOGICOPFUNCTION_OR_REVERSE 13 |
#define BRW_LOGICOPFUNCTION_OR 14 |
#define BRW_LOGICOPFUNCTION_SET 15 |
#define BRW_MAPFILTER_NEAREST 0x0 |
#define BRW_MAPFILTER_LINEAR 0x1 |
#define BRW_MAPFILTER_ANISOTROPIC 0x2 |
#define BRW_MIPFILTER_NONE 0 |
#define BRW_MIPFILTER_NEAREST 1 |
#define BRW_MIPFILTER_LINEAR 3 |
#define BRW_ADDRESS_ROUNDING_ENABLE_U_MAG 0x20 |
#define BRW_ADDRESS_ROUNDING_ENABLE_U_MIN 0x10 |
#define BRW_ADDRESS_ROUNDING_ENABLE_V_MAG 0x08 |
#define BRW_ADDRESS_ROUNDING_ENABLE_V_MIN 0x04 |
#define BRW_ADDRESS_ROUNDING_ENABLE_R_MAG 0x02 |
#define BRW_ADDRESS_ROUNDING_ENABLE_R_MIN 0x01 |
#define BRW_POLYGON_FRONT_FACING 0 |
#define BRW_POLYGON_BACK_FACING 1 |
#define BRW_PREFILTER_ALWAYS 0x0 |
#define BRW_PREFILTER_NEVER 0x1 |
#define BRW_PREFILTER_LESS 0x2 |
#define BRW_PREFILTER_EQUAL 0x3 |
#define BRW_PREFILTER_LEQUAL 0x4 |
#define BRW_PREFILTER_GREATER 0x5 |
#define BRW_PREFILTER_NOTEQUAL 0x6 |
#define BRW_PREFILTER_GEQUAL 0x7 |
#define BRW_PROVOKING_VERTEX_0 0 |
#define BRW_PROVOKING_VERTEX_1 1 |
#define BRW_PROVOKING_VERTEX_2 2 |
#define BRW_RASTRULE_UPPER_LEFT 0 |
#define BRW_RASTRULE_UPPER_RIGHT 1 |
/* These are listed as "Reserved, but not seen as useful" |
* in Intel documentation (page 212, "Point Rasterization Rule", |
* section 7.4 "SF Pipeline State Summary", of document |
* "Intel® 965 Express Chipset Family and Intel® G35 Express |
* Chipset Graphics Controller Programmer's Reference Manual, |
* Volume 2: 3D/Media", Revision 1.0b as of January 2008, |
* available at |
* http://intellinuxgraphics.org/documentation.html |
* at the time of this writing). |
* |
* These appear to be supported on at least some |
* i965-family devices, and the BRW_RASTRULE_LOWER_RIGHT |
* is useful when using OpenGL to render to a FBO |
* (which has the pixel coordinate Y orientation inverted |
* with respect to the normal OpenGL pixel coordinate system). |
*/ |
#define BRW_RASTRULE_LOWER_LEFT 2 |
#define BRW_RASTRULE_LOWER_RIGHT 3 |
#define BRW_RENDERTARGET_CLAMPRANGE_UNORM 0 |
#define BRW_RENDERTARGET_CLAMPRANGE_SNORM 1 |
#define BRW_RENDERTARGET_CLAMPRANGE_FORMAT 2 |
#define BRW_STENCILOP_KEEP 0 |
#define BRW_STENCILOP_ZERO 1 |
#define BRW_STENCILOP_REPLACE 2 |
#define BRW_STENCILOP_INCRSAT 3 |
#define BRW_STENCILOP_DECRSAT 4 |
#define BRW_STENCILOP_INCR 5 |
#define BRW_STENCILOP_DECR 6 |
#define BRW_STENCILOP_INVERT 7 |
/* Surface state DW0 */ |
#define BRW_SURFACE_RC_READ_WRITE (1 << 8) |
#define BRW_SURFACE_MIPLAYOUT_SHIFT 10 |
#define BRW_SURFACE_MIPMAPLAYOUT_BELOW 0 |
#define BRW_SURFACE_MIPMAPLAYOUT_RIGHT 1 |
#define BRW_SURFACE_CUBEFACE_ENABLES 0x3f |
#define BRW_SURFACE_BLEND_ENABLED (1 << 13) |
#define BRW_SURFACE_WRITEDISABLE_B_SHIFT 14 |
#define BRW_SURFACE_WRITEDISABLE_G_SHIFT 15 |
#define BRW_SURFACE_WRITEDISABLE_R_SHIFT 16 |
#define BRW_SURFACE_WRITEDISABLE_A_SHIFT 17 |
#define BRW_SURFACEFORMAT_R32G32B32A32_FLOAT 0x000 |
#define BRW_SURFACEFORMAT_R32G32B32A32_SINT 0x001 |
#define BRW_SURFACEFORMAT_R32G32B32A32_UINT 0x002 |
#define BRW_SURFACEFORMAT_R32G32B32A32_UNORM 0x003 |
#define BRW_SURFACEFORMAT_R32G32B32A32_SNORM 0x004 |
#define BRW_SURFACEFORMAT_R64G64_FLOAT 0x005 |
#define BRW_SURFACEFORMAT_R32G32B32X32_FLOAT 0x006 |
#define BRW_SURFACEFORMAT_R32G32B32A32_SSCALED 0x007 |
#define BRW_SURFACEFORMAT_R32G32B32A32_USCALED 0x008 |
#define BRW_SURFACEFORMAT_R32G32B32A32_SFIXED 0x020 |
#define BRW_SURFACEFORMAT_R64G64_PASSTHRU 0x021 |
#define BRW_SURFACEFORMAT_R32G32B32_FLOAT 0x040 |
#define BRW_SURFACEFORMAT_R32G32B32_SINT 0x041 |
#define BRW_SURFACEFORMAT_R32G32B32_UINT 0x042 |
#define BRW_SURFACEFORMAT_R32G32B32_UNORM 0x043 |
#define BRW_SURFACEFORMAT_R32G32B32_SNORM 0x044 |
#define BRW_SURFACEFORMAT_R32G32B32_SSCALED 0x045 |
#define BRW_SURFACEFORMAT_R32G32B32_USCALED 0x046 |
#define BRW_SURFACEFORMAT_R32G32B32_SFIXED 0x050 |
#define BRW_SURFACEFORMAT_R16G16B16A16_UNORM 0x080 |
#define BRW_SURFACEFORMAT_R16G16B16A16_SNORM 0x081 |
#define BRW_SURFACEFORMAT_R16G16B16A16_SINT 0x082 |
#define BRW_SURFACEFORMAT_R16G16B16A16_UINT 0x083 |
#define BRW_SURFACEFORMAT_R16G16B16A16_FLOAT 0x084 |
#define BRW_SURFACEFORMAT_R32G32_FLOAT 0x085 |
#define BRW_SURFACEFORMAT_R32G32_SINT 0x086 |
#define BRW_SURFACEFORMAT_R32G32_UINT 0x087 |
#define BRW_SURFACEFORMAT_R32_FLOAT_X8X24_TYPELESS 0x088 |
#define BRW_SURFACEFORMAT_X32_TYPELESS_G8X24_UINT 0x089 |
#define BRW_SURFACEFORMAT_L32A32_FLOAT 0x08A |
#define BRW_SURFACEFORMAT_R32G32_UNORM 0x08B |
#define BRW_SURFACEFORMAT_R32G32_SNORM 0x08C |
#define BRW_SURFACEFORMAT_R64_FLOAT 0x08D |
#define BRW_SURFACEFORMAT_R16G16B16X16_UNORM 0x08E |
#define BRW_SURFACEFORMAT_R16G16B16X16_FLOAT 0x08F |
#define BRW_SURFACEFORMAT_A32X32_FLOAT 0x090 |
#define BRW_SURFACEFORMAT_L32X32_FLOAT 0x091 |
#define BRW_SURFACEFORMAT_I32X32_FLOAT 0x092 |
#define BRW_SURFACEFORMAT_R16G16B16A16_SSCALED 0x093 |
#define BRW_SURFACEFORMAT_R16G16B16A16_USCALED 0x094 |
#define BRW_SURFACEFORMAT_R32G32_SSCALED 0x095 |
#define BRW_SURFACEFORMAT_R32G32_USCALED 0x096 |
#define BRW_SURFACEFORMAT_R32G32_SFIXED 0x0A0 |
#define BRW_SURFACEFORMAT_R64_PASSTHRU 0x0A1 |
#define BRW_SURFACEFORMAT_B8G8R8A8_UNORM 0x0C0 |
#define BRW_SURFACEFORMAT_B8G8R8A8_UNORM_SRGB 0x0C1 |
#define BRW_SURFACEFORMAT_R10G10B10A2_UNORM 0x0C2 |
#define BRW_SURFACEFORMAT_R10G10B10A2_UNORM_SRGB 0x0C3 |
#define BRW_SURFACEFORMAT_R10G10B10A2_UINT 0x0C4 |
#define BRW_SURFACEFORMAT_R10G10B10_SNORM_A2_UNORM 0x0C5 |
#define BRW_SURFACEFORMAT_R8G8B8A8_UNORM 0x0C7 |
#define BRW_SURFACEFORMAT_R8G8B8A8_UNORM_SRGB 0x0C8 |
#define BRW_SURFACEFORMAT_R8G8B8A8_SNORM 0x0C9 |
#define BRW_SURFACEFORMAT_R8G8B8A8_SINT 0x0CA |
#define BRW_SURFACEFORMAT_R8G8B8A8_UINT 0x0CB |
#define BRW_SURFACEFORMAT_R16G16_UNORM 0x0CC |
#define BRW_SURFACEFORMAT_R16G16_SNORM 0x0CD |
#define BRW_SURFACEFORMAT_R16G16_SINT 0x0CE |
#define BRW_SURFACEFORMAT_R16G16_UINT 0x0CF |
#define BRW_SURFACEFORMAT_R16G16_FLOAT 0x0D0 |
#define BRW_SURFACEFORMAT_B10G10R10A2_UNORM 0x0D1 |
#define BRW_SURFACEFORMAT_B10G10R10A2_UNORM_SRGB 0x0D2 |
#define BRW_SURFACEFORMAT_R11G11B10_FLOAT 0x0D3 |
#define BRW_SURFACEFORMAT_R32_SINT 0x0D6 |
#define BRW_SURFACEFORMAT_R32_UINT 0x0D7 |
#define BRW_SURFACEFORMAT_R32_FLOAT 0x0D8 |
#define BRW_SURFACEFORMAT_R24_UNORM_X8_TYPELESS 0x0D9 |
#define BRW_SURFACEFORMAT_X24_TYPELESS_G8_UINT 0x0DA |
#define BRW_SURFACEFORMAT_L16A16_UNORM 0x0DF |
#define BRW_SURFACEFORMAT_I24X8_UNORM 0x0E0 |
#define BRW_SURFACEFORMAT_L24X8_UNORM 0x0E1 |
#define BRW_SURFACEFORMAT_A24X8_UNORM 0x0E2 |
#define BRW_SURFACEFORMAT_I32_FLOAT 0x0E3 |
#define BRW_SURFACEFORMAT_L32_FLOAT 0x0E4 |
#define BRW_SURFACEFORMAT_A32_FLOAT 0x0E5 |
#define BRW_SURFACEFORMAT_B8G8R8X8_UNORM 0x0E9 |
#define BRW_SURFACEFORMAT_B8G8R8X8_UNORM_SRGB 0x0EA |
#define BRW_SURFACEFORMAT_R8G8B8X8_UNORM 0x0EB |
#define BRW_SURFACEFORMAT_R8G8B8X8_UNORM_SRGB 0x0EC |
#define BRW_SURFACEFORMAT_R9G9B9E5_SHAREDEXP 0x0ED |
#define BRW_SURFACEFORMAT_B10G10R10X2_UNORM 0x0EE |
#define BRW_SURFACEFORMAT_L16A16_FLOAT 0x0F0 |
#define BRW_SURFACEFORMAT_R32_UNORM 0x0F1 |
#define BRW_SURFACEFORMAT_R32_SNORM 0x0F2 |
#define BRW_SURFACEFORMAT_R10G10B10X2_USCALED 0x0F3 |
#define BRW_SURFACEFORMAT_R8G8B8A8_SSCALED 0x0F4 |
#define BRW_SURFACEFORMAT_R8G8B8A8_USCALED 0x0F5 |
#define BRW_SURFACEFORMAT_R16G16_SSCALED 0x0F6 |
#define BRW_SURFACEFORMAT_R16G16_USCALED 0x0F7 |
#define BRW_SURFACEFORMAT_R32_SSCALED 0x0F8 |
#define BRW_SURFACEFORMAT_R32_USCALED 0x0F9 |
#define BRW_SURFACEFORMAT_B5G6R5_UNORM 0x100 |
#define BRW_SURFACEFORMAT_B5G6R5_UNORM_SRGB 0x101 |
#define BRW_SURFACEFORMAT_B5G5R5A1_UNORM 0x102 |
#define BRW_SURFACEFORMAT_B5G5R5A1_UNORM_SRGB 0x103 |
#define BRW_SURFACEFORMAT_B4G4R4A4_UNORM 0x104 |
#define BRW_SURFACEFORMAT_B4G4R4A4_UNORM_SRGB 0x105 |
#define BRW_SURFACEFORMAT_R8G8_UNORM 0x106 |
#define BRW_SURFACEFORMAT_R8G8_SNORM 0x107 |
#define BRW_SURFACEFORMAT_R8G8_SINT 0x108 |
#define BRW_SURFACEFORMAT_R8G8_UINT 0x109 |
#define BRW_SURFACEFORMAT_R16_UNORM 0x10A |
#define BRW_SURFACEFORMAT_R16_SNORM 0x10B |
#define BRW_SURFACEFORMAT_R16_SINT 0x10C |
#define BRW_SURFACEFORMAT_R16_UINT 0x10D |
#define BRW_SURFACEFORMAT_R16_FLOAT 0x10E |
#define BRW_SURFACEFORMAT_A8P8_UNORM_PALETTE0 0x10F |
#define BRW_SURFACEFORMAT_A8P8_UNORM_PALETTE1 0x110 |
#define BRW_SURFACEFORMAT_I16_UNORM 0x111 |
#define BRW_SURFACEFORMAT_L16_UNORM 0x112 |
#define BRW_SURFACEFORMAT_A16_UNORM 0x113 |
#define BRW_SURFACEFORMAT_L8A8_UNORM 0x114 |
#define BRW_SURFACEFORMAT_I16_FLOAT 0x115 |
#define BRW_SURFACEFORMAT_L16_FLOAT 0x116 |
#define BRW_SURFACEFORMAT_A16_FLOAT 0x117 |
#define BRW_SURFACEFORMAT_L8A8_UNORM_SRGB 0x118 |
#define BRW_SURFACEFORMAT_R5G5_SNORM_B6_UNORM 0x119 |
#define BRW_SURFACEFORMAT_B5G5R5X1_UNORM 0x11A |
#define BRW_SURFACEFORMAT_B5G5R5X1_UNORM_SRGB 0x11B |
#define BRW_SURFACEFORMAT_R8G8_SSCALED 0x11C |
#define BRW_SURFACEFORMAT_R8G8_USCALED 0x11D |
#define BRW_SURFACEFORMAT_R16_SSCALED 0x11E |
#define BRW_SURFACEFORMAT_R16_USCALED 0x11F |
#define BRW_SURFACEFORMAT_P8A8_UNORM_PALETTE0 0x122 |
#define BRW_SURFACEFORMAT_P8A8_UNORM_PALETTE1 0x123 |
#define BRW_SURFACEFORMAT_A1B5G5R5_UNORM 0x124 |
#define BRW_SURFACEFORMAT_A4B4G4R4_UNORM 0x125 |
#define BRW_SURFACEFORMAT_L8A8_UINT 0x126 |
#define BRW_SURFACEFORMAT_L8A8_SINT 0x127 |
#define BRW_SURFACEFORMAT_R8_UNORM 0x140 |
#define BRW_SURFACEFORMAT_R8_SNORM 0x141 |
#define BRW_SURFACEFORMAT_R8_SINT 0x142 |
#define BRW_SURFACEFORMAT_R8_UINT 0x143 |
#define BRW_SURFACEFORMAT_A8_UNORM 0x144 |
#define BRW_SURFACEFORMAT_I8_UNORM 0x145 |
#define BRW_SURFACEFORMAT_L8_UNORM 0x146 |
#define BRW_SURFACEFORMAT_P4A4_UNORM 0x147 |
#define BRW_SURFACEFORMAT_A4P4_UNORM 0x148 |
#define BRW_SURFACEFORMAT_R8_SSCALED 0x149 |
#define BRW_SURFACEFORMAT_R8_USCALED 0x14A |
#define BRW_SURFACEFORMAT_P8_UNORM_PALETTE0 0x14B |
#define BRW_SURFACEFORMAT_L8_UNORM_SRGB 0x14C |
#define BRW_SURFACEFORMAT_P8_UNORM_PALETTE1 0x14D |
#define BRW_SURFACEFORMAT_P4A4_UNORM_PALETTE1 0x14E |
#define BRW_SURFACEFORMAT_A4P4_UNORM_PALETTE1 0x14F |
#define BRW_SURFACEFORMAT_Y8_SNORM 0x150 |
#define BRW_SURFACEFORMAT_L8_UINT 0x152 |
#define BRW_SURFACEFORMAT_L8_SINT 0x153 |
#define BRW_SURFACEFORMAT_I8_UINT 0x154 |
#define BRW_SURFACEFORMAT_I8_SINT 0x155 |
#define BRW_SURFACEFORMAT_DXT1_RGB_SRGB 0x180 |
#define BRW_SURFACEFORMAT_R1_UINT 0x181 |
#define BRW_SURFACEFORMAT_YCRCB_NORMAL 0x182 |
#define BRW_SURFACEFORMAT_YCRCB_SWAPUVY 0x183 |
#define BRW_SURFACEFORMAT_P2_UNORM_PALETTE0 0x184 |
#define BRW_SURFACEFORMAT_P2_UNORM_PALETTE1 0x185 |
#define BRW_SURFACEFORMAT_BC1_UNORM 0x186 |
#define BRW_SURFACEFORMAT_BC2_UNORM 0x187 |
#define BRW_SURFACEFORMAT_BC3_UNORM 0x188 |
#define BRW_SURFACEFORMAT_BC4_UNORM 0x189 |
#define BRW_SURFACEFORMAT_BC5_UNORM 0x18A |
#define BRW_SURFACEFORMAT_BC1_UNORM_SRGB 0x18B |
#define BRW_SURFACEFORMAT_BC2_UNORM_SRGB 0x18C |
#define BRW_SURFACEFORMAT_BC3_UNORM_SRGB 0x18D |
#define BRW_SURFACEFORMAT_MONO8 0x18E |
#define BRW_SURFACEFORMAT_YCRCB_SWAPUV 0x18F |
#define BRW_SURFACEFORMAT_YCRCB_SWAPY 0x190 |
#define BRW_SURFACEFORMAT_DXT1_RGB 0x191 |
#define BRW_SURFACEFORMAT_FXT1 0x192 |
#define BRW_SURFACEFORMAT_R8G8B8_UNORM 0x193 |
#define BRW_SURFACEFORMAT_R8G8B8_SNORM 0x194 |
#define BRW_SURFACEFORMAT_R8G8B8_SSCALED 0x195 |
#define BRW_SURFACEFORMAT_R8G8B8_USCALED 0x196 |
#define BRW_SURFACEFORMAT_R64G64B64A64_FLOAT 0x197 |
#define BRW_SURFACEFORMAT_R64G64B64_FLOAT 0x198 |
#define BRW_SURFACEFORMAT_BC4_SNORM 0x199 |
#define BRW_SURFACEFORMAT_BC5_SNORM 0x19A |
#define BRW_SURFACEFORMAT_R16G16B16_FLOAT 0x19B |
#define BRW_SURFACEFORMAT_R16G16B16_UNORM 0x19C |
#define BRW_SURFACEFORMAT_R16G16B16_SNORM 0x19D |
#define BRW_SURFACEFORMAT_R16G16B16_SSCALED 0x19E |
#define BRW_SURFACEFORMAT_R16G16B16_USCALED 0x19F |
#define BRW_SURFACEFORMAT_BC6H_SF16 0x1A1 |
#define BRW_SURFACEFORMAT_BC7_UNORM 0x1A2 |
#define BRW_SURFACEFORMAT_BC7_UNORM_SRGB 0x1A3 |
#define BRW_SURFACEFORMAT_BC6H_UF16 0x1A4 |
#define BRW_SURFACEFORMAT_PLANAR_420_8 0x1A5 |
#define BRW_SURFACEFORMAT_R8G8B8_UNORM_SRGB 0x1A8 |
#define BRW_SURFACEFORMAT_ETC1_RGB8 0x1A9 |
#define BRW_SURFACEFORMAT_ETC2_RGB8 0x1AA |
#define BRW_SURFACEFORMAT_EAC_R11 0x1AB |
#define BRW_SURFACEFORMAT_EAC_RG11 0x1AC |
#define BRW_SURFACEFORMAT_EAC_SIGNED_R11 0x1AD |
#define BRW_SURFACEFORMAT_EAC_SIGNED_RG11 0x1AE |
#define BRW_SURFACEFORMAT_ETC2_SRGB8 0x1AF |
#define BRW_SURFACEFORMAT_R16G16B16_UINT 0x1B0 |
#define BRW_SURFACEFORMAT_R16G16B16_SINT 0x1B1 |
#define BRW_SURFACEFORMAT_R32_SFIXED 0x1B2 |
#define BRW_SURFACEFORMAT_R10G10B10A2_SNORM 0x1B3 |
#define BRW_SURFACEFORMAT_R10G10B10A2_USCALED 0x1B4 |
#define BRW_SURFACEFORMAT_R10G10B10A2_SSCALED 0x1B5 |
#define BRW_SURFACEFORMAT_R10G10B10A2_SINT 0x1B6 |
#define BRW_SURFACEFORMAT_B10G10R10A2_SNORM 0x1B7 |
#define BRW_SURFACEFORMAT_B10G10R10A2_USCALED 0x1B8 |
#define BRW_SURFACEFORMAT_B10G10R10A2_SSCALED 0x1B9 |
#define BRW_SURFACEFORMAT_B10G10R10A2_UINT 0x1BA |
#define BRW_SURFACEFORMAT_B10G10R10A2_SINT 0x1BB |
#define BRW_SURFACEFORMAT_R64G64B64A64_PASSTHRU 0x1BC |
#define BRW_SURFACEFORMAT_R64G64B64_PASSTHRU 0x1BD |
#define BRW_SURFACEFORMAT_ETC2_RGB8_PTA 0x1C0 |
#define BRW_SURFACEFORMAT_ETC2_SRGB8_PTA 0x1C1 |
#define BRW_SURFACEFORMAT_ETC2_EAC_RGBA8 0x1C2 |
#define BRW_SURFACEFORMAT_ETC2_EAC_SRGB8_A8 0x1C3 |
#define BRW_SURFACEFORMAT_R8G8B8_UINT 0x1C8 |
#define BRW_SURFACEFORMAT_R8G8B8_SINT 0x1C9 |
#define BRW_SURFACEFORMAT_RAW 0x1FF |
#define BRW_SURFACE_FORMAT_SHIFT 18 |
#define BRW_SURFACE_FORMAT_MASK INTEL_MASK(26, 18) |
#define BRW_SURFACERETURNFORMAT_FLOAT32 0 |
#define BRW_SURFACERETURNFORMAT_S1 1 |
#define BRW_SURFACE_TYPE_SHIFT 29 |
#define BRW_SURFACE_TYPE_MASK INTEL_MASK(31, 29) |
#define BRW_SURFACE_1D 0 |
#define BRW_SURFACE_2D 1 |
#define BRW_SURFACE_3D 2 |
#define BRW_SURFACE_CUBE 3 |
#define BRW_SURFACE_BUFFER 4 |
#define BRW_SURFACE_NULL 7 |
#define GEN7_SURFACE_IS_ARRAY (1 << 28) |
#define GEN7_SURFACE_VALIGN_2 (0 << 16) |
#define GEN7_SURFACE_VALIGN_4 (1 << 16) |
#define GEN7_SURFACE_HALIGN_4 (0 << 15) |
#define GEN7_SURFACE_HALIGN_8 (1 << 15) |
#define GEN7_SURFACE_TILING_NONE (0 << 13) |
#define GEN7_SURFACE_TILING_X (2 << 13) |
#define GEN7_SURFACE_TILING_Y (3 << 13) |
#define GEN7_SURFACE_ARYSPC_FULL (0 << 10) |
#define GEN7_SURFACE_ARYSPC_LOD0 (1 << 10) |
/* Surface state DW2 */ |
#define BRW_SURFACE_HEIGHT_SHIFT 19 |
#define BRW_SURFACE_HEIGHT_MASK INTEL_MASK(31, 19) |
#define BRW_SURFACE_WIDTH_SHIFT 6 |
#define BRW_SURFACE_WIDTH_MASK INTEL_MASK(18, 6) |
#define BRW_SURFACE_LOD_SHIFT 2 |
#define BRW_SURFACE_LOD_MASK INTEL_MASK(5, 2) |
#define GEN7_SURFACE_HEIGHT_SHIFT 16 |
#define GEN7_SURFACE_HEIGHT_MASK INTEL_MASK(29, 16) |
#define GEN7_SURFACE_WIDTH_SHIFT 0 |
#define GEN7_SURFACE_WIDTH_MASK INTEL_MASK(13, 0) |
/* Surface state DW3 */ |
#define BRW_SURFACE_DEPTH_SHIFT 21 |
#define BRW_SURFACE_DEPTH_MASK INTEL_MASK(31, 21) |
#define BRW_SURFACE_PITCH_SHIFT 3 |
#define BRW_SURFACE_PITCH_MASK INTEL_MASK(19, 3) |
#define BRW_SURFACE_TILED (1 << 1) |
#define BRW_SURFACE_TILED_Y (1 << 0) |
/* Surface state DW4 */ |
#define BRW_SURFACE_MIN_LOD_SHIFT 28 |
#define BRW_SURFACE_MIN_LOD_MASK INTEL_MASK(31, 28) |
#define BRW_SURFACE_MULTISAMPLECOUNT_1 (0 << 4) |
#define BRW_SURFACE_MULTISAMPLECOUNT_4 (2 << 4) |
#define GEN7_SURFACE_MULTISAMPLECOUNT_1 (0 << 3) |
#define GEN7_SURFACE_MULTISAMPLECOUNT_4 (2 << 3) |
#define GEN7_SURFACE_MULTISAMPLECOUNT_8 (3 << 3) |
#define GEN7_SURFACE_MSFMT_MSS (0 << 6) |
#define GEN7_SURFACE_MSFMT_DEPTH_STENCIL (1 << 6) |
/* Surface state DW5 */ |
#define BRW_SURFACE_X_OFFSET_SHIFT 25 |
#define BRW_SURFACE_X_OFFSET_MASK INTEL_MASK(31, 25) |
#define BRW_SURFACE_VERTICAL_ALIGN_ENABLE (1 << 24) |
#define BRW_SURFACE_Y_OFFSET_SHIFT 20 |
#define BRW_SURFACE_Y_OFFSET_MASK INTEL_MASK(23, 20) |
#define GEN7_SURFACE_MIN_LOD_SHIFT 4 |
#define GEN7_SURFACE_MIN_LOD_MASK INTEL_MASK(7, 4) |
/* Surface state DW6 */ |
#define GEN7_SURFACE_MCS_ENABLE (1 << 0) |
#define GEN7_SURFACE_MCS_PITCH_SHIFT 3 |
#define GEN7_SURFACE_MCS_PITCH_MASK INTEL_MASK(11, 3) |
/* Surface state DW7 */ |
#define GEN7_SURFACE_SCS_R_SHIFT 25 |
#define GEN7_SURFACE_SCS_R_MASK INTEL_MASK(27, 25) |
#define GEN7_SURFACE_SCS_G_SHIFT 22 |
#define GEN7_SURFACE_SCS_G_MASK INTEL_MASK(24, 22) |
#define GEN7_SURFACE_SCS_B_SHIFT 19 |
#define GEN7_SURFACE_SCS_B_MASK INTEL_MASK(21, 19) |
#define GEN7_SURFACE_SCS_A_SHIFT 16 |
#define GEN7_SURFACE_SCS_A_MASK INTEL_MASK(18, 16) |
/* The actual swizzle values/what channel to use */ |
#define HSW_SCS_ZERO 0 |
#define HSW_SCS_ONE 1 |
#define HSW_SCS_RED 4 |
#define HSW_SCS_GREEN 5 |
#define HSW_SCS_BLUE 6 |
#define HSW_SCS_ALPHA 7 |
#define BRW_TEXCOORDMODE_WRAP 0 |
#define BRW_TEXCOORDMODE_MIRROR 1 |
#define BRW_TEXCOORDMODE_CLAMP 2 |
#define BRW_TEXCOORDMODE_CUBE 3 |
#define BRW_TEXCOORDMODE_CLAMP_BORDER 4 |
#define BRW_TEXCOORDMODE_MIRROR_ONCE 5 |
#define BRW_THREAD_PRIORITY_NORMAL 0 |
#define BRW_THREAD_PRIORITY_HIGH 1 |
#define BRW_TILEWALK_XMAJOR 0 |
#define BRW_TILEWALK_YMAJOR 1 |
#define BRW_VERTEX_SUBPIXEL_PRECISION_8BITS 0 |
#define BRW_VERTEX_SUBPIXEL_PRECISION_4BITS 1 |
/* Execution Unit (EU) defines |
*/ |
#define BRW_ALIGN_1 0 |
#define BRW_ALIGN_16 1 |
#define BRW_ADDRESS_DIRECT 0 |
#define BRW_ADDRESS_REGISTER_INDIRECT_REGISTER 1 |
#define BRW_CHANNEL_X 0 |
#define BRW_CHANNEL_Y 1 |
#define BRW_CHANNEL_Z 2 |
#define BRW_CHANNEL_W 3 |
/* Instruction compression control (pre-Gen6 encoding; Gen6+ uses the
 * GEN6_COMPRESSION_* quarter-control values defined below).
 */
enum brw_compression {
   BRW_COMPRESSION_NONE = 0,
   BRW_COMPRESSION_2NDHALF = 1,     /* second half of a compressed pair */
   BRW_COMPRESSION_COMPRESSED = 2,
};
#define GEN6_COMPRESSION_1Q 0 |
#define GEN6_COMPRESSION_2Q 1 |
#define GEN6_COMPRESSION_3Q 2 |
#define GEN6_COMPRESSION_4Q 3 |
#define GEN6_COMPRESSION_1H 0 |
#define GEN6_COMPRESSION_2H 2 |
#define BRW_CONDITIONAL_NONE 0 |
#define BRW_CONDITIONAL_Z 1 |
#define BRW_CONDITIONAL_NZ 2 |
#define BRW_CONDITIONAL_EQ 1 /* Z */ |
#define BRW_CONDITIONAL_NEQ 2 /* NZ */ |
#define BRW_CONDITIONAL_G 3 |
#define BRW_CONDITIONAL_GE 4 |
#define BRW_CONDITIONAL_L 5 |
#define BRW_CONDITIONAL_LE 6 |
#define BRW_CONDITIONAL_R 7 |
#define BRW_CONDITIONAL_O 8 |
#define BRW_CONDITIONAL_U 9 |
#define BRW_DEBUG_NONE 0 |
#define BRW_DEBUG_BREAKPOINT 1 |
#define BRW_DEPENDENCY_NORMAL 0 |
#define BRW_DEPENDENCY_NOTCLEARED 1 |
#define BRW_DEPENDENCY_NOTCHECKED 2 |
#define BRW_DEPENDENCY_DISABLE 3 |
#define BRW_EXECUTE_1 0 |
#define BRW_EXECUTE_2 1 |
#define BRW_EXECUTE_4 2 |
#define BRW_EXECUTE_8 3 |
#define BRW_EXECUTE_16 4 |
#define BRW_EXECUTE_32 5 |
#define BRW_HORIZONTAL_STRIDE_0 0 |
#define BRW_HORIZONTAL_STRIDE_1 1 |
#define BRW_HORIZONTAL_STRIDE_2 2 |
#define BRW_HORIZONTAL_STRIDE_4 3 |
#define BRW_INSTRUCTION_NORMAL 0 |
#define BRW_INSTRUCTION_SATURATE 1 |
#define BRW_MASK_ENABLE 0 |
#define BRW_MASK_DISABLE 1 |
/** @{ |
* |
* Gen6 has replaced "mask enable/disable" with WECtrl, which is |
* effectively the same but much simpler to think about. Now, there |
* are two contributors ANDed together to whether channels are |
* executed: The predication on the instruction, and the channel write |
* enable. |
*/ |
/** |
* This is the default value. It means that a channel's write enable is set |
* if the per-channel IP is pointing at this instruction. |
*/ |
#define BRW_WE_NORMAL 0 |
/** |
* This is used like BRW_MASK_DISABLE, and causes all channels to have |
* their write enable set. Note that predication still contributes to |
* whether the channel actually gets written. |
*/ |
#define BRW_WE_ALL 1 |
/** @} */ |
/* EU instruction opcodes.
 *
 * Values up to BRW_OPCODE_NOP are the literal hardware encodings from the
 * ISA; the gaps in the numbering are reserved/unused encodings.  Values from
 * FS_OPCODE_FB_WRITE (128) onward are compiler-internal virtual opcodes that
 * never reach the hardware — they are lowered to real instructions (typically
 * SEND/MATH sequences) by the code generator.
 */
enum opcode {
   /* These are the actual hardware opcodes. */
   BRW_OPCODE_MOV = 1,
   BRW_OPCODE_SEL = 2,
   /* Bitwise / shift ops. */
   BRW_OPCODE_NOT = 4,
   BRW_OPCODE_AND = 5,
   BRW_OPCODE_OR = 6,
   BRW_OPCODE_XOR = 7,
   BRW_OPCODE_SHR = 8,
   BRW_OPCODE_SHL = 9,
   BRW_OPCODE_RSR = 10,
   BRW_OPCODE_RSL = 11,
   BRW_OPCODE_ASR = 12,
   /* Comparison. */
   BRW_OPCODE_CMP = 16,
   BRW_OPCODE_CMPN = 17,
   /* Half-float conversions and bitfield ops (Gen7+ additions). */
   BRW_OPCODE_F32TO16 = 19,
   BRW_OPCODE_F16TO32 = 20,
   BRW_OPCODE_BFREV = 23,
   BRW_OPCODE_BFE = 24,
   BRW_OPCODE_BFI1 = 25,
   BRW_OPCODE_BFI2 = 26,
   /* Flow control. */
   BRW_OPCODE_JMPI = 32,
   BRW_OPCODE_IF = 34,
   BRW_OPCODE_IFF = 35,
   BRW_OPCODE_ELSE = 36,
   BRW_OPCODE_ENDIF = 37,
   BRW_OPCODE_DO = 38,
   BRW_OPCODE_WHILE = 39,
   BRW_OPCODE_BREAK = 40,
   BRW_OPCODE_CONTINUE = 41,
   BRW_OPCODE_HALT = 42,
   BRW_OPCODE_MSAVE = 44,
   BRW_OPCODE_MRESTORE = 45,
   BRW_OPCODE_PUSH = 46,
   BRW_OPCODE_POP = 47,
   BRW_OPCODE_WAIT = 48,
   /* Message send (shared-function dispatch). */
   BRW_OPCODE_SEND = 49,
   BRW_OPCODE_SENDC = 50,
   BRW_OPCODE_MATH = 56,
   /* Arithmetic. */
   BRW_OPCODE_ADD = 64,
   BRW_OPCODE_MUL = 65,
   BRW_OPCODE_AVG = 66,
   BRW_OPCODE_FRC = 67,
   BRW_OPCODE_RNDU = 68,
   BRW_OPCODE_RNDD = 69,
   BRW_OPCODE_RNDE = 70,
   BRW_OPCODE_RNDZ = 71,
   BRW_OPCODE_MAC = 72,
   BRW_OPCODE_MACH = 73,
   BRW_OPCODE_LZD = 74,
   BRW_OPCODE_FBH = 75,
   BRW_OPCODE_FBL = 76,
   BRW_OPCODE_CBIT = 77,
   BRW_OPCODE_SAD2 = 80,
   BRW_OPCODE_SADA2 = 81,
   /* Dot products / interpolation helpers. */
   BRW_OPCODE_DP4 = 84,
   BRW_OPCODE_DPH = 85,
   BRW_OPCODE_DP3 = 86,
   BRW_OPCODE_DP2 = 87,
   BRW_OPCODE_DPA2 = 88,
   BRW_OPCODE_LINE = 89,
   BRW_OPCODE_PLN = 90,
   /* Three-source instructions (Gen6+). */
   BRW_OPCODE_MAD = 91,
   BRW_OPCODE_LRP = 92,
   BRW_OPCODE_NOP = 126,
   /* These are compiler backend opcodes that get translated into other
    * instructions.
    */
   FS_OPCODE_FB_WRITE = 128,
   /* Math-box virtual ops, lowered to BRW_OPCODE_MATH or SEND. */
   SHADER_OPCODE_RCP,
   SHADER_OPCODE_RSQ,
   SHADER_OPCODE_SQRT,
   SHADER_OPCODE_EXP2,
   SHADER_OPCODE_LOG2,
   SHADER_OPCODE_POW,
   SHADER_OPCODE_INT_QUOTIENT,
   SHADER_OPCODE_INT_REMAINDER,
   SHADER_OPCODE_SIN,
   SHADER_OPCODE_COS,
   /* Texturing virtual ops, lowered to sampler messages. */
   SHADER_OPCODE_TEX,
   SHADER_OPCODE_TXD,
   SHADER_OPCODE_TXF,
   SHADER_OPCODE_TXL,
   SHADER_OPCODE_TXS,
   FS_OPCODE_TXB,
   SHADER_OPCODE_TXF_MS,
   SHADER_OPCODE_LOD,
   SHADER_OPCODE_SHADER_TIME_ADD,
   /* Fragment-shader-only virtual ops. */
   FS_OPCODE_DDX,
   FS_OPCODE_DDY,
   FS_OPCODE_PIXEL_X,
   FS_OPCODE_PIXEL_Y,
   FS_OPCODE_CINTERP,
   FS_OPCODE_LINTERP,
   FS_OPCODE_SPILL,
   FS_OPCODE_UNSPILL,
   FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD,
   FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD_GEN7,
   FS_OPCODE_VARYING_PULL_CONSTANT_LOAD,
   FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_GEN7,
   FS_OPCODE_MOV_DISPATCH_TO_FLAGS,
   FS_OPCODE_DISCARD_JUMP,
   FS_OPCODE_SET_SIMD4X2_OFFSET,
   FS_OPCODE_PACK_HALF_2x16_SPLIT,
   FS_OPCODE_UNPACK_HALF_2x16_SPLIT_X,
   FS_OPCODE_UNPACK_HALF_2x16_SPLIT_Y,
   FS_OPCODE_PLACEHOLDER_HALT,
   /* Vertex-shader-only virtual ops. */
   VS_OPCODE_URB_WRITE,
   VS_OPCODE_SCRATCH_READ,
   VS_OPCODE_SCRATCH_WRITE,
   VS_OPCODE_PULL_CONSTANT_LOAD,
   VS_OPCODE_PULL_CONSTANT_LOAD_GEN7,
};
#define BRW_PREDICATE_NONE 0 |
#define BRW_PREDICATE_NORMAL 1 |
#define BRW_PREDICATE_ALIGN1_ANYV 2 |
#define BRW_PREDICATE_ALIGN1_ALLV 3 |
#define BRW_PREDICATE_ALIGN1_ANY2H 4 |
#define BRW_PREDICATE_ALIGN1_ALL2H 5 |
#define BRW_PREDICATE_ALIGN1_ANY4H 6 |
#define BRW_PREDICATE_ALIGN1_ALL4H 7 |
#define BRW_PREDICATE_ALIGN1_ANY8H 8 |
#define BRW_PREDICATE_ALIGN1_ALL8H 9 |
#define BRW_PREDICATE_ALIGN1_ANY16H 10 |
#define BRW_PREDICATE_ALIGN1_ALL16H 11 |
#define BRW_PREDICATE_ALIGN16_REPLICATE_X 2 |
#define BRW_PREDICATE_ALIGN16_REPLICATE_Y 3 |
#define BRW_PREDICATE_ALIGN16_REPLICATE_Z 4 |
#define BRW_PREDICATE_ALIGN16_REPLICATE_W 5 |
#define BRW_PREDICATE_ALIGN16_ANY4H 6 |
#define BRW_PREDICATE_ALIGN16_ALL4H 7 |
#define BRW_ARCHITECTURE_REGISTER_FILE 0 |
#define BRW_GENERAL_REGISTER_FILE 1 |
#define BRW_MESSAGE_REGISTER_FILE 2 |
#define BRW_IMMEDIATE_VALUE 3 |
/* Register data types.  Note the deliberate encoding overlaps below:
 * VF shares encoding 5 with B, and V shares encoding 6 with HF.  The
 * packed-vector forms (VF, V) are valid only as immediates, where B/HF
 * cannot appear, so the aliasing is unambiguous in context.
 */
#define BRW_REGISTER_TYPE_UD  0
#define BRW_REGISTER_TYPE_D   1
#define BRW_REGISTER_TYPE_UW  2
#define BRW_REGISTER_TYPE_W   3
#define BRW_REGISTER_TYPE_UB  4
#define BRW_REGISTER_TYPE_B   5
#define BRW_REGISTER_TYPE_VF  5	/* packed float vector, immediates only? */
#define BRW_REGISTER_TYPE_HF  6
#define BRW_REGISTER_TYPE_V   6	/* packed int vector, immediates only, uword dest only */
#define BRW_REGISTER_TYPE_F   7
/* SNB adds 3-src instructions (MAD and LRP) that only operate on floats, so
 * the types were implied. IVB adds BFE and BFI2 that operate on doublewords
 * and unsigned doublewords, so a new field is also available in the da3src
 * struct (part of struct brw_instruction.bits1 in brw_structs.h) to select
 * dst and shared-src types. The values are different from BRW_REGISTER_TYPE_*.
 */
#define BRW_3SRC_TYPE_F  0
#define BRW_3SRC_TYPE_D  1
#define BRW_3SRC_TYPE_UD 2
#define BRW_3SRC_TYPE_DF 3
#define BRW_ARF_NULL 0x00 |
#define BRW_ARF_ADDRESS 0x10 |
#define BRW_ARF_ACCUMULATOR 0x20 |
#define BRW_ARF_FLAG 0x30 |
#define BRW_ARF_MASK 0x40 |
#define BRW_ARF_MASK_STACK 0x50 |
#define BRW_ARF_MASK_STACK_DEPTH 0x60 |
#define BRW_ARF_STATE 0x70 |
#define BRW_ARF_CONTROL 0x80 |
#define BRW_ARF_NOTIFICATION_COUNT 0x90 |
#define BRW_ARF_IP 0xA0 |
#define BRW_ARF_TDR 0xB0 |
#define BRW_ARF_TIMESTAMP 0xC0 |
#define BRW_MRF_COMPR4 (1 << 7) |
#define BRW_AMASK 0 |
#define BRW_IMASK 1 |
#define BRW_LMASK 2 |
#define BRW_CMASK 3 |
#define BRW_THREAD_NORMAL 0 |
#define BRW_THREAD_ATOMIC 1 |
#define BRW_THREAD_SWITCH 2 |
#define BRW_VERTICAL_STRIDE_0 0 |
#define BRW_VERTICAL_STRIDE_1 1 |
#define BRW_VERTICAL_STRIDE_2 2 |
#define BRW_VERTICAL_STRIDE_4 3 |
#define BRW_VERTICAL_STRIDE_8 4 |
#define BRW_VERTICAL_STRIDE_16 5 |
#define BRW_VERTICAL_STRIDE_32 6 |
#define BRW_VERTICAL_STRIDE_64 7 |
#define BRW_VERTICAL_STRIDE_128 8 |
#define BRW_VERTICAL_STRIDE_256 9 |
#define BRW_VERTICAL_STRIDE_ONE_DIMENSIONAL 0xF |
#define BRW_WIDTH_1 0 |
#define BRW_WIDTH_2 1 |
#define BRW_WIDTH_4 2 |
#define BRW_WIDTH_8 3 |
#define BRW_WIDTH_16 4 |
#define BRW_STATELESS_BUFFER_BOUNDARY_1K 0 |
#define BRW_STATELESS_BUFFER_BOUNDARY_2K 1 |
#define BRW_STATELESS_BUFFER_BOUNDARY_4K 2 |
#define BRW_STATELESS_BUFFER_BOUNDARY_8K 3 |
#define BRW_STATELESS_BUFFER_BOUNDARY_16K 4 |
#define BRW_STATELESS_BUFFER_BOUNDARY_32K 5 |
#define BRW_STATELESS_BUFFER_BOUNDARY_64K 6 |
#define BRW_STATELESS_BUFFER_BOUNDARY_128K 7 |
#define BRW_STATELESS_BUFFER_BOUNDARY_256K 8 |
#define BRW_STATELESS_BUFFER_BOUNDARY_512K 9 |
#define BRW_STATELESS_BUFFER_BOUNDARY_1M 10 |
#define BRW_STATELESS_BUFFER_BOUNDARY_2M 11 |
#define BRW_POLYGON_FACING_FRONT 0 |
#define BRW_POLYGON_FACING_BACK 1 |
/** |
* Message target: Shared Function ID for where to SEND a message. |
* |
* These are enumerated in the ISA reference under "send - Send Message". |
* In particular, see the following tables: |
* - G45 PRM, Volume 4, Table 14-15 "Message Descriptor Definition" |
* - Sandybridge PRM, Volume 4 Part 2, Table 8-16 "Extended Message Descriptor" |
* - BSpec, Volume 1a (GPU Overview) / Graphics Processing Engine (GPE) / |
* Overview / GPE Function IDs |
*/ |
enum brw_message_target {
   BRW_SFID_NULL                     = 0,
   BRW_SFID_MATH                     = 1, /* Only valid on Gen4-5 */
   BRW_SFID_SAMPLER                  = 2,
   BRW_SFID_MESSAGE_GATEWAY          = 3,
   BRW_SFID_DATAPORT_READ            = 4,
   BRW_SFID_DATAPORT_WRITE           = 5,
   BRW_SFID_URB                      = 6,
   BRW_SFID_THREAD_SPAWNER           = 7,

   /* Gen6 reuses SFIDs 4 and 5: the read/write split is replaced by
    * per-cache dataports.  Gen7/HSW add further data-cache ports.
    */
   GEN6_SFID_DATAPORT_SAMPLER_CACHE  = 4,
   GEN6_SFID_DATAPORT_RENDER_CACHE   = 5,
   GEN6_SFID_DATAPORT_CONSTANT_CACHE = 9,

   GEN7_SFID_DATAPORT_DATA_CACHE     = 10,
   HSW_SFID_DATAPORT_DATA_CACHE_1    = 12,
};
#define GEN7_MESSAGE_TARGET_DP_DATA_CACHE 10 |
#define BRW_SAMPLER_RETURN_FORMAT_FLOAT32 0 |
#define BRW_SAMPLER_RETURN_FORMAT_UINT32 2 |
#define BRW_SAMPLER_RETURN_FORMAT_SINT32 3 |
#define BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE 0 |
#define BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE 0 |
#define BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS 0 |
#define BRW_SAMPLER_MESSAGE_SIMD8_KILLPIX 1 |
#define BRW_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_LOD 1 |
#define BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_LOD 1 |
#define BRW_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_GRADIENTS 2 |
#define BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_GRADIENTS 2 |
#define BRW_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_COMPARE 0 |
#define BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_COMPARE 2 |
#define BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_BIAS_COMPARE 0 |
#define BRW_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_LOD_COMPARE 1 |
#define BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_LOD_COMPARE 1 |
#define BRW_SAMPLER_MESSAGE_SIMD4X2_RESINFO 2 |
#define BRW_SAMPLER_MESSAGE_SIMD16_RESINFO 2 |
#define BRW_SAMPLER_MESSAGE_SIMD4X2_LD 3 |
#define BRW_SAMPLER_MESSAGE_SIMD8_LD 3 |
#define BRW_SAMPLER_MESSAGE_SIMD16_LD 3 |
#define GEN5_SAMPLER_MESSAGE_SAMPLE 0 |
#define GEN5_SAMPLER_MESSAGE_SAMPLE_BIAS 1 |
#define GEN5_SAMPLER_MESSAGE_SAMPLE_LOD 2 |
#define GEN5_SAMPLER_MESSAGE_SAMPLE_COMPARE 3 |
#define GEN5_SAMPLER_MESSAGE_SAMPLE_DERIVS 4 |
#define GEN5_SAMPLER_MESSAGE_SAMPLE_BIAS_COMPARE 5 |
#define GEN5_SAMPLER_MESSAGE_SAMPLE_LOD_COMPARE 6 |
#define GEN5_SAMPLER_MESSAGE_SAMPLE_LD 7 |
#define GEN5_SAMPLER_MESSAGE_LOD 9 |
#define GEN5_SAMPLER_MESSAGE_SAMPLE_RESINFO 10 |
#define HSW_SAMPLER_MESSAGE_SAMPLE_DERIV_COMPARE 20 |
#define GEN7_SAMPLER_MESSAGE_SAMPLE_LD_MCS 29 |
#define GEN7_SAMPLER_MESSAGE_SAMPLE_LD2DMS 30 |
#define GEN7_SAMPLER_MESSAGE_SAMPLE_LD2DSS 31 |
/* for GEN5 only */ |
#define BRW_SAMPLER_SIMD_MODE_SIMD4X2 0 |
#define BRW_SAMPLER_SIMD_MODE_SIMD8 1 |
#define BRW_SAMPLER_SIMD_MODE_SIMD16 2 |
#define BRW_SAMPLER_SIMD_MODE_SIMD32_64 3 |
#define BRW_DATAPORT_OWORD_BLOCK_1_OWORDLOW 0 |
#define BRW_DATAPORT_OWORD_BLOCK_1_OWORDHIGH 1 |
#define BRW_DATAPORT_OWORD_BLOCK_2_OWORDS 2 |
#define BRW_DATAPORT_OWORD_BLOCK_4_OWORDS 3 |
#define BRW_DATAPORT_OWORD_BLOCK_8_OWORDS 4 |
#define BRW_DATAPORT_OWORD_DUAL_BLOCK_1OWORD 0 |
#define BRW_DATAPORT_OWORD_DUAL_BLOCK_4OWORDS 2 |
#define BRW_DATAPORT_DWORD_SCATTERED_BLOCK_8DWORDS 2 |
#define BRW_DATAPORT_DWORD_SCATTERED_BLOCK_16DWORDS 3 |
/* This one stays the same across generations. */ |
#define BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ 0 |
/* GEN4 */ |
#define BRW_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ 1 |
#define BRW_DATAPORT_READ_MESSAGE_MEDIA_BLOCK_READ 2 |
#define BRW_DATAPORT_READ_MESSAGE_DWORD_SCATTERED_READ 3 |
/* G45, GEN5 */ |
#define G45_DATAPORT_READ_MESSAGE_RENDER_UNORM_READ 1 |
#define G45_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ 2 |
#define G45_DATAPORT_READ_MESSAGE_AVC_LOOP_FILTER_READ 3 |
#define G45_DATAPORT_READ_MESSAGE_MEDIA_BLOCK_READ 4 |
#define G45_DATAPORT_READ_MESSAGE_DWORD_SCATTERED_READ 6 |
/* GEN6 */ |
#define GEN6_DATAPORT_READ_MESSAGE_RENDER_UNORM_READ 1 |
#define GEN6_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ 2 |
#define GEN6_DATAPORT_READ_MESSAGE_MEDIA_BLOCK_READ 4 |
#define GEN6_DATAPORT_READ_MESSAGE_OWORD_UNALIGN_BLOCK_READ 5 |
#define GEN6_DATAPORT_READ_MESSAGE_DWORD_SCATTERED_READ 6 |
#define BRW_DATAPORT_READ_TARGET_DATA_CACHE 0 |
#define BRW_DATAPORT_READ_TARGET_RENDER_CACHE 1 |
#define BRW_DATAPORT_READ_TARGET_SAMPLER_CACHE 2 |
#define BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE 0 |
#define BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE_REPLICATED 1 |
#define BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_DUAL_SOURCE_SUBSPAN01 2 |
#define BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_DUAL_SOURCE_SUBSPAN23 3 |
#define BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_SINGLE_SOURCE_SUBSPAN01 4 |
#define BRW_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE 0 |
#define BRW_DATAPORT_WRITE_MESSAGE_OWORD_DUAL_BLOCK_WRITE 1 |
#define BRW_DATAPORT_WRITE_MESSAGE_MEDIA_BLOCK_WRITE 2 |
#define BRW_DATAPORT_WRITE_MESSAGE_DWORD_SCATTERED_WRITE 3 |
#define BRW_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE 4 |
#define BRW_DATAPORT_WRITE_MESSAGE_STREAMED_VERTEX_BUFFER_WRITE 5 |
#define BRW_DATAPORT_WRITE_MESSAGE_FLUSH_RENDER_CACHE 7 |
/* GEN6 */ |
#define GEN6_DATAPORT_WRITE_MESSAGE_DWORD_ATOMIC_WRITE 7 |
#define GEN6_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE 8 |
#define GEN6_DATAPORT_WRITE_MESSAGE_OWORD_DUAL_BLOCK_WRITE 9 |
#define GEN6_DATAPORT_WRITE_MESSAGE_MEDIA_BLOCK_WRITE 10 |
#define GEN6_DATAPORT_WRITE_MESSAGE_DWORD_SCATTERED_WRITE 11 |
#define GEN6_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE 12 |
#define GEN6_DATAPORT_WRITE_MESSAGE_STREAMED_VB_WRITE 13 |
#define GEN6_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_UNORM_WRITE 14 |
/* GEN7 */ |
#define GEN7_DATAPORT_WRITE_MESSAGE_OWORD_DUAL_BLOCK_WRITE 10 |
#define GEN7_DATAPORT_DC_OWORD_BLOCK_READ 0 |
#define GEN7_DATAPORT_DC_UNALIGNED_OWORD_BLOCK_READ 1 |
#define GEN7_DATAPORT_DC_OWORD_DUAL_BLOCK_READ 2 |
#define GEN7_DATAPORT_DC_DWORD_SCATTERED_READ 3 |
#define GEN7_DATAPORT_DC_BYTE_SCATTERED_READ 4 |
#define GEN7_DATAPORT_DC_UNTYPED_SURFACE_READ 5 |
#define GEN7_DATAPORT_DC_UNTYPED_ATOMIC_OP 6 |
#define GEN7_DATAPORT_DC_MEMORY_FENCE 7 |
#define GEN7_DATAPORT_DC_OWORD_BLOCK_WRITE 8 |
#define GEN7_DATAPORT_DC_OWORD_DUAL_BLOCK_WRITE 10 |
#define GEN7_DATAPORT_DC_DWORD_SCATTERED_WRITE 11 |
#define GEN7_DATAPORT_DC_BYTE_SCATTERED_WRITE 12 |
#define GEN7_DATAPORT_DC_UNTYPED_SURFACE_WRITE 13 |
/* HSW */ |
#define HSW_DATAPORT_DC_PORT0_OWORD_BLOCK_READ 0 |
#define HSW_DATAPORT_DC_PORT0_UNALIGNED_OWORD_BLOCK_READ 1 |
#define HSW_DATAPORT_DC_PORT0_OWORD_DUAL_BLOCK_READ 2 |
#define HSW_DATAPORT_DC_PORT0_DWORD_SCATTERED_READ 3 |
#define HSW_DATAPORT_DC_PORT0_BYTE_SCATTERED_READ 4 |
#define HSW_DATAPORT_DC_PORT0_MEMORY_FENCE 7 |
#define HSW_DATAPORT_DC_PORT0_OWORD_BLOCK_WRITE 8 |
#define HSW_DATAPORT_DC_PORT0_OWORD_DUAL_BLOCK_WRITE 10 |
#define HSW_DATAPORT_DC_PORT0_DWORD_SCATTERED_WRITE 11 |
#define HSW_DATAPORT_DC_PORT0_BYTE_SCATTERED_WRITE 12 |
#define HSW_DATAPORT_DC_PORT1_UNTYPED_SURFACE_READ 1 |
#define HSW_DATAPORT_DC_PORT1_UNTYPED_ATOMIC_OP 2 |
#define HSW_DATAPORT_DC_PORT1_UNTYPED_ATOMIC_OP_SIMD4X2 3 |
#define HSW_DATAPORT_DC_PORT1_MEDIA_BLOCK_READ 4 |
#define HSW_DATAPORT_DC_PORT1_TYPED_SURFACE_READ 5 |
#define HSW_DATAPORT_DC_PORT1_TYPED_ATOMIC_OP 6 |
#define HSW_DATAPORT_DC_PORT1_TYPED_ATOMIC_OP_SIMD4X2 7 |
#define HSW_DATAPORT_DC_PORT1_UNTYPED_SURFACE_WRITE 9 |
#define HSW_DATAPORT_DC_PORT1_MEDIA_BLOCK_WRITE 10 |
#define HSW_DATAPORT_DC_PORT1_ATOMIC_COUNTER_OP 11 |
#define HSW_DATAPORT_DC_PORT1_ATOMIC_COUNTER_OP_SIMD4X2 12 |
#define HSW_DATAPORT_DC_PORT1_TYPED_SURFACE_WRITE 13 |
/* dataport atomic operations. */ |
#define BRW_AOP_AND 1 |
#define BRW_AOP_OR 2 |
#define BRW_AOP_XOR 3 |
#define BRW_AOP_MOV 4 |
#define BRW_AOP_INC 5 |
#define BRW_AOP_DEC 6 |
#define BRW_AOP_ADD 7 |
#define BRW_AOP_SUB 8 |
#define BRW_AOP_REVSUB 9 |
#define BRW_AOP_IMAX 10 |
#define BRW_AOP_IMIN 11 |
#define BRW_AOP_UMAX 12 |
#define BRW_AOP_UMIN 13 |
#define BRW_AOP_CMPWR 14 |
#define BRW_AOP_PREDEC 15 |
#define BRW_MATH_FUNCTION_INV 1 |
#define BRW_MATH_FUNCTION_LOG 2 |
#define BRW_MATH_FUNCTION_EXP 3 |
#define BRW_MATH_FUNCTION_SQRT 4 |
#define BRW_MATH_FUNCTION_RSQ 5 |
#define BRW_MATH_FUNCTION_SIN 6 |
#define BRW_MATH_FUNCTION_COS 7 |
#define BRW_MATH_FUNCTION_SINCOS 8 /* gen4, gen5 */ |
#define BRW_MATH_FUNCTION_FDIV 9 /* gen6+ */ |
#define BRW_MATH_FUNCTION_POW 10 |
#define BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER 11 |
#define BRW_MATH_FUNCTION_INT_DIV_QUOTIENT 12 |
#define BRW_MATH_FUNCTION_INT_DIV_REMAINDER 13 |
#define BRW_MATH_INTEGER_UNSIGNED 0 |
#define BRW_MATH_INTEGER_SIGNED 1 |
#define BRW_MATH_PRECISION_FULL 0 |
#define BRW_MATH_PRECISION_PARTIAL 1 |
#define BRW_MATH_SATURATE_NONE 0 |
#define BRW_MATH_SATURATE_SATURATE 1 |
#define BRW_MATH_DATA_VECTOR 0 |
#define BRW_MATH_DATA_SCALAR 1 |
#define BRW_URB_OPCODE_WRITE 0 |
#define BRW_URB_SWIZZLE_NONE 0 |
#define BRW_URB_SWIZZLE_INTERLEAVE 1 |
#define BRW_URB_SWIZZLE_TRANSPOSE 2 |
#define BRW_SCRATCH_SPACE_SIZE_1K 0 |
#define BRW_SCRATCH_SPACE_SIZE_2K 1 |
#define BRW_SCRATCH_SPACE_SIZE_4K 2 |
#define BRW_SCRATCH_SPACE_SIZE_8K 3 |
#define BRW_SCRATCH_SPACE_SIZE_16K 4 |
#define BRW_SCRATCH_SPACE_SIZE_32K 5 |
#define BRW_SCRATCH_SPACE_SIZE_64K 6 |
#define BRW_SCRATCH_SPACE_SIZE_128K 7 |
#define BRW_SCRATCH_SPACE_SIZE_256K 8 |
#define BRW_SCRATCH_SPACE_SIZE_512K 9 |
#define BRW_SCRATCH_SPACE_SIZE_1M 10 |
#define BRW_SCRATCH_SPACE_SIZE_2M 11 |
#define CMD_URB_FENCE 0x6000 |
#define CMD_CS_URB_STATE 0x6001 |
#define CMD_CONST_BUFFER 0x6002 |
#define CMD_STATE_BASE_ADDRESS 0x6101 |
#define CMD_STATE_SIP 0x6102 |
#define CMD_PIPELINE_SELECT_965 0x6104 |
#define CMD_PIPELINE_SELECT_GM45 0x6904 |
#define _3DSTATE_PIPELINED_POINTERS 0x7800 |
#define _3DSTATE_BINDING_TABLE_POINTERS 0x7801 |
# define GEN6_BINDING_TABLE_MODIFY_VS (1 << 8) |
# define GEN6_BINDING_TABLE_MODIFY_GS (1 << 9) |
# define GEN6_BINDING_TABLE_MODIFY_PS (1 << 12) |
#define _3DSTATE_BINDING_TABLE_POINTERS_VS 0x7826 /* GEN7+ */ |
#define _3DSTATE_BINDING_TABLE_POINTERS_HS 0x7827 /* GEN7+ */ |
#define _3DSTATE_BINDING_TABLE_POINTERS_DS 0x7828 /* GEN7+ */ |
#define _3DSTATE_BINDING_TABLE_POINTERS_GS 0x7829 /* GEN7+ */ |
#define _3DSTATE_BINDING_TABLE_POINTERS_PS 0x782A /* GEN7+ */ |
#define _3DSTATE_SAMPLER_STATE_POINTERS 0x7802 /* GEN6+ */ |
# define PS_SAMPLER_STATE_CHANGE (1 << 12) |
# define GS_SAMPLER_STATE_CHANGE (1 << 9) |
# define VS_SAMPLER_STATE_CHANGE (1 << 8) |
/* DW1: VS */ |
/* DW2: GS */ |
/* DW3: PS */ |
#define _3DSTATE_SAMPLER_STATE_POINTERS_VS 0x782B /* GEN7+ */ |
#define _3DSTATE_SAMPLER_STATE_POINTERS_GS 0x782E /* GEN7+ */ |
#define _3DSTATE_SAMPLER_STATE_POINTERS_PS 0x782F /* GEN7+ */ |
#define _3DSTATE_VERTEX_BUFFERS 0x7808 |
# define BRW_VB0_INDEX_SHIFT 27 |
# define GEN6_VB0_INDEX_SHIFT 26 |
# define BRW_VB0_ACCESS_VERTEXDATA (0 << 26) |
# define BRW_VB0_ACCESS_INSTANCEDATA (1 << 26) |
# define GEN6_VB0_ACCESS_VERTEXDATA (0 << 20) |
# define GEN6_VB0_ACCESS_INSTANCEDATA (1 << 20) |
# define GEN7_VB0_ADDRESS_MODIFYENABLE (1 << 14) |
# define BRW_VB0_PITCH_SHIFT 0 |
#define _3DSTATE_VERTEX_ELEMENTS 0x7809 |
# define BRW_VE0_INDEX_SHIFT 27 |
# define GEN6_VE0_INDEX_SHIFT 26 |
# define BRW_VE0_FORMAT_SHIFT 16 |
# define BRW_VE0_VALID (1 << 26) |
# define GEN6_VE0_VALID (1 << 25) |
# define GEN6_VE0_EDGE_FLAG_ENABLE (1 << 15) |
# define BRW_VE0_SRC_OFFSET_SHIFT 0 |
# define BRW_VE1_COMPONENT_NOSTORE 0 |
# define BRW_VE1_COMPONENT_STORE_SRC 1 |
# define BRW_VE1_COMPONENT_STORE_0 2 |
# define BRW_VE1_COMPONENT_STORE_1_FLT 3 |
# define BRW_VE1_COMPONENT_STORE_1_INT 4 |
# define BRW_VE1_COMPONENT_STORE_VID 5 |
# define BRW_VE1_COMPONENT_STORE_IID 6 |
# define BRW_VE1_COMPONENT_STORE_PID 7 |
# define BRW_VE1_COMPONENT_0_SHIFT 28 |
# define BRW_VE1_COMPONENT_1_SHIFT 24 |
# define BRW_VE1_COMPONENT_2_SHIFT 20 |
# define BRW_VE1_COMPONENT_3_SHIFT 16 |
# define BRW_VE1_DST_OFFSET_SHIFT 0 |
#define CMD_INDEX_BUFFER 0x780a |
#define GEN4_3DSTATE_VF_STATISTICS 0x780b |
#define GM45_3DSTATE_VF_STATISTICS 0x680b |
#define _3DSTATE_CC_STATE_POINTERS 0x780e /* GEN6+ */ |
#define _3DSTATE_BLEND_STATE_POINTERS 0x7824 /* GEN7+ */ |
#define _3DSTATE_DEPTH_STENCIL_STATE_POINTERS 0x7825 /* GEN7+ */ |
#define _3DSTATE_URB 0x7805 /* GEN6 */ |
# define GEN6_URB_VS_SIZE_SHIFT 16 |
# define GEN6_URB_VS_ENTRIES_SHIFT 0 |
# define GEN6_URB_GS_ENTRIES_SHIFT 8 |
# define GEN6_URB_GS_SIZE_SHIFT 0 |
#define _3DSTATE_VF 0x780c /* GEN7.5+ */ |
#define HSW_CUT_INDEX_ENABLE (1 << 8) |
#define _3DSTATE_URB_VS 0x7830 /* GEN7+ */ |
#define _3DSTATE_URB_HS 0x7831 /* GEN7+ */ |
#define _3DSTATE_URB_DS 0x7832 /* GEN7+ */ |
#define _3DSTATE_URB_GS 0x7833 /* GEN7+ */ |
# define GEN7_URB_ENTRY_SIZE_SHIFT 16 |
# define GEN7_URB_STARTING_ADDRESS_SHIFT 25 |
#define _3DSTATE_PUSH_CONSTANT_ALLOC_VS 0x7912 /* GEN7+ */ |
#define _3DSTATE_PUSH_CONSTANT_ALLOC_PS 0x7916 /* GEN7+ */ |
# define GEN7_PUSH_CONSTANT_BUFFER_OFFSET_SHIFT 16 |
#define _3DSTATE_VIEWPORT_STATE_POINTERS 0x780d /* GEN6+ */ |
# define GEN6_CC_VIEWPORT_MODIFY (1 << 12) |
# define GEN6_SF_VIEWPORT_MODIFY (1 << 11) |
# define GEN6_CLIP_VIEWPORT_MODIFY (1 << 10) |
#define _3DSTATE_VIEWPORT_STATE_POINTERS_CC 0x7823 /* GEN7+ */ |
#define _3DSTATE_VIEWPORT_STATE_POINTERS_SF_CL 0x7821 /* GEN7+ */ |
#define _3DSTATE_SCISSOR_STATE_POINTERS 0x780f /* GEN6+ */ |
#define _3DSTATE_VS 0x7810 /* GEN6+ */ |
/* DW2 */ |
# define GEN6_VS_SPF_MODE (1 << 31) |
# define GEN6_VS_VECTOR_MASK_ENABLE (1 << 30) |
# define GEN6_VS_SAMPLER_COUNT_SHIFT 27 |
# define GEN6_VS_BINDING_TABLE_ENTRY_COUNT_SHIFT 18 |
# define GEN6_VS_FLOATING_POINT_MODE_IEEE_754 (0 << 16) |
# define GEN6_VS_FLOATING_POINT_MODE_ALT (1 << 16) |
/* DW4 */ |
# define GEN6_VS_DISPATCH_START_GRF_SHIFT 20 |
# define GEN6_VS_URB_READ_LENGTH_SHIFT 11 |
# define GEN6_VS_URB_ENTRY_READ_OFFSET_SHIFT 4 |
/* DW5 */ |
# define GEN6_VS_MAX_THREADS_SHIFT 25 |
# define HSW_VS_MAX_THREADS_SHIFT 23 |
# define GEN6_VS_STATISTICS_ENABLE (1 << 10) |
# define GEN6_VS_CACHE_DISABLE (1 << 1) |
# define GEN6_VS_ENABLE (1 << 0) |
#define _3DSTATE_GS 0x7811 /* GEN6+ */ |
/* DW2 */ |
# define GEN6_GS_SPF_MODE (1 << 31) |
# define GEN6_GS_VECTOR_MASK_ENABLE (1 << 30) |
# define GEN6_GS_SAMPLER_COUNT_SHIFT 27 |
# define GEN6_GS_BINDING_TABLE_ENTRY_COUNT_SHIFT 18 |
# define GEN6_GS_FLOATING_POINT_MODE_IEEE_754 (0 << 16) |
# define GEN6_GS_FLOATING_POINT_MODE_ALT (1 << 16) |
/* DW4 */ |
# define GEN6_GS_URB_READ_LENGTH_SHIFT 11 |
# define GEN7_GS_INCLUDE_VERTEX_HANDLES (1 << 10) |
# define GEN6_GS_URB_ENTRY_READ_OFFSET_SHIFT 4 |
# define GEN6_GS_DISPATCH_START_GRF_SHIFT 0 |
/* DW5 */ |
# define GEN6_GS_MAX_THREADS_SHIFT 25 |
# define GEN6_GS_STATISTICS_ENABLE (1 << 10) |
# define GEN6_GS_SO_STATISTICS_ENABLE (1 << 9) |
# define GEN6_GS_RENDERING_ENABLE (1 << 8) |
# define GEN7_GS_ENABLE (1 << 0) |
/* DW6 */ |
# define GEN6_GS_REORDER (1 << 30) |
# define GEN6_GS_DISCARD_ADJACENCY (1 << 29) |
# define GEN6_GS_SVBI_PAYLOAD_ENABLE (1 << 28) |
# define GEN6_GS_SVBI_POSTINCREMENT_ENABLE (1 << 27) |
# define GEN6_GS_SVBI_POSTINCREMENT_VALUE_SHIFT 16 |
# define GEN6_GS_SVBI_POSTINCREMENT_VALUE_MASK INTEL_MASK(25, 16) |
# define GEN6_GS_ENABLE (1 << 15) |
# define BRW_GS_EDGE_INDICATOR_0 (1 << 8) |
# define BRW_GS_EDGE_INDICATOR_1 (1 << 9) |
#define _3DSTATE_HS 0x781B /* GEN7+ */ |
#define _3DSTATE_TE 0x781C /* GEN7+ */ |
#define _3DSTATE_DS 0x781D /* GEN7+ */ |
#define _3DSTATE_CLIP 0x7812 /* GEN6+ */ |
/* DW1 */ |
# define GEN7_CLIP_WINDING_CW (0 << 20) |
# define GEN7_CLIP_WINDING_CCW (1 << 20) |
# define GEN7_CLIP_VERTEX_SUBPIXEL_PRECISION_8 (0 << 19) |
# define GEN7_CLIP_VERTEX_SUBPIXEL_PRECISION_4 (1 << 19) |
# define GEN7_CLIP_EARLY_CULL (1 << 18) |
# define GEN7_CLIP_CULLMODE_BOTH (0 << 16) |
# define GEN7_CLIP_CULLMODE_NONE (1 << 16) |
# define GEN7_CLIP_CULLMODE_FRONT (2 << 16) |
# define GEN7_CLIP_CULLMODE_BACK (3 << 16) |
# define GEN6_CLIP_STATISTICS_ENABLE (1 << 10) |
/** |
* Just does cheap culling based on the clip distance. Bits must be |
* disjoint with USER_CLIP_CLIP_DISTANCE bits. |
*/ |
# define GEN6_USER_CLIP_CULL_DISTANCES_SHIFT 0 |
/* DW2 */ |
# define GEN6_CLIP_ENABLE (1 << 31) |
# define GEN6_CLIP_API_OGL (0 << 30) |
# define GEN6_CLIP_API_D3D (1 << 30) |
# define GEN6_CLIP_XY_TEST (1 << 28) |
# define GEN6_CLIP_Z_TEST (1 << 27) |
# define GEN6_CLIP_GB_TEST (1 << 26) |
/** 8-bit field of which user clip distances to clip aganist. */ |
# define GEN6_USER_CLIP_CLIP_DISTANCES_SHIFT 16 |
# define GEN6_CLIP_MODE_NORMAL (0 << 13) |
# define GEN6_CLIP_MODE_REJECT_ALL (3 << 13) |
# define GEN6_CLIP_MODE_ACCEPT_ALL (4 << 13) |
# define GEN6_CLIP_PERSPECTIVE_DIVIDE_DISABLE (1 << 9) |
# define GEN6_CLIP_NON_PERSPECTIVE_BARYCENTRIC_ENABLE (1 << 8) |
# define GEN6_CLIP_TRI_PROVOKE_SHIFT 4 |
# define GEN6_CLIP_LINE_PROVOKE_SHIFT 2 |
# define GEN6_CLIP_TRIFAN_PROVOKE_SHIFT 0 |
/* DW3 */ |
# define GEN6_CLIP_MIN_POINT_WIDTH_SHIFT 17 |
# define GEN6_CLIP_MAX_POINT_WIDTH_SHIFT 6 |
# define GEN6_CLIP_FORCE_ZERO_RTAINDEX (1 << 5) |
#define _3DSTATE_SF 0x7813 /* GEN6+ */ |
/* DW1 (for gen6) */ |
# define GEN6_SF_NUM_OUTPUTS_SHIFT 22 |
# define GEN6_SF_SWIZZLE_ENABLE (1 << 21) |
# define GEN6_SF_POINT_SPRITE_UPPERLEFT (0 << 20) |
# define GEN6_SF_POINT_SPRITE_LOWERLEFT (1 << 20) |
# define GEN6_SF_URB_ENTRY_READ_LENGTH_SHIFT 11 |
# define GEN6_SF_URB_ENTRY_READ_OFFSET_SHIFT 4 |
/* DW2 */ |
# define GEN6_SF_LEGACY_GLOBAL_DEPTH_BIAS (1 << 11) |
# define GEN6_SF_STATISTICS_ENABLE (1 << 10) |
# define GEN6_SF_GLOBAL_DEPTH_OFFSET_SOLID (1 << 9) |
# define GEN6_SF_GLOBAL_DEPTH_OFFSET_WIREFRAME (1 << 8) |
# define GEN6_SF_GLOBAL_DEPTH_OFFSET_POINT (1 << 7) |
# define GEN6_SF_FRONT_SOLID (0 << 5) |
# define GEN6_SF_FRONT_WIREFRAME (1 << 5) |
# define GEN6_SF_FRONT_POINT (2 << 5) |
# define GEN6_SF_BACK_SOLID (0 << 3) |
# define GEN6_SF_BACK_WIREFRAME (1 << 3) |
# define GEN6_SF_BACK_POINT (2 << 3) |
# define GEN6_SF_VIEWPORT_TRANSFORM_ENABLE (1 << 1) |
# define GEN6_SF_WINDING_CCW (1 << 0) |
/* DW3 */ |
# define GEN6_SF_LINE_AA_ENABLE (1 << 31) |
# define GEN6_SF_CULL_BOTH (0 << 29) |
# define GEN6_SF_CULL_NONE (1 << 29) |
# define GEN6_SF_CULL_FRONT (2 << 29) |
# define GEN6_SF_CULL_BACK (3 << 29) |
# define GEN6_SF_LINE_WIDTH_SHIFT 18 /* U3.7 */ |
# define GEN6_SF_LINE_END_CAP_WIDTH_0_5 (0 << 16) |
# define GEN6_SF_LINE_END_CAP_WIDTH_1_0 (1 << 16) |
# define GEN6_SF_LINE_END_CAP_WIDTH_2_0 (2 << 16) |
# define GEN6_SF_LINE_END_CAP_WIDTH_4_0 (3 << 16) |
# define GEN6_SF_SCISSOR_ENABLE (1 << 11) |
# define GEN6_SF_MSRAST_OFF_PIXEL (0 << 8) |
# define GEN6_SF_MSRAST_OFF_PATTERN (1 << 8) |
# define GEN6_SF_MSRAST_ON_PIXEL (2 << 8) |
# define GEN6_SF_MSRAST_ON_PATTERN (3 << 8) |
/* DW4 */ |
# define GEN6_SF_TRI_PROVOKE_SHIFT 29 |
# define GEN6_SF_LINE_PROVOKE_SHIFT 27 |
# define GEN6_SF_TRIFAN_PROVOKE_SHIFT 25 |
# define GEN6_SF_LINE_AA_MODE_MANHATTAN (0 << 14) |
# define GEN6_SF_LINE_AA_MODE_TRUE (1 << 14) |
# define GEN6_SF_VERTEX_SUBPIXEL_8BITS (0 << 12) |
# define GEN6_SF_VERTEX_SUBPIXEL_4BITS (1 << 12) |
# define GEN6_SF_USE_STATE_POINT_WIDTH (1 << 11) |
# define GEN6_SF_POINT_WIDTH_SHIFT 0 /* U8.3 */ |
/* DW5: depth offset constant */ |
/* DW6: depth offset scale */ |
/* DW7: depth offset clamp */ |
/* DW8 */ |
# define ATTRIBUTE_1_OVERRIDE_W (1 << 31) |
# define ATTRIBUTE_1_OVERRIDE_Z (1 << 30) |
# define ATTRIBUTE_1_OVERRIDE_Y (1 << 29) |
# define ATTRIBUTE_1_OVERRIDE_X (1 << 28) |
# define ATTRIBUTE_1_CONST_SOURCE_SHIFT 25 |
# define ATTRIBUTE_1_SWIZZLE_SHIFT 22 |
# define ATTRIBUTE_1_SOURCE_SHIFT 16 |
# define ATTRIBUTE_0_OVERRIDE_W (1 << 15) |
# define ATTRIBUTE_0_OVERRIDE_Z (1 << 14) |
# define ATTRIBUTE_0_OVERRIDE_Y (1 << 13) |
# define ATTRIBUTE_0_OVERRIDE_X (1 << 12) |
# define ATTRIBUTE_0_CONST_SOURCE_SHIFT 9 |
# define ATTRIBUTE_0_SWIZZLE_SHIFT 6 |
# define ATTRIBUTE_0_SOURCE_SHIFT 0 |
# define ATTRIBUTE_SWIZZLE_INPUTATTR 0 |
# define ATTRIBUTE_SWIZZLE_INPUTATTR_FACING 1 |
# define ATTRIBUTE_SWIZZLE_INPUTATTR_W 2 |
# define ATTRIBUTE_SWIZZLE_INPUTATTR_FACING_W 3 |
# define ATTRIBUTE_SWIZZLE_SHIFT 6 |
/* DW16: Point sprite texture coordinate enables */ |
/* DW17: Constant interpolation enables */ |
/* DW18: attr 0-7 wrap shortest enables */ |
/* DW19: attr 8-16 wrap shortest enables */ |
/* On GEN7, many fields of 3DSTATE_SF were split out into a new command: |
* 3DSTATE_SBE. The remaining fields live in different DWords, but retain |
* the same bit-offset. The only new field: |
*/ |
/* GEN7/DW1: */ |
# define GEN7_SF_DEPTH_BUFFER_SURFACE_FORMAT_SHIFT 12 |
/* GEN7/DW2: */ |
# define HSW_SF_LINE_STIPPLE_ENABLE 14 |
#define _3DSTATE_SBE 0x781F /* GEN7+ */ |
/* DW1 */ |
# define GEN7_SBE_SWIZZLE_CONTROL_MODE (1 << 28) |
# define GEN7_SBE_NUM_OUTPUTS_SHIFT 22 |
# define GEN7_SBE_SWIZZLE_ENABLE (1 << 21) |
# define GEN7_SBE_POINT_SPRITE_LOWERLEFT (1 << 20) |
# define GEN7_SBE_URB_ENTRY_READ_LENGTH_SHIFT 11 |
# define GEN7_SBE_URB_ENTRY_READ_OFFSET_SHIFT 4 |
/* DW2-9: Attribute setup (same as DW8-15 of gen6 _3DSTATE_SF) */ |
/* DW10: Point sprite texture coordinate enables */ |
/* DW11: Constant interpolation enables */ |
/* DW12: attr 0-7 wrap shortest enables */ |
/* DW13: attr 8-16 wrap shortest enables */ |
/* Barycentric interpolation modes the WM can request in its payload.
 * These values double as bit indices (see
 * BRW_WM_NONPERSPECTIVE_BARYCENTRIC_BITS below), so they must stay dense
 * and start at 0.
 */
enum brw_wm_barycentric_interp_mode {
   BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC		= 0,
   BRW_WM_PERSPECTIVE_CENTROID_BARYCENTRIC	= 1,
   BRW_WM_PERSPECTIVE_SAMPLE_BARYCENTRIC	= 2,
   BRW_WM_NONPERSPECTIVE_PIXEL_BARYCENTRIC	= 3,
   BRW_WM_NONPERSPECTIVE_CENTROID_BARYCENTRIC	= 4,
   BRW_WM_NONPERSPECTIVE_SAMPLE_BARYCENTRIC	= 5,
   BRW_WM_BARYCENTRIC_INTERP_MODE_COUNT		= 6  /* number of modes, not a mode */
};
/* Mask with one bit set per non-perspective barycentric mode, using the
 * enum values above as bit positions.
 */
#define BRW_WM_NONPERSPECTIVE_BARYCENTRIC_BITS \
   ((1 << BRW_WM_NONPERSPECTIVE_PIXEL_BARYCENTRIC) | \
    (1 << BRW_WM_NONPERSPECTIVE_CENTROID_BARYCENTRIC) | \
    (1 << BRW_WM_NONPERSPECTIVE_SAMPLE_BARYCENTRIC))
#define _3DSTATE_WM 0x7814 /* GEN6+ */ |
/* DW1: kernel pointer */ |
/* DW2 */ |
# define GEN6_WM_SPF_MODE (1 << 31) |
# define GEN6_WM_VECTOR_MASK_ENABLE (1 << 30) |
# define GEN6_WM_SAMPLER_COUNT_SHIFT 27 |
# define GEN6_WM_BINDING_TABLE_ENTRY_COUNT_SHIFT 18 |
# define GEN6_WM_FLOATING_POINT_MODE_IEEE_754 (0 << 16) |
# define GEN6_WM_FLOATING_POINT_MODE_ALT (1 << 16) |
/* DW3: scratch space */ |
/* DW4 */ |
# define GEN6_WM_STATISTICS_ENABLE (1 << 31) |
# define GEN6_WM_DEPTH_CLEAR (1 << 30) |
# define GEN6_WM_DEPTH_RESOLVE (1 << 28) |
# define GEN6_WM_HIERARCHICAL_DEPTH_RESOLVE (1 << 27) |
# define GEN6_WM_DISPATCH_START_GRF_SHIFT_0 16 |
# define GEN6_WM_DISPATCH_START_GRF_SHIFT_1 8 |
# define GEN6_WM_DISPATCH_START_GRF_SHIFT_2 0 |
/* DW5 */ |
# define GEN6_WM_MAX_THREADS_SHIFT 25 |
# define GEN6_WM_KILL_ENABLE (1 << 22) |
# define GEN6_WM_COMPUTED_DEPTH (1 << 21) |
# define GEN6_WM_USES_SOURCE_DEPTH (1 << 20) |
# define GEN6_WM_DISPATCH_ENABLE (1 << 19) |
# define GEN6_WM_LINE_END_CAP_AA_WIDTH_0_5 (0 << 16) |
# define GEN6_WM_LINE_END_CAP_AA_WIDTH_1_0 (1 << 16) |
# define GEN6_WM_LINE_END_CAP_AA_WIDTH_2_0 (2 << 16) |
# define GEN6_WM_LINE_END_CAP_AA_WIDTH_4_0 (3 << 16) |
# define GEN6_WM_LINE_AA_WIDTH_0_5 (0 << 14) |
# define GEN6_WM_LINE_AA_WIDTH_1_0 (1 << 14) |
# define GEN6_WM_LINE_AA_WIDTH_2_0 (2 << 14) |
# define GEN6_WM_LINE_AA_WIDTH_4_0 (3 << 14) |
# define GEN6_WM_POLYGON_STIPPLE_ENABLE (1 << 13) |
# define GEN6_WM_LINE_STIPPLE_ENABLE (1 << 11) |
# define GEN6_WM_OMASK_TO_RENDER_TARGET (1 << 9) |
# define GEN6_WM_USES_SOURCE_W (1 << 8) |
# define GEN6_WM_DUAL_SOURCE_BLEND_ENABLE (1 << 7) |
# define GEN6_WM_32_DISPATCH_ENABLE (1 << 2) |
# define GEN6_WM_16_DISPATCH_ENABLE (1 << 1) |
# define GEN6_WM_8_DISPATCH_ENABLE (1 << 0) |
/* DW6 */ |
# define GEN6_WM_NUM_SF_OUTPUTS_SHIFT 20 |
# define GEN6_WM_POSOFFSET_NONE (0 << 18) |
# define GEN6_WM_POSOFFSET_CENTROID (2 << 18) |
# define GEN6_WM_POSOFFSET_SAMPLE (3 << 18) |
# define GEN6_WM_POSITION_ZW_PIXEL (0 << 16) |
# define GEN6_WM_POSITION_ZW_CENTROID (2 << 16) |
# define GEN6_WM_POSITION_ZW_SAMPLE (3 << 16) |
# define GEN6_WM_NONPERSPECTIVE_SAMPLE_BARYCENTRIC (1 << 15) |
# define GEN6_WM_NONPERSPECTIVE_CENTROID_BARYCENTRIC (1 << 14) |
# define GEN6_WM_NONPERSPECTIVE_PIXEL_BARYCENTRIC (1 << 13) |
# define GEN6_WM_PERSPECTIVE_SAMPLE_BARYCENTRIC (1 << 12) |
# define GEN6_WM_PERSPECTIVE_CENTROID_BARYCENTRIC (1 << 11) |
# define GEN6_WM_PERSPECTIVE_PIXEL_BARYCENTRIC (1 << 10) |
# define GEN6_WM_BARYCENTRIC_INTERPOLATION_MODE_SHIFT 10 |
# define GEN6_WM_POINT_RASTRULE_UPPER_RIGHT (1 << 9) |
# define GEN6_WM_MSRAST_OFF_PIXEL (0 << 1) |
# define GEN6_WM_MSRAST_OFF_PATTERN (1 << 1) |
# define GEN6_WM_MSRAST_ON_PIXEL (2 << 1) |
# define GEN6_WM_MSRAST_ON_PATTERN (3 << 1) |
# define GEN6_WM_MSDISPMODE_PERSAMPLE (0 << 0) |
# define GEN6_WM_MSDISPMODE_PERPIXEL (1 << 0) |
/* DW7: kernel 1 pointer */ |
/* DW8: kernel 2 pointer */ |
/* Push-constant buffer commands, one per shader stage. */
#define _3DSTATE_CONSTANT_VS 0x7815 /* GEN6+ */
#define _3DSTATE_CONSTANT_GS 0x7816 /* GEN6+ */
#define _3DSTATE_CONSTANT_PS 0x7817 /* GEN6+ */
# define GEN6_CONSTANT_BUFFER_3_ENABLE (1 << 15)
# define GEN6_CONSTANT_BUFFER_2_ENABLE (1 << 14)
# define GEN6_CONSTANT_BUFFER_1_ENABLE (1 << 13)
# define GEN6_CONSTANT_BUFFER_0_ENABLE (1 << 12)
#define _3DSTATE_CONSTANT_HS 0x7819 /* GEN7+ */
#define _3DSTATE_CONSTANT_DS 0x781A /* GEN7+ */
/* 3DSTATE_STREAMOUT: transform-feedback (stream output) control. */
#define _3DSTATE_STREAMOUT 0x781e /* GEN7+ */
/* DW1 */
# define SO_FUNCTION_ENABLE (1 << 31)
# define SO_RENDERING_DISABLE (1 << 30)
/* This selects which incoming rendering stream goes down the pipeline. The
 * rendering stream is 0 if not defined by special cases in the GS state.
 */
# define SO_RENDER_STREAM_SELECT_SHIFT 27
# define SO_RENDER_STREAM_SELECT_MASK INTEL_MASK(28, 27)
/* Controls reordering of TRISTRIP_* elements in stream output (not rendering).
 */
# define SO_REORDER_TRAILING (1 << 26)
/* Controls SO_NUM_PRIMS_WRITTEN_* and SO_PRIM_STORAGE_* */
# define SO_STATISTICS_ENABLE (1 << 25)
# define SO_BUFFER_ENABLE(n) (1 << (8 + (n)))
/* DW2: per-stream URB read offset/length.  Note the OFFSET fields are a
 * single bit each (MASK covers one bit), while LENGTH fields are 5 bits. */
# define SO_STREAM_3_VERTEX_READ_OFFSET_SHIFT 29
# define SO_STREAM_3_VERTEX_READ_OFFSET_MASK INTEL_MASK(29, 29)
# define SO_STREAM_3_VERTEX_READ_LENGTH_SHIFT 24
# define SO_STREAM_3_VERTEX_READ_LENGTH_MASK INTEL_MASK(28, 24)
# define SO_STREAM_2_VERTEX_READ_OFFSET_SHIFT 21
# define SO_STREAM_2_VERTEX_READ_OFFSET_MASK INTEL_MASK(21, 21)
# define SO_STREAM_2_VERTEX_READ_LENGTH_SHIFT 16
# define SO_STREAM_2_VERTEX_READ_LENGTH_MASK INTEL_MASK(20, 16)
# define SO_STREAM_1_VERTEX_READ_OFFSET_SHIFT 13
# define SO_STREAM_1_VERTEX_READ_OFFSET_MASK INTEL_MASK(13, 13)
# define SO_STREAM_1_VERTEX_READ_LENGTH_SHIFT 8
# define SO_STREAM_1_VERTEX_READ_LENGTH_MASK INTEL_MASK(12, 8)
# define SO_STREAM_0_VERTEX_READ_OFFSET_SHIFT 5
# define SO_STREAM_0_VERTEX_READ_OFFSET_MASK INTEL_MASK(5, 5)
# define SO_STREAM_0_VERTEX_READ_LENGTH_SHIFT 0
# define SO_STREAM_0_VERTEX_READ_LENGTH_MASK INTEL_MASK(4, 0)
/* 3DSTATE_WM for Gen7 — same opcode (0x7814) as gen6 above, but the DW1/DW2
 * bit layout differs; kernel/dispatch fields moved to 3DSTATE_PS (below).
 */
/* DW1 */
# define GEN7_WM_STATISTICS_ENABLE (1 << 31)
# define GEN7_WM_DEPTH_CLEAR (1 << 30)
# define GEN7_WM_DISPATCH_ENABLE (1 << 29)
# define GEN7_WM_DEPTH_RESOLVE (1 << 28)
# define GEN7_WM_HIERARCHICAL_DEPTH_RESOLVE (1 << 27)
# define GEN7_WM_KILL_ENABLE (1 << 25)
/* Pixel-shader computed depth modes (2-bit field at bit 23). */
# define GEN7_WM_PSCDEPTH_OFF (0 << 23)
# define GEN7_WM_PSCDEPTH_ON (1 << 23)
# define GEN7_WM_PSCDEPTH_ON_GE (2 << 23)
# define GEN7_WM_PSCDEPTH_ON_LE (3 << 23)
# define GEN7_WM_USES_SOURCE_DEPTH (1 << 20)
# define GEN7_WM_USES_SOURCE_W (1 << 19)
# define GEN7_WM_POSITION_ZW_PIXEL (0 << 17)
# define GEN7_WM_POSITION_ZW_CENTROID (2 << 17)
# define GEN7_WM_POSITION_ZW_SAMPLE (3 << 17)
# define GEN7_WM_BARYCENTRIC_INTERPOLATION_MODE_SHIFT 11
# define GEN7_WM_USES_INPUT_COVERAGE_MASK (1 << 10)
# define GEN7_WM_LINE_END_CAP_AA_WIDTH_0_5 (0 << 8)
# define GEN7_WM_LINE_END_CAP_AA_WIDTH_1_0 (1 << 8)
# define GEN7_WM_LINE_END_CAP_AA_WIDTH_2_0 (2 << 8)
# define GEN7_WM_LINE_END_CAP_AA_WIDTH_4_0 (3 << 8)
# define GEN7_WM_LINE_AA_WIDTH_0_5 (0 << 6)
# define GEN7_WM_LINE_AA_WIDTH_1_0 (1 << 6)
# define GEN7_WM_LINE_AA_WIDTH_2_0 (2 << 6)
# define GEN7_WM_LINE_AA_WIDTH_4_0 (3 << 6)
# define GEN7_WM_POLYGON_STIPPLE_ENABLE (1 << 4)
# define GEN7_WM_LINE_STIPPLE_ENABLE (1 << 3)
# define GEN7_WM_POINT_RASTRULE_UPPER_RIGHT (1 << 2)
# define GEN7_WM_MSRAST_OFF_PIXEL (0 << 0)
# define GEN7_WM_MSRAST_OFF_PATTERN (1 << 0)
# define GEN7_WM_MSRAST_ON_PIXEL (2 << 0)
# define GEN7_WM_MSRAST_ON_PATTERN (3 << 0)
/* DW2 */
# define GEN7_WM_MSDISPMODE_PERSAMPLE (0 << 31)
# define GEN7_WM_MSDISPMODE_PERPIXEL (1 << 31)
/* 3DSTATE_PS: gen7+ pixel-shader kernel/dispatch state (split out of the
 * gen6 3DSTATE_WM command).
 */
#define _3DSTATE_PS 0x7820 /* GEN7+ */
/* DW1: kernel pointer */
/* DW2 */
# define GEN7_PS_SPF_MODE (1 << 31)
# define GEN7_PS_VECTOR_MASK_ENABLE (1 << 30)
# define GEN7_PS_SAMPLER_COUNT_SHIFT 27
# define GEN7_PS_BINDING_TABLE_ENTRY_COUNT_SHIFT 18
# define GEN7_PS_FLOATING_POINT_MODE_IEEE_754 (0 << 16)
# define GEN7_PS_FLOATING_POINT_MODE_ALT (1 << 16)
/* DW3: scratch space */
/* DW4 — note the MAX_THREADS field moved down one bit on Haswell. */
# define IVB_PS_MAX_THREADS_SHIFT 24
# define HSW_PS_MAX_THREADS_SHIFT 23
# define HSW_PS_SAMPLE_MASK_SHIFT 12
# define HSW_PS_SAMPLE_MASK_MASK INTEL_MASK(19, 12)
# define GEN7_PS_PUSH_CONSTANT_ENABLE (1 << 11)
# define GEN7_PS_ATTRIBUTE_ENABLE (1 << 10)
# define GEN7_PS_OMASK_TO_RENDER_TARGET (1 << 9)
# define GEN7_PS_DUAL_SOURCE_BLEND_ENABLE (1 << 7)
# define GEN7_PS_POSOFFSET_NONE (0 << 3)
# define GEN7_PS_POSOFFSET_CENTROID (2 << 3)
# define GEN7_PS_POSOFFSET_SAMPLE (3 << 3)
# define GEN7_PS_32_DISPATCH_ENABLE (1 << 2)
# define GEN7_PS_16_DISPATCH_ENABLE (1 << 1)
# define GEN7_PS_8_DISPATCH_ENABLE (1 << 0)
/* DW5 */
# define GEN7_PS_DISPATCH_START_GRF_SHIFT_0 16
# define GEN7_PS_DISPATCH_START_GRF_SHIFT_1 8
# define GEN7_PS_DISPATCH_START_GRF_SHIFT_2 0
/* DW6: kernel 1 pointer */
/* DW7: kernel 2 pointer */
/* Remaining 3D pipeline commands: buffers, stipple, multisample, SO decls. */
#define _3DSTATE_SAMPLE_MASK 0x7818 /* GEN6+ */
#define _3DSTATE_DRAWING_RECTANGLE 0x7900
#define _3DSTATE_BLEND_CONSTANT_COLOR 0x7901
#define _3DSTATE_CHROMA_KEY 0x7904
#define _3DSTATE_DEPTH_BUFFER 0x7905 /* GEN4-6 */
#define _3DSTATE_POLY_STIPPLE_OFFSET 0x7906
#define _3DSTATE_POLY_STIPPLE_PATTERN 0x7907
#define _3DSTATE_LINE_STIPPLE_PATTERN 0x7908
#define _3DSTATE_GLOBAL_DEPTH_OFFSET_CLAMP 0x7909
#define _3DSTATE_AA_LINE_PARAMETERS 0x790a /* G45+ */
#define _3DSTATE_GS_SVB_INDEX 0x790b /* CTG+ */
/* DW1 */
# define SVB_INDEX_SHIFT 29
# define SVB_LOAD_INTERNAL_VERTEX_COUNT (1 << 0) /* SNB+ */
/* DW2: SVB index */
/* DW3: SVB maximum index */
#define _3DSTATE_MULTISAMPLE 0x790d /* GEN6+ */
/* DW1 */
# define MS_PIXEL_LOCATION_CENTER (0 << 4)
# define MS_PIXEL_LOCATION_UPPER_LEFT (1 << 4)
# define MS_NUMSAMPLES_1 (0 << 1)
# define MS_NUMSAMPLES_4 (2 << 1)
# define MS_NUMSAMPLES_8 (3 << 1)
#define _3DSTATE_STENCIL_BUFFER 0x790e /* ILK, SNB */
#define _3DSTATE_HIER_DEPTH_BUFFER 0x790f /* ILK, SNB */
/* Gen7 moved the depth/stencil buffer commands to new opcodes. */
#define GEN7_3DSTATE_CLEAR_PARAMS 0x7804
#define GEN7_3DSTATE_DEPTH_BUFFER 0x7805
#define GEN7_3DSTATE_STENCIL_BUFFER 0x7806
# define HSW_STENCIL_ENABLED (1 << 31)
#define GEN7_3DSTATE_HIER_DEPTH_BUFFER 0x7807
#define _3DSTATE_CLEAR_PARAMS 0x7910 /* ILK, SNB */
# define GEN5_DEPTH_CLEAR_VALID (1 << 15)
/* DW1: depth clear value */
/* DW2 */
# define GEN7_DEPTH_CLEAR_VALID (1 << 0)
/* 3DSTATE_SO_DECL_LIST: stream-output declaration list. */
#define _3DSTATE_SO_DECL_LIST 0x7917 /* GEN7+ */
/* DW1 */
# define SO_STREAM_TO_BUFFER_SELECTS_3_SHIFT 12
# define SO_STREAM_TO_BUFFER_SELECTS_3_MASK INTEL_MASK(15, 12)
# define SO_STREAM_TO_BUFFER_SELECTS_2_SHIFT 8
# define SO_STREAM_TO_BUFFER_SELECTS_2_MASK INTEL_MASK(11, 8)
# define SO_STREAM_TO_BUFFER_SELECTS_1_SHIFT 4
# define SO_STREAM_TO_BUFFER_SELECTS_1_MASK INTEL_MASK(7, 4)
# define SO_STREAM_TO_BUFFER_SELECTS_0_SHIFT 0
# define SO_STREAM_TO_BUFFER_SELECTS_0_MASK INTEL_MASK(3, 0)
/* DW2 */
# define SO_NUM_ENTRIES_3_SHIFT 24
# define SO_NUM_ENTRIES_3_MASK INTEL_MASK(31, 24)
# define SO_NUM_ENTRIES_2_SHIFT 16
# define SO_NUM_ENTRIES_2_MASK INTEL_MASK(23, 16)
# define SO_NUM_ENTRIES_1_SHIFT 8
# define SO_NUM_ENTRIES_1_MASK INTEL_MASK(15, 8)
# define SO_NUM_ENTRIES_0_SHIFT 0
# define SO_NUM_ENTRIES_0_MASK INTEL_MASK(7, 0)
/* SO_DECL DW0 */
# define SO_DECL_OUTPUT_BUFFER_SLOT_SHIFT 12
# define SO_DECL_OUTPUT_BUFFER_SLOT_MASK INTEL_MASK(13, 12)
# define SO_DECL_HOLE_FLAG (1 << 11)
# define SO_DECL_REGISTER_INDEX_SHIFT 4
# define SO_DECL_REGISTER_INDEX_MASK INTEL_MASK(9, 4)
# define SO_DECL_COMPONENT_MASK_SHIFT 0
# define SO_DECL_COMPONENT_MASK_MASK INTEL_MASK(3, 0)
#define _3DSTATE_SO_BUFFER 0x7918 /* GEN7+ */
/* DW1 */
# define SO_BUFFER_INDEX_SHIFT 29
# define SO_BUFFER_INDEX_MASK INTEL_MASK(30, 29)
# define SO_BUFFER_PITCH_SHIFT 0
# define SO_BUFFER_PITCH_MASK INTEL_MASK(11, 0)
/* DW2: start address */
/* DW3: end address. */
#define CMD_PIPE_CONTROL 0x7a00
#define CMD_MI_FLUSH 0x0200
/* Bitfields for the URB_WRITE message, DW2 of message header: */
#define URB_WRITE_PRIM_END 0x1
#define URB_WRITE_PRIM_START 0x2
#define URB_WRITE_PRIM_TYPE_SHIFT 2
/* Maximum number of entries that can be addressed using a binding table
 * pointer of type SURFTYPE_BUFFER
 */
#define BRW_MAX_NUM_BUFFER_ENTRIES (1 << 27)
#include "intel_chipset.h"
#endif
/contrib/sdk/sources/Mesa/src/gallium/drivers/ilo/include/brw_structs.h |
---|
0,0 → 1,1453 |
/* |
Copyright (C) Intel Corp. 2006. All Rights Reserved. |
Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to |
develop this 3D driver. |
Permission is hereby granted, free of charge, to any person obtaining |
a copy of this software and associated documentation files (the |
"Software"), to deal in the Software without restriction, including |
without limitation the rights to use, copy, modify, merge, publish, |
distribute, sublicense, and/or sell copies of the Software, and to |
permit persons to whom the Software is furnished to do so, subject to |
the following conditions: |
The above copyright notice and this permission notice (including the |
next paragraph) shall be included in all copies or substantial |
portions of the Software. |
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, |
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF |
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. |
IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE |
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION |
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION |
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. |
**********************************************************************/ |
/* |
* Authors: |
* Keith Whitwell <keith@tungstengraphics.com> |
*/ |
#ifndef BRW_STRUCTS_H |
#define BRW_STRUCTS_H |
/* These seem to be passed around as function args, so it works out
 * better to keep them as #defines:
 */
/* Flush/flush-inhibit flag bits; OR-combinable. */
#define BRW_FLUSH_READ_CACHE 0x1
#define BRW_FLUSH_STATE_CACHE 0x2
#define BRW_INHIBIT_FLUSH_RENDER_CACHE 0x4
#define BRW_FLUSH_SNAPSHOT_COUNTERS 0x8
/* URB_FENCE command layout: partitions the Unified Return Buffer among the
 * fixed-function units (VS/GS/CLIP/SF/VFE/CS).
 *
 * NOTE(review): like all structs in this header, correctness relies on the
 * compiler packing bit-fields LSB-first within each 32-bit dword
 * (little-endian ABI) — bit-field layout is implementation-defined in C.
 */
struct brw_urb_fence
{
   struct
   {
      GLuint length:8;
      /* per-unit "reallocate fence" flags */
      GLuint vs_realloc:1;
      GLuint gs_realloc:1;
      GLuint clp_realloc:1;
      GLuint sf_realloc:1;
      GLuint vfe_realloc:1;
      GLuint cs_realloc:1;
      GLuint pad:2;
      GLuint opcode:16;
   } header;
   struct
   {
      GLuint vs_fence:10;
      GLuint gs_fence:10;
      GLuint clp_fence:10;
      GLuint pad:2;
   } bits0;
   struct
   {
      GLuint sf_fence:10;
      GLuint vf_fence:10;
      GLuint cs_fence:11;
      GLuint pad:1;
   } bits1;
};
/* State structs for the various fixed function units: |
*/ |
/* Fixed-function unit state DW0: kernel pointer + GRF count.  Shared by the
 * VS/GS/SF/WM/CLIP unit-state structs below. */
struct thread0
{
   GLuint pad0:1;
   GLuint grf_reg_count:3;
   GLuint pad1:2;
   GLuint kernel_start_pointer:26; /* Offset from GENERAL_STATE_BASE */
};
/* Fixed-function unit state DW1: exception enables and thread config. */
struct thread1
{
   GLuint ext_halt_exception_enable:1;
   GLuint sw_exception_enable:1;
   GLuint mask_stack_exception_enable:1;
   GLuint timeout_exception_enable:1;
   GLuint illegal_op_exception_enable:1;
   GLuint pad0:3;
   GLuint depth_coef_urb_read_offset:6;	/* WM only */
   GLuint pad1:2;
   GLuint floating_point_mode:1;
   GLuint thread_priority:1;
   GLuint binding_table_entry_count:8;
   GLuint pad3:5;
   GLuint single_program_flow:1;
};
/* Fixed-function unit state DW2: per-thread scratch space. */
struct thread2
{
   GLuint per_thread_scratch_space:4;
   GLuint pad0:6;
   GLuint scratch_space_base_pointer:22;
};
/* Fixed-function unit state DW3: URB/constant-URB read offsets and lengths,
 * plus the dispatch start GRF. */
struct thread3
{
   GLuint dispatch_grf_start_reg:4;
   GLuint urb_entry_read_offset:6;
   GLuint pad0:1;
   GLuint urb_entry_read_length:6;
   GLuint pad1:1;
   GLuint const_urb_entry_read_offset:6;
   GLuint pad2:1;
   GLuint const_urb_entry_read_length:6;
   GLuint pad3:1;
};
/* CLIP unit state.  DW1 uses an inline struct instead of the shared
 * `struct thread1` because the CLIP unit's DW1 bit layout differs. */
struct brw_clip_unit_state
{
   struct thread0 thread0;
   struct
   {
      GLuint pad0:7;
      GLuint sw_exception_enable:1;
      GLuint pad1:3;
      GLuint mask_stack_exception_enable:1;
      GLuint pad2:1;
      GLuint illegal_op_exception_enable:1;
      GLuint pad3:2;
      GLuint floating_point_mode:1;
      GLuint thread_priority:1;
      GLuint binding_table_entry_count:8;
      GLuint pad4:5;
      GLuint single_program_flow:1;
   } thread1;
   struct thread2 thread2;
   struct thread3 thread3;
   struct
   {
      GLuint pad0:9;
      GLuint gs_output_stats:1; /* not always */
      GLuint stats_enable:1;
      GLuint nr_urb_entries:7;
      GLuint pad1:1;
      GLuint urb_entry_allocation_size:5;
      GLuint pad2:1;
      GLuint max_threads:5; /* may be less */
      GLuint pad3:2;
   } thread4;
   struct
   {
      GLuint pad0:13;
      GLuint clip_mode:3;
      GLuint userclip_enable_flags:8;
      GLuint userclip_must_clip:1;
      GLuint negative_w_clip_test:1;
      GLuint guard_band_enable:1;
      GLuint viewport_z_clip_enable:1;
      GLuint viewport_xy_clip_enable:1;
      GLuint vertex_position_space:1;
      GLuint api_mode:1;
      GLuint pad2:1;
   } clip5;
   struct
   {
      GLuint pad0:5;
      GLuint clipper_viewport_state_ptr:27;
   } clip6;
   /* Guard-band viewport extents. */
   GLfloat viewport_xmin;
   GLfloat viewport_xmax;
   GLfloat viewport_ymin;
   GLfloat viewport_ymax;
};
/* Gen6+ BLEND_STATE entry (two dwords per render target): color/alpha blend
 * factors and functions (blend0), plus clamp/dither/alpha-test/logic-op and
 * per-channel write disables (blend1). */
struct gen6_blend_state
{
   struct {
      GLuint dest_blend_factor:5;
      GLuint source_blend_factor:5;
      GLuint pad3:1;
      GLuint blend_func:3;
      GLuint pad2:1;
      /* "ia_" fields are the independent-alpha blend controls. */
      GLuint ia_dest_blend_factor:5;
      GLuint ia_source_blend_factor:5;
      GLuint pad1:1;
      GLuint ia_blend_func:3;
      GLuint pad0:1;
      GLuint ia_blend_enable:1;
      GLuint blend_enable:1;
   } blend0;
   struct {
      GLuint post_blend_clamp_enable:1;
      GLuint pre_blend_clamp_enable:1;
      GLuint clamp_range:2;
      GLuint pad0:4;
      GLuint x_dither_offset:2;
      GLuint y_dither_offset:2;
      GLuint dither_enable:1;
      GLuint alpha_test_func:3;
      GLuint alpha_test_enable:1;
      GLuint pad1:1;
      GLuint logic_op_func:4;
      GLuint logic_op_enable:1;
      GLuint pad2:1;
      GLuint write_disable_b:1;
      GLuint write_disable_g:1;
      GLuint write_disable_r:1;
      GLuint write_disable_a:1;
      GLuint pad3:1;
      GLuint alpha_to_coverage_dither:1;
      GLuint alpha_to_one:1;
      GLuint alpha_to_coverage:1;
   } blend1;
};
/* Gen6+ COLOR_CALC_STATE: stencil refs, alpha-test reference (float or
 * unorm8, selected by cc0.alpha_test_format), and the blend constant color. */
struct gen6_color_calc_state
{
   struct {
      GLuint alpha_test_format:1;
      GLuint pad0:14;
      GLuint round_disable:1;
      GLuint bf_stencil_ref:8;
      GLuint stencil_ref:8;
   } cc0;
   union {
      GLfloat alpha_ref_f;
      struct {
         GLuint ui:8;
         GLuint pad0:24;
      } alpha_ref_fi;
   } cc1;
   GLfloat constant_r;
   GLfloat constant_g;
   GLfloat constant_b;
   GLfloat constant_a;
};
/* Gen6+ DEPTH_STENCIL_STATE: front/back ("bf_") stencil ops and masks plus
 * the depth test/write controls. */
struct gen6_depth_stencil_state
{
   struct {
      GLuint pad0:3;
      GLuint bf_stencil_pass_depth_pass_op:3;
      GLuint bf_stencil_pass_depth_fail_op:3;
      GLuint bf_stencil_fail_op:3;
      GLuint bf_stencil_func:3;
      GLuint bf_stencil_enable:1;
      GLuint pad1:2;
      GLuint stencil_write_enable:1;
      GLuint stencil_pass_depth_pass_op:3;
      GLuint stencil_pass_depth_fail_op:3;
      GLuint stencil_fail_op:3;
      GLuint stencil_func:3;
      GLuint stencil_enable:1;
   } ds0;
   struct {
      GLuint bf_stencil_write_mask:8;
      GLuint bf_stencil_test_mask:8;
      GLuint stencil_write_mask:8;
      GLuint stencil_test_mask:8;
   } ds1;
   struct {
      GLuint pad0:26;
      GLuint depth_write_enable:1;
      GLuint depth_test_func:3;
      GLuint pad1:1;
      GLuint depth_test_enable:1;
   } ds2;
};
/* Pre-gen6 COLOR_CALC (CC) unit state: the gen4/5 combined depth/stencil/
 * blend/alpha-test state, superseded on gen6+ by the three split structs
 * above (blend, color-calc, depth-stencil). */
struct brw_cc_unit_state
{
   struct
   {
      GLuint pad0:3;
      GLuint bf_stencil_pass_depth_pass_op:3;
      GLuint bf_stencil_pass_depth_fail_op:3;
      GLuint bf_stencil_fail_op:3;
      GLuint bf_stencil_func:3;
      GLuint bf_stencil_enable:1;
      GLuint pad1:2;
      GLuint stencil_write_enable:1;
      GLuint stencil_pass_depth_pass_op:3;
      GLuint stencil_pass_depth_fail_op:3;
      GLuint stencil_fail_op:3;
      GLuint stencil_func:3;
      GLuint stencil_enable:1;
   } cc0;
   struct
   {
      GLuint bf_stencil_ref:8;
      GLuint stencil_write_mask:8;
      GLuint stencil_test_mask:8;
      GLuint stencil_ref:8;
   } cc1;
   struct
   {
      GLuint logicop_enable:1;
      GLuint pad0:10;
      GLuint depth_write_enable:1;
      GLuint depth_test_function:3;
      GLuint depth_test:1;
      GLuint bf_stencil_write_mask:8;
      GLuint bf_stencil_test_mask:8;
   } cc2;
   struct
   {
      GLuint pad0:8;
      GLuint alpha_test_func:3;
      GLuint alpha_test:1;
      GLuint blend_enable:1;
      GLuint ia_blend_enable:1;
      GLuint pad1:1;
      GLuint alpha_test_format:1;
      GLuint pad2:16;
   } cc3;
   struct
   {
      GLuint pad0:5;
      GLuint cc_viewport_state_offset:27; /* Offset from GENERAL_STATE_BASE */
   } cc4;
   struct
   {
      GLuint pad0:2;
      GLuint ia_dest_blend_factor:5;
      GLuint ia_src_blend_factor:5;
      GLuint ia_blend_function:3;
      GLuint statistics_enable:1;
      GLuint logicop_func:4;
      GLuint pad1:11;
      GLuint dither_enable:1;
   } cc5;
   struct
   {
      GLuint clamp_post_alpha_blend:1;
      GLuint clamp_pre_alpha_blend:1;
      GLuint clamp_range:2;
      GLuint pad0:11;
      GLuint y_dither_offset:2;
      GLuint x_dither_offset:2;
      GLuint dest_blend_factor:5;
      GLuint src_blend_factor:5;
      GLuint blend_function:3;
   } cc6;
   struct {
      /* Alpha-test reference; float or packed bytes depending on
       * cc3.alpha_test_format. */
      union {
         GLfloat f;
         GLubyte ub[4];
      } alpha_ref;
   } cc7;
};
/* Pre-gen6 SF (strips-and-fans / setup) unit state: thread config plus
 * rasterization controls (cull, scissor, line/point parameters). */
struct brw_sf_unit_state
{
   struct thread0 thread0;
   struct thread1 thread1;
   struct thread2 thread2;
   struct thread3 thread3;
   struct
   {
      GLuint pad0:10;
      GLuint stats_enable:1;
      GLuint nr_urb_entries:7;
      GLuint pad1:1;
      GLuint urb_entry_allocation_size:5;
      GLuint pad2:1;
      GLuint max_threads:6;
      GLuint pad3:1;
   } thread4;
   struct
   {
      GLuint front_winding:1;
      GLuint viewport_transform:1;
      GLuint pad0:3;
      GLuint sf_viewport_state_offset:27; /* Offset from GENERAL_STATE_BASE */
   } sf5;
   struct
   {
      GLuint pad0:9;
      GLuint dest_org_vbias:4;
      GLuint dest_org_hbias:4;
      GLuint scissor:1;
      GLuint disable_2x2_trifilter:1;
      GLuint disable_zero_pix_trifilter:1;
      GLuint point_rast_rule:2;
      GLuint line_endcap_aa_region_width:2;
      GLuint line_width:4;
      GLuint fast_scissor_disable:1;
      GLuint cull_mode:2;
      GLuint aa_enable:1;
   } sf6;
   struct
   {
      GLuint point_size:11;
      GLuint use_point_size_state:1;
      GLuint subpixel_precision:1;
      GLuint sprite_point:1;
      GLuint pad0:10;
      GLuint aa_line_distance_mode:1;
      /* provoking-vertex selection per primitive type */
      GLuint trifan_pv:2;
      GLuint linestrip_pv:2;
      GLuint tristrip_pv:2;
      GLuint line_last_pixel_enable:1;
   } sf7;
};
/* Gen6+ SCISSOR_RECT: inclusive screen-space scissor bounds, packed two
 * 16-bit coordinates per dword. */
struct gen6_scissor_rect
{
   GLuint xmin:16;
   GLuint ymin:16;
   GLuint xmax:16;
   GLuint ymax:16;
};
/* Pre-gen6 GS unit state: thread config, sampler pointer, and streamed
 * vertex buffer index (SVBI) controls. */
struct brw_gs_unit_state
{
   struct thread0 thread0;
   struct thread1 thread1;
   struct thread2 thread2;
   struct thread3 thread3;
   struct
   {
      GLuint pad0:8;
      GLuint rendering_enable:1; /* for Ironlake */
      GLuint pad4:1;
      GLuint stats_enable:1;
      GLuint nr_urb_entries:7;
      GLuint pad1:1;
      GLuint urb_entry_allocation_size:5;
      GLuint pad2:1;
      GLuint max_threads:5;
      GLuint pad3:2;
   } thread4;
   struct
   {
      GLuint sampler_count:3;
      GLuint pad0:2;
      GLuint sampler_state_pointer:27;
   } gs5;
   struct
   {
      GLuint max_vp_index:4;
      GLuint pad0:12;
      GLuint svbi_post_inc_value:10;
      GLuint pad1:1;
      GLuint svbi_post_inc_enable:1;
      GLuint svbi_payload:1;
      /* sic: "adjacency" is misspelled in the field name; kept for
       * source compatibility with existing users. */
      GLuint discard_adjaceny:1;
      GLuint reorder_enable:1;
      GLuint pad2:1;
   } gs6;
};
/* Pre-gen6 VS unit state: thread config, sampler pointer, enable bits. */
struct brw_vs_unit_state
{
   struct thread0 thread0;
   struct thread1 thread1;
   struct thread2 thread2;
   struct thread3 thread3;
   struct
   {
      GLuint pad0:10;
      GLuint stats_enable:1;
      GLuint nr_urb_entries:7;
      GLuint pad1:1;
      GLuint urb_entry_allocation_size:5;
      GLuint pad2:1;
      GLuint max_threads:6;
      GLuint pad3:1;
   } thread4;
   struct
   {
      GLuint sampler_count:3;
      GLuint pad0:2;
      GLuint sampler_state_pointer:27;
   } vs5;
   struct
   {
      GLuint vs_enable:1;
      GLuint vert_cache_disable:1;
      GLuint pad0:30;
   } vs6;
};
/* Pre-gen6 WM (windower) unit state: thread config, dispatch enables,
 * depth-offset constants, and (Ironlake) extra kernel pointers wm8-wm10. */
struct brw_wm_unit_state
{
   struct thread0 thread0;
   struct thread1 thread1;
   struct thread2 thread2;
   struct thread3 thread3;
   struct {
      GLuint stats_enable:1;
      GLuint depth_buffer_clear:1;
      GLuint sampler_count:3;
      GLuint sampler_state_pointer:27;
   } wm4;
   struct
   {
      GLuint enable_8_pix:1;
      GLuint enable_16_pix:1;
      GLuint enable_32_pix:1;
      GLuint enable_con_32_pix:1;
      GLuint enable_con_64_pix:1;
      GLuint pad0:1;
      /* These next four bits are for Ironlake+ */
      GLuint fast_span_coverage_enable:1;
      GLuint depth_buffer_clear:1;
      GLuint depth_buffer_resolve_enable:1;
      GLuint hierarchical_depth_buffer_resolve_enable:1;
      GLuint legacy_global_depth_bias:1;
      GLuint line_stipple:1;
      GLuint depth_offset:1;
      GLuint polygon_stipple:1;
      GLuint line_aa_region_width:2;
      GLuint line_endcap_aa_region_width:2;
      GLuint early_depth_test:1;
      GLuint thread_dispatch_enable:1;
      GLuint program_uses_depth:1;
      GLuint program_computes_depth:1;
      GLuint program_uses_killpixel:1;
      GLuint legacy_line_rast: 1;
      GLuint transposed_urb_read_enable:1;
      GLuint max_threads:7;
   } wm5;
   GLfloat global_depth_offset_constant;
   GLfloat global_depth_offset_scale;
   /* for Ironlake only */
   struct {
      GLuint pad0:1;
      GLuint grf_reg_count_1:3;
      GLuint pad1:2;
      GLuint kernel_start_pointer_1:26;
   } wm8;
   struct {
      GLuint pad0:1;
      GLuint grf_reg_count_2:3;
      GLuint pad1:2;
      GLuint kernel_start_pointer_2:26;
   } wm9;
   struct {
      GLuint pad0:1;
      GLuint grf_reg_count_3:3;
      GLuint pad1:2;
      GLuint kernel_start_pointer_3:26;
   } wm10;
};
/* Gen4 SAMPLER_BORDER_COLOR_STATE: RGBA border color as four floats. */
struct brw_sampler_default_color {
   GLfloat color[4];
};
/* Gen5 border color: the same RGBA color replicated in every format the
 * sampler might fetch (unorm8, float32, half-float, unorm16, snorm16,
 * snorm8). */
struct gen5_sampler_default_color {
   uint8_t ub[4];
   float f[4];
   uint16_t hf[4];
   uint16_t us[4];
   int16_t s[4];
   uint8_t b[4];
};
/* Pre-gen7 SAMPLER_STATE (four dwords): filtering, LOD clamps, wrap modes,
 * border-color pointer, and chroma-key / aniso controls. */
struct brw_sampler_state
{
   struct
   {
      GLuint shadow_function:3;
      GLuint lod_bias:11;
      GLuint min_filter:3;
      GLuint mag_filter:3;
      GLuint mip_filter:2;
      GLuint base_level:5;
      GLuint min_mag_neq:1;
      GLuint lod_preclamp:1;
      GLuint default_color_mode:1;
      GLuint pad0:1;
      GLuint disable:1;
   } ss0;
   struct
   {
      GLuint r_wrap_mode:3;
      GLuint t_wrap_mode:3;
      GLuint s_wrap_mode:3;
      GLuint cube_control_mode:1;
      GLuint pad:2;
      GLuint max_lod:10;
      GLuint min_lod:10;
   } ss1;
   struct
   {
      GLuint pad:5;
      GLuint default_color_pointer:27;
   } ss2;
   struct
   {
      GLuint non_normalized_coord:1;
      GLuint pad:12;
      GLuint address_round:6;
      GLuint max_aniso:3;
      GLuint chroma_key_mode:1;
      GLuint chroma_key_index:2;
      GLuint chroma_key_enable:1;
      GLuint monochrome_filter_width:3;
      GLuint monochrome_filter_height:3;
   } ss3;
};
/* Gen7 SAMPLER_STATE: reshuffled from the pre-gen7 layout above — wider
 * lod_bias/LOD clamps, shadow function moved to ss1, wrap modes moved to
 * ss3. */
struct gen7_sampler_state
{
   struct
   {
      GLuint aniso_algorithm:1;
      GLuint lod_bias:13;
      GLuint min_filter:3;
      GLuint mag_filter:3;
      GLuint mip_filter:2;
      GLuint base_level:5;
      GLuint pad1:1;
      GLuint lod_preclamp:1;
      GLuint default_color_mode:1;
      GLuint pad0:1;
      GLuint disable:1;
   } ss0;
   struct
   {
      GLuint cube_control_mode:1;
      GLuint shadow_function:3;
      GLuint pad:4;
      GLuint max_lod:12;
      GLuint min_lod:12;
   } ss1;
   struct
   {
      GLuint pad:5;
      GLuint default_color_pointer:27;
   } ss2;
   struct
   {
      GLuint r_wrap_mode:3;
      GLuint t_wrap_mode:3;
      GLuint s_wrap_mode:3;
      GLuint pad:1;
      GLuint non_normalized_coord:1;
      GLuint trilinear_quality:2;
      GLuint address_round:6;
      GLuint max_aniso:3;
      GLuint chroma_key_mode:1;
      GLuint chroma_key_index:2;
      GLuint chroma_key_enable:1;
      GLuint pad0:6;
   } ss3;
};
/* CLIP_VIEWPORT: guard-band extents used by the clipper. */
struct brw_clipper_viewport
{
   GLfloat xmin;
   GLfloat xmax;
   GLfloat ymin;
   GLfloat ymax;
};
/* CC_VIEWPORT: depth range used for depth clamping. */
struct brw_cc_viewport
{
   GLfloat min_depth;
   GLfloat max_depth;
};
/* Pre-gen6 SF_VIEWPORT: viewport transform (scale m00/m11/m22, translate
 * m30/m31/m32) plus an inclusive scissor rectangle. */
struct brw_sf_viewport
{
   struct {
      GLfloat m00;
      GLfloat m11;
      GLfloat m22;
      GLfloat m30;
      GLfloat m31;
      GLfloat m32;
   } viewport;
   /* scissor coordinates are inclusive */
   struct {
      GLshort xmin;
      GLshort ymin;
      GLshort xmax;
      GLshort ymax;
   } scissor;
};
/* Gen6 SF_VIEWPORT: viewport transform only — the scissor moved to its own
 * SCISSOR_RECT state on gen6. */
struct gen6_sf_viewport {
   GLfloat m00;
   GLfloat m11;
   GLfloat m22;
   GLfloat m30;
   GLfloat m31;
   GLfloat m32;
};
/* Gen7 combined SF_CLIP_VIEWPORT: viewport transform plus clipper guard
 * band, padded to the hardware's 16-dword entry size. */
struct gen7_sf_clip_viewport {
   struct {
      GLfloat m00;
      GLfloat m11;
      GLfloat m22;
      GLfloat m30;
      GLfloat m31;
      GLfloat m32;
   } viewport;
   GLuint pad0[2];
   struct {
      GLfloat xmin;
      GLfloat xmax;
      GLfloat ymin;
      GLfloat ymax;
   } guardband;
   GLfloat pad1[4];
};
/* VERTEX_ELEMENT_STATE: source buffer/offset/format (ve0) and destination
 * component selects (ve1). */
struct brw_vertex_element_state
{
   struct
   {
      GLuint src_offset:11;
      GLuint pad:5;
      GLuint src_format:9;
      GLuint pad0:1;
      GLuint valid:1;
      GLuint vertex_buffer_index:5;
   } ve0;
   struct
   {
      GLuint dst_offset:8;
      GLuint pad:8;
      /* per-component VFCOMPONENT select (store source / 0 / 1, etc.) */
      GLuint vfcomponent3:4;
      GLuint vfcomponent2:4;
      GLuint vfcomponent1:4;
      GLuint vfcomponent0:4;
   } ve1;
};
/* Immediate form of the URB write message descriptor. */
struct brw_urb_immediate {
   GLuint opcode:4;
   GLuint offset:6;
   GLuint swizzle_control:2;
   GLuint pad:1;
   GLuint allocate:1;
   GLuint used:1;
   GLuint complete:1;
   GLuint response_length:4;
   GLuint msg_length:4;
   GLuint msg_target:4;
   GLuint pad1:3;
   GLuint end_of_thread:1;
};
/* Instruction format for the execution units: |
*/ |
struct brw_instruction |
{ |
struct |
{ |
GLuint opcode:7; |
GLuint pad:1; |
GLuint access_mode:1; |
GLuint mask_control:1; |
GLuint dependency_control:2; |
GLuint compression_control:2; /* gen6: quarter control */ |
GLuint thread_control:2; |
GLuint predicate_control:4; |
GLuint predicate_inverse:1; |
GLuint execution_size:3; |
/** |
* Conditional Modifier for most instructions. On Gen6+, this is also |
* used for the SEND instruction's Message Target/SFID. |
*/ |
GLuint destreg__conditionalmod:4; |
GLuint acc_wr_control:1; |
GLuint cmpt_control:1; |
GLuint debug_control:1; |
GLuint saturate:1; |
} header; |
union { |
struct |
{ |
GLuint dest_reg_file:2; |
GLuint dest_reg_type:3; |
GLuint src0_reg_file:2; |
GLuint src0_reg_type:3; |
GLuint src1_reg_file:2; |
GLuint src1_reg_type:3; |
GLuint nibctrl:1; /* gen7+ */ |
GLuint dest_subreg_nr:5; |
GLuint dest_reg_nr:8; |
GLuint dest_horiz_stride:2; |
GLuint dest_address_mode:1; |
} da1; |
struct |
{ |
GLuint dest_reg_file:2; |
GLuint dest_reg_type:3; |
GLuint src0_reg_file:2; |
GLuint src0_reg_type:3; |
GLuint src1_reg_file:2; /* 0x00000c00 */ |
GLuint src1_reg_type:3; /* 0x00007000 */ |
GLuint nibctrl:1; /* gen7+ */ |
GLint dest_indirect_offset:10; /* offset against the deref'd address reg */ |
GLuint dest_subreg_nr:3; /* subnr for the address reg a0.x */ |
GLuint dest_horiz_stride:2; |
GLuint dest_address_mode:1; |
} ia1; |
struct |
{ |
GLuint dest_reg_file:2; |
GLuint dest_reg_type:3; |
GLuint src0_reg_file:2; |
GLuint src0_reg_type:3; |
GLuint src1_reg_file:2; |
GLuint src1_reg_type:3; |
GLuint nibctrl:1; /* gen7+ */ |
GLuint dest_writemask:4; |
GLuint dest_subreg_nr:1; |
GLuint dest_reg_nr:8; |
GLuint dest_horiz_stride:2; |
GLuint dest_address_mode:1; |
} da16; |
struct |
{ |
GLuint dest_reg_file:2; |
GLuint dest_reg_type:3; |
GLuint src0_reg_file:2; |
GLuint src0_reg_type:3; |
GLuint src1_reg_file:2; |
GLuint src1_reg_type:3; |
GLuint nibctrl:1; /* gen7+ */ |
GLuint dest_writemask:4; |
GLint dest_indirect_offset:6; |
GLuint dest_subreg_nr:3; |
GLuint dest_horiz_stride:2; |
GLuint dest_address_mode:1; |
} ia16; |
struct { |
GLuint dest_reg_file:2; |
GLuint dest_reg_type:3; |
GLuint src0_reg_file:2; |
GLuint src0_reg_type:3; |
GLuint src1_reg_file:2; |
GLuint src1_reg_type:3; |
GLuint pad:1; |
GLint jump_count:16; |
} branch_gen6; |
struct { |
GLuint dest_reg_file:1; /* gen6, not gen7+ */ |
GLuint flag_subreg_num:1; |
GLuint flag_reg_nr:1; /* gen7+ */ |
GLuint pad0:1; |
GLuint src0_abs:1; |
GLuint src0_negate:1; |
GLuint src1_abs:1; |
GLuint src1_negate:1; |
GLuint src2_abs:1; |
GLuint src2_negate:1; |
GLuint src_type:2; /* gen7+ */ |
GLuint dst_type:2; /* gen7+ */ |
GLuint pad1:1; |
GLuint nibctrl:1; /* gen7+ */ |
GLuint pad2:1; |
GLuint dest_writemask:4; |
GLuint dest_subreg_nr:3; |
GLuint dest_reg_nr:8; |
} da3src; |
uint32_t ud; |
} bits1; |
union { |
struct |
{ |
GLuint src0_subreg_nr:5; |
GLuint src0_reg_nr:8; |
GLuint src0_abs:1; |
GLuint src0_negate:1; |
GLuint src0_address_mode:1; |
GLuint src0_horiz_stride:2; |
GLuint src0_width:3; |
GLuint src0_vert_stride:4; |
GLuint flag_subreg_nr:1; |
GLuint flag_reg_nr:1; /* gen7+ */ |
GLuint pad:5; |
} da1; |
struct |
{ |
GLint src0_indirect_offset:10; |
GLuint src0_subreg_nr:3; |
GLuint src0_abs:1; |
GLuint src0_negate:1; |
GLuint src0_address_mode:1; |
GLuint src0_horiz_stride:2; |
GLuint src0_width:3; |
GLuint src0_vert_stride:4; |
GLuint flag_subreg_nr:1; |
GLuint flag_reg_nr:1; /* gen7+ */ |
GLuint pad:5; |
} ia1; |
struct |
{ |
GLuint src0_swz_x:2; |
GLuint src0_swz_y:2; |
GLuint src0_subreg_nr:1; |
GLuint src0_reg_nr:8; |
GLuint src0_abs:1; |
GLuint src0_negate:1; |
GLuint src0_address_mode:1; |
GLuint src0_swz_z:2; |
GLuint src0_swz_w:2; |
GLuint pad0:1; |
GLuint src0_vert_stride:4; |
GLuint flag_subreg_nr:1; |
GLuint flag_reg_nr:1; /* gen7+ */ |
GLuint pad1:5; |
} da16; |
struct |
{ |
GLuint src0_swz_x:2; |
GLuint src0_swz_y:2; |
GLint src0_indirect_offset:6; |
GLuint src0_subreg_nr:3; |
GLuint src0_abs:1; |
GLuint src0_negate:1; |
GLuint src0_address_mode:1; |
GLuint src0_swz_z:2; |
GLuint src0_swz_w:2; |
GLuint pad0:1; |
GLuint src0_vert_stride:4; |
GLuint flag_subreg_nr:1; |
GLuint flag_reg_nr:1; /* gen7+ */ |
GLuint pad1:5; |
} ia16; |
/* Extended Message Descriptor for Ironlake (Gen5) SEND instruction. |
* |
* Does not apply to Gen6+. The SFID/message target moved to bits |
* 27:24 of the header (destreg__conditionalmod); EOT is in bits3. |
*/ |
struct |
{ |
GLuint pad:26; |
GLuint end_of_thread:1; |
GLuint pad1:1; |
GLuint sfid:4; |
} send_gen5; /* for Ironlake only */ |
struct { |
GLuint src0_rep_ctrl:1; |
GLuint src0_swizzle:8; |
GLuint src0_subreg_nr:3; |
GLuint src0_reg_nr:8; |
GLuint pad0:1; |
GLuint src1_rep_ctrl:1; |
GLuint src1_swizzle:8; |
GLuint src1_subreg_nr_low:2; |
} da3src; |
uint32_t ud; |
} bits2; |
union |
{ |
struct |
{ |
GLuint src1_subreg_nr:5; |
GLuint src1_reg_nr:8; |
GLuint src1_abs:1; |
GLuint src1_negate:1; |
GLuint src1_address_mode:1; |
GLuint src1_horiz_stride:2; |
GLuint src1_width:3; |
GLuint src1_vert_stride:4; |
GLuint pad0:7; |
} da1; |
struct |
{ |
GLuint src1_swz_x:2; |
GLuint src1_swz_y:2; |
GLuint src1_subreg_nr:1; |
GLuint src1_reg_nr:8; |
GLuint src1_abs:1; |
GLuint src1_negate:1; |
GLuint src1_address_mode:1; |
GLuint src1_swz_z:2; |
GLuint src1_swz_w:2; |
GLuint pad1:1; |
GLuint src1_vert_stride:4; |
GLuint pad2:7; |
} da16; |
struct |
{ |
GLint src1_indirect_offset:10; |
GLuint src1_subreg_nr:3; |
GLuint src1_abs:1; |
GLuint src1_negate:1; |
GLuint src1_address_mode:1; |
GLuint src1_horiz_stride:2; |
GLuint src1_width:3; |
GLuint src1_vert_stride:4; |
GLuint pad1:7; |
} ia1; |
struct |
{ |
GLuint src1_swz_x:2; |
GLuint src1_swz_y:2; |
GLint src1_indirect_offset:6; |
GLuint src1_subreg_nr:3; |
GLuint src1_abs:1; |
GLuint src1_negate:1; |
GLuint pad0:1; |
GLuint src1_swz_z:2; |
GLuint src1_swz_w:2; |
GLuint pad1:1; |
GLuint src1_vert_stride:4; |
GLuint pad2:7; |
} ia16; |
struct |
{ |
GLint jump_count:16; /* note: signed */ |
GLuint pop_count:4; |
GLuint pad0:12; |
} if_else; |
/* This is also used for gen7 IF/ELSE instructions */ |
struct |
{ |
/* Signed jump distance to the ip to jump to if all channels |
* are disabled after the break or continue. It should point |
* to the end of the innermost control flow block, as that's |
* where some channel could get re-enabled. |
*/ |
int jip:16; |
/* Signed jump distance to the location to resume execution |
* of this channel if it's enabled for the break or continue. |
*/ |
int uip:16; |
} break_cont; |
/** |
* \defgroup SEND instructions / Message Descriptors |
* |
* @{ |
*/ |
/** |
* Generic Message Descriptor for Gen4 SEND instructions. The structs |
* below expand function_control to something specific for their |
* message. Due to struct packing issues, they duplicate these bits. |
* |
* See the G45 PRM, Volume 4, Table 14-15. |
*/ |
struct { |
GLuint function_control:16; |
GLuint response_length:4; |
GLuint msg_length:4; |
GLuint msg_target:4; |
GLuint pad1:3; |
GLuint end_of_thread:1; |
} generic; |
/** |
* Generic Message Descriptor for Gen5-7 SEND instructions. |
* |
* See the Sandybridge PRM, Volume 2 Part 2, Table 8-15. (Sadly, most |
* of the information on the SEND instruction is missing from the public |
* Ironlake PRM.) |
* |
* The table claims that bit 31 is reserved/MBZ on Gen6+, but it lies. |
* According to the SEND instruction description: |
* "The MSb of the message description, the EOT field, always comes from |
* bit 127 of the instruction word"...which is bit 31 of this field. |
*/ |
struct { |
GLuint function_control:19; |
GLuint header_present:1; |
GLuint response_length:5; |
GLuint msg_length:4; |
GLuint pad1:2; |
GLuint end_of_thread:1; |
} generic_gen5; |
/** G45 PRM, Volume 4, Section 6.1.1.1 */ |
struct { |
GLuint function:4; |
GLuint int_type:1; |
GLuint precision:1; |
GLuint saturate:1; |
GLuint data_type:1; |
GLuint pad0:8; |
GLuint response_length:4; |
GLuint msg_length:4; |
GLuint msg_target:4; |
GLuint pad1:3; |
GLuint end_of_thread:1; |
} math; |
/** Ironlake PRM, Volume 4 Part 1, Section 6.1.1.1 */ |
struct { |
GLuint function:4; |
GLuint int_type:1; |
GLuint precision:1; |
GLuint saturate:1; |
GLuint data_type:1; |
GLuint snapshot:1; |
GLuint pad0:10; |
GLuint header_present:1; |
GLuint response_length:5; |
GLuint msg_length:4; |
GLuint pad1:2; |
GLuint end_of_thread:1; |
} math_gen5; |
/** G45 PRM, Volume 4, Section 4.8.1.1.1 [DevBW] and [DevCL] */ |
struct { |
GLuint binding_table_index:8; |
GLuint sampler:4; |
GLuint return_format:2; |
GLuint msg_type:2; |
GLuint response_length:4; |
GLuint msg_length:4; |
GLuint msg_target:4; |
GLuint pad1:3; |
GLuint end_of_thread:1; |
} sampler; |
/** G45 PRM, Volume 4, Section 4.8.1.1.2 [DevCTG] */ |
struct { |
GLuint binding_table_index:8; |
GLuint sampler:4; |
GLuint msg_type:4; |
GLuint response_length:4; |
GLuint msg_length:4; |
GLuint msg_target:4; |
GLuint pad1:3; |
GLuint end_of_thread:1; |
} sampler_g4x; |
/** Ironlake PRM, Volume 4 Part 1, Section 4.11.1.1.3 */ |
struct { |
GLuint binding_table_index:8; |
GLuint sampler:4; |
GLuint msg_type:4; |
GLuint simd_mode:2; |
GLuint pad0:1; |
GLuint header_present:1; |
GLuint response_length:5; |
GLuint msg_length:4; |
GLuint pad1:2; |
GLuint end_of_thread:1; |
} sampler_gen5; |
struct { |
GLuint binding_table_index:8; |
GLuint sampler:4; |
GLuint msg_type:5; |
GLuint simd_mode:2; |
GLuint header_present:1; |
GLuint response_length:5; |
GLuint msg_length:4; |
GLuint pad1:2; |
GLuint end_of_thread:1; |
} sampler_gen7; |
struct brw_urb_immediate urb; |
struct { |
GLuint opcode:4; |
GLuint offset:6; |
GLuint swizzle_control:2; |
GLuint pad:1; |
GLuint allocate:1; |
GLuint used:1; |
GLuint complete:1; |
GLuint pad0:3; |
GLuint header_present:1; |
GLuint response_length:5; |
GLuint msg_length:4; |
GLuint pad1:2; |
GLuint end_of_thread:1; |
} urb_gen5; |
struct { |
GLuint opcode:3; |
GLuint offset:11; |
GLuint swizzle_control:1; |
GLuint complete:1; |
GLuint per_slot_offset:1; |
GLuint pad0:2; |
GLuint header_present:1; |
GLuint response_length:5; |
GLuint msg_length:4; |
GLuint pad1:2; |
GLuint end_of_thread:1; |
} urb_gen7; |
/** 965 PRM, Volume 4, Section 5.10.1.1: Message Descriptor */ |
struct { |
GLuint binding_table_index:8; |
GLuint msg_control:4; |
GLuint msg_type:2; |
GLuint target_cache:2; |
GLuint response_length:4; |
GLuint msg_length:4; |
GLuint msg_target:4; |
GLuint pad1:3; |
GLuint end_of_thread:1; |
} dp_read; |
/** G45 PRM, Volume 4, Section 5.10.1.1.2 */ |
struct { |
GLuint binding_table_index:8; |
GLuint msg_control:3; |
GLuint msg_type:3; |
GLuint target_cache:2; |
GLuint response_length:4; |
GLuint msg_length:4; |
GLuint msg_target:4; |
GLuint pad1:3; |
GLuint end_of_thread:1; |
} dp_read_g4x; |
/** Ironlake PRM, Volume 4 Part 1, Section 5.10.2.1.2. */ |
struct { |
GLuint binding_table_index:8; |
GLuint msg_control:3; |
GLuint msg_type:3; |
GLuint target_cache:2; |
GLuint pad0:3; |
GLuint header_present:1; |
GLuint response_length:5; |
GLuint msg_length:4; |
GLuint pad1:2; |
GLuint end_of_thread:1; |
} dp_read_gen5; |
/** G45 PRM, Volume 4, Section 5.10.1.1.2. For both Gen4 and G45. */ |
struct { |
GLuint binding_table_index:8; |
GLuint msg_control:3; |
GLuint last_render_target:1; |
GLuint msg_type:3; |
GLuint send_commit_msg:1; |
GLuint response_length:4; |
GLuint msg_length:4; |
GLuint msg_target:4; |
GLuint pad1:3; |
GLuint end_of_thread:1; |
} dp_write; |
/** Ironlake PRM, Volume 4 Part 1, Section 5.10.2.1.2. */ |
struct { |
GLuint binding_table_index:8; |
GLuint msg_control:3; |
GLuint last_render_target:1; |
GLuint msg_type:3; |
GLuint send_commit_msg:1; |
GLuint pad0:3; |
GLuint header_present:1; |
GLuint response_length:5; |
GLuint msg_length:4; |
GLuint pad1:2; |
GLuint end_of_thread:1; |
} dp_write_gen5; |
/** |
* Message for the Sandybridge Sampler Cache or Constant Cache Data Port. |
* |
* See the Sandybridge PRM, Volume 4 Part 1, Section 3.9.2.1.1. |
**/ |
struct { |
GLuint binding_table_index:8; |
GLuint msg_control:5; |
GLuint msg_type:3; |
GLuint pad0:3; |
GLuint header_present:1; |
GLuint response_length:5; |
GLuint msg_length:4; |
GLuint pad1:2; |
GLuint end_of_thread:1; |
} gen6_dp_sampler_const_cache; |
/** |
* Message for the Sandybridge Render Cache Data Port. |
* |
* Most fields are defined in the Sandybridge PRM, Volume 4 Part 1, |
* Section 3.9.2.1.1: Message Descriptor. |
* |
* "Slot Group Select" and "Last Render Target" are part of the |
* 5-bit message control for Render Target Write messages. See |
* Section 3.9.9.2.1 of the same volume. |
*/ |
struct { |
GLuint binding_table_index:8; |
GLuint msg_control:3; |
GLuint slot_group_select:1; |
GLuint last_render_target:1; |
GLuint msg_type:4; |
GLuint send_commit_msg:1; |
GLuint pad0:1; |
GLuint header_present:1; |
GLuint response_length:5; |
GLuint msg_length:4; |
GLuint pad1:2; |
GLuint end_of_thread:1; |
} gen6_dp; |
/** |
* Message for any of the Gen7 Data Port caches. |
* |
* Most fields are defined in BSpec volume 5c.2 Data Port / Messages / |
* Data Port Messages / Message Descriptor. Once again, "Slot Group |
* Select" and "Last Render Target" are part of the 6-bit message |
* control for Render Target Writes. |
*/ |
struct { |
GLuint binding_table_index:8; |
GLuint msg_control:3; |
GLuint slot_group_select:1; |
GLuint last_render_target:1; |
GLuint msg_control_pad:1; |
GLuint msg_type:4; |
GLuint pad1:1; |
GLuint header_present:1; |
GLuint response_length:5; |
GLuint msg_length:4; |
GLuint pad2:2; |
GLuint end_of_thread:1; |
} gen7_dp; |
/** @} */ |
struct { |
GLuint src1_subreg_nr_high:1; |
GLuint src1_reg_nr:8; |
GLuint pad0:1; |
GLuint src2_rep_ctrl:1; |
GLuint src2_swizzle:8; |
GLuint src2_subreg_nr:3; |
GLuint src2_reg_nr:8; |
GLuint pad1:2; |
} da3src; |
GLint d; |
GLuint ud; |
float f; |
} bits3; |
}; |
/* Compacted (64-bit) EU instruction encoding, selected when cmpt_ctrl is
 * set in dw0.  The trailing comment on each field gives its bit position
 * within the 64-bit compact instruction word.
 */
struct brw_compact_instruction {
   struct {
      unsigned opcode:7;          /* 0- 6 */
      unsigned debug_control:1;   /* 7- 7 */
      unsigned control_index:5;   /* 8-12 */
      unsigned data_type_index:5; /* 13-17 */
      unsigned sub_reg_index:5;   /* 18-22 */
      unsigned acc_wr_control:1;  /* 23-23 */
      unsigned conditionalmod:4;  /* 24-27 */
      unsigned flag_subreg_nr:1;  /* 28-28 */
      unsigned cmpt_ctrl:1;       /* 29-29 */
      unsigned src0_index:2;      /* 30-31 */
   } dw0;
   struct {
      unsigned src0_index:3;      /* 32-34 */
      unsigned src1_index:5;      /* 35-39 */
      unsigned dst_reg_nr:8;      /* 40-47 */
      unsigned src0_reg_nr:8;     /* 48-55 */
      unsigned src1_reg_nr:8;     /* 56-63 */
   } dw1;
};
#endif |
/contrib/sdk/sources/Mesa/src/gallium/drivers/ilo/include/intel_chipset.h |
---|
0,0 → 1,266 |
/* |
* Copyright © 2007 Intel Corporation |
* |
* Permission is hereby granted, free of charge, to any person obtaining a |
* copy of this software and associated documentation files (the "Software"), |
* to deal in the Software without restriction, including without limitation |
* the rights to use, copy, modify, merge, publish, distribute, sublicense, |
* and/or sell copies of the Software, and to permit persons to whom the |
* Software is furnished to do so, subject to the following conditions: |
* |
* The above copyright notice and this permission notice (including the next |
* paragraph) shall be included in all copies or substantial portions of the |
* Software. |
* |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING |
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS |
* IN THE SOFTWARE. |
* |
* Authors: |
* Eric Anholt <eric@anholt.net> |
* |
*/ |
/* PCI device IDs for Intel graphics devices, grouped by hardware
 * generation, followed by IS_*() predicates that classify a device ID.
 *
 * Fix: every macro parameter is now parenthesized in the expansions
 * (CERT PRE01-C) so the predicates stay correct when called with a
 * compound expression such as "a ? b : c".  Note that the predicates
 * may evaluate their argument multiple times; pass a side-effect-free
 * expression.
 */
#define PCI_CHIP_I810                   0x7121
#define PCI_CHIP_I810_DC100             0x7123
#define PCI_CHIP_I810_E                 0x7125
#define PCI_CHIP_I815                   0x1132

#define PCI_CHIP_I830_M                 0x3577
#define PCI_CHIP_845_G                  0x2562
#define PCI_CHIP_I855_GM                0x3582
#define PCI_CHIP_I865_G                 0x2572

#define PCI_CHIP_I915_G                 0x2582
#define PCI_CHIP_E7221_G                0x258A
#define PCI_CHIP_I915_GM                0x2592
#define PCI_CHIP_I945_G                 0x2772
#define PCI_CHIP_I945_GM                0x27A2
#define PCI_CHIP_I945_GME               0x27AE
#define PCI_CHIP_Q35_G                  0x29B2
#define PCI_CHIP_G33_G                  0x29C2
#define PCI_CHIP_Q33_G                  0x29D2

#define PCI_CHIP_IGD_GM                 0xA011
#define PCI_CHIP_IGD_G                  0xA001

#define IS_IGDGM(devid) ((devid) == PCI_CHIP_IGD_GM)
#define IS_IGDG(devid)  ((devid) == PCI_CHIP_IGD_G)
#define IS_IGD(devid)   (IS_IGDG(devid) || IS_IGDGM(devid))

#define PCI_CHIP_I965_G                 0x29A2
#define PCI_CHIP_I965_Q                 0x2992
#define PCI_CHIP_I965_G_1               0x2982
#define PCI_CHIP_I946_GZ                0x2972
#define PCI_CHIP_I965_GM                0x2A02
#define PCI_CHIP_I965_GME               0x2A12

#define PCI_CHIP_GM45_GM                0x2A42

#define PCI_CHIP_IGD_E_G                0x2E02
#define PCI_CHIP_Q45_G                  0x2E12
#define PCI_CHIP_G45_G                  0x2E22
#define PCI_CHIP_G41_G                  0x2E32
#define PCI_CHIP_B43_G                  0x2E42
#define PCI_CHIP_B43_G1                 0x2E92

#define PCI_CHIP_ILD_G                  0x0042
#define PCI_CHIP_ILM_G                  0x0046

#define PCI_CHIP_SANDYBRIDGE_GT1        0x0102 /* Desktop */
#define PCI_CHIP_SANDYBRIDGE_GT2        0x0112
#define PCI_CHIP_SANDYBRIDGE_GT2_PLUS   0x0122
#define PCI_CHIP_SANDYBRIDGE_M_GT1      0x0106 /* Mobile */
#define PCI_CHIP_SANDYBRIDGE_M_GT2      0x0116
#define PCI_CHIP_SANDYBRIDGE_M_GT2_PLUS 0x0126
#define PCI_CHIP_SANDYBRIDGE_S          0x010A /* Server */

#define PCI_CHIP_IVYBRIDGE_GT1          0x0152 /* Desktop */
#define PCI_CHIP_IVYBRIDGE_GT2          0x0162
#define PCI_CHIP_IVYBRIDGE_M_GT1        0x0156 /* Mobile */
#define PCI_CHIP_IVYBRIDGE_M_GT2        0x0166
#define PCI_CHIP_IVYBRIDGE_S_GT1        0x015a /* Server */
#define PCI_CHIP_IVYBRIDGE_S_GT2        0x016a

#define PCI_CHIP_BAYTRAIL_M_1           0x0F31
#define PCI_CHIP_BAYTRAIL_M_2           0x0F32
#define PCI_CHIP_BAYTRAIL_M_3           0x0F33
#define PCI_CHIP_BAYTRAIL_M_4           0x0157
#define PCI_CHIP_BAYTRAIL_D             0x0155

#define PCI_CHIP_HASWELL_GT1            0x0402 /* Desktop */
#define PCI_CHIP_HASWELL_GT2            0x0412
#define PCI_CHIP_HASWELL_GT3            0x0422
#define PCI_CHIP_HASWELL_M_GT1          0x0406 /* Mobile */
#define PCI_CHIP_HASWELL_M_GT2          0x0416
#define PCI_CHIP_HASWELL_M_GT3          0x0426
#define PCI_CHIP_HASWELL_S_GT1          0x040A /* Server */
#define PCI_CHIP_HASWELL_S_GT2          0x041A
#define PCI_CHIP_HASWELL_S_GT3          0x042A
#define PCI_CHIP_HASWELL_SDV_GT1        0x0C02 /* Desktop */
#define PCI_CHIP_HASWELL_SDV_GT2        0x0C12
#define PCI_CHIP_HASWELL_SDV_GT3        0x0C22
#define PCI_CHIP_HASWELL_SDV_M_GT1      0x0C06 /* Mobile */
#define PCI_CHIP_HASWELL_SDV_M_GT2      0x0C16
#define PCI_CHIP_HASWELL_SDV_M_GT3      0x0C26
#define PCI_CHIP_HASWELL_SDV_S_GT1      0x0C0A /* Server */
#define PCI_CHIP_HASWELL_SDV_S_GT2      0x0C1A
#define PCI_CHIP_HASWELL_SDV_S_GT3      0x0C2A
#define PCI_CHIP_HASWELL_ULT_GT1        0x0A02 /* Desktop */
#define PCI_CHIP_HASWELL_ULT_GT2        0x0A12
#define PCI_CHIP_HASWELL_ULT_GT3        0x0A22
#define PCI_CHIP_HASWELL_ULT_M_GT1      0x0A06 /* Mobile */
#define PCI_CHIP_HASWELL_ULT_M_GT2      0x0A16
#define PCI_CHIP_HASWELL_ULT_M_GT3      0x0A26
#define PCI_CHIP_HASWELL_ULT_S_GT1      0x0A0A /* Server */
#define PCI_CHIP_HASWELL_ULT_S_GT2      0x0A1A
#define PCI_CHIP_HASWELL_ULT_S_GT3      0x0A2A
#define PCI_CHIP_HASWELL_CRW_GT1        0x0D02 /* Desktop */
#define PCI_CHIP_HASWELL_CRW_GT2        0x0D12
#define PCI_CHIP_HASWELL_CRW_GT3        0x0D22
#define PCI_CHIP_HASWELL_CRW_M_GT1      0x0D06 /* Mobile */
#define PCI_CHIP_HASWELL_CRW_M_GT2      0x0D16
#define PCI_CHIP_HASWELL_CRW_M_GT3      0x0D26
#define PCI_CHIP_HASWELL_CRW_S_GT1      0x0D0A /* Server */
#define PCI_CHIP_HASWELL_CRW_S_GT2      0x0D1A
#define PCI_CHIP_HASWELL_CRW_S_GT3      0x0D2A

#define IS_MOBILE(devid)   ((devid) == PCI_CHIP_I855_GM || \
                            (devid) == PCI_CHIP_I915_GM || \
                            (devid) == PCI_CHIP_I945_GM || \
                            (devid) == PCI_CHIP_I945_GME || \
                            (devid) == PCI_CHIP_I965_GM || \
                            (devid) == PCI_CHIP_I965_GME || \
                            (devid) == PCI_CHIP_GM45_GM || \
                            IS_IGD(devid) || \
                            (devid) == PCI_CHIP_ILM_G)

#define IS_G45(devid)      ((devid) == PCI_CHIP_IGD_E_G || \
                            (devid) == PCI_CHIP_Q45_G || \
                            (devid) == PCI_CHIP_G45_G || \
                            (devid) == PCI_CHIP_G41_G || \
                            (devid) == PCI_CHIP_B43_G || \
                            (devid) == PCI_CHIP_B43_G1)
#define IS_GM45(devid)     ((devid) == PCI_CHIP_GM45_GM)
#define IS_G4X(devid)      (IS_G45(devid) || IS_GM45(devid))

#define IS_ILD(devid)      ((devid) == PCI_CHIP_ILD_G)
#define IS_ILM(devid)      ((devid) == PCI_CHIP_ILM_G)
#define IS_GEN5(devid)     (IS_ILD(devid) || IS_ILM(devid))

#define IS_915(devid)      ((devid) == PCI_CHIP_I915_G || \
                            (devid) == PCI_CHIP_E7221_G || \
                            (devid) == PCI_CHIP_I915_GM)

#define IS_945(devid)      ((devid) == PCI_CHIP_I945_G || \
                            (devid) == PCI_CHIP_I945_GM || \
                            (devid) == PCI_CHIP_I945_GME || \
                            (devid) == PCI_CHIP_G33_G || \
                            (devid) == PCI_CHIP_Q33_G || \
                            (devid) == PCI_CHIP_Q35_G || IS_IGD(devid))

#define IS_GEN4(devid)     ((devid) == PCI_CHIP_I965_G || \
                            (devid) == PCI_CHIP_I965_Q || \
                            (devid) == PCI_CHIP_I965_G_1 || \
                            (devid) == PCI_CHIP_I965_GM || \
                            (devid) == PCI_CHIP_I965_GME || \
                            (devid) == PCI_CHIP_I946_GZ || \
                            IS_G4X(devid))

/* Compat macro for intel_decode.c */
#define IS_IRONLAKE(devid) IS_GEN5(devid)

#define IS_SNB_GT1(devid)  ((devid) == PCI_CHIP_SANDYBRIDGE_GT1 || \
                            (devid) == PCI_CHIP_SANDYBRIDGE_M_GT1 || \
                            (devid) == PCI_CHIP_SANDYBRIDGE_S)

#define IS_SNB_GT2(devid)  ((devid) == PCI_CHIP_SANDYBRIDGE_GT2 || \
                            (devid) == PCI_CHIP_SANDYBRIDGE_GT2_PLUS || \
                            (devid) == PCI_CHIP_SANDYBRIDGE_M_GT2 || \
                            (devid) == PCI_CHIP_SANDYBRIDGE_M_GT2_PLUS)

#define IS_GEN6(devid)     (IS_SNB_GT1(devid) || IS_SNB_GT2(devid))

#define IS_IVB_GT1(devid)  ((devid) == PCI_CHIP_IVYBRIDGE_GT1 || \
                            (devid) == PCI_CHIP_IVYBRIDGE_M_GT1 || \
                            (devid) == PCI_CHIP_IVYBRIDGE_S_GT1)

#define IS_IVB_GT2(devid)  ((devid) == PCI_CHIP_IVYBRIDGE_GT2 || \
                            (devid) == PCI_CHIP_IVYBRIDGE_M_GT2 || \
                            (devid) == PCI_CHIP_IVYBRIDGE_S_GT2)

#define IS_IVYBRIDGE(devid) (IS_IVB_GT1(devid) || IS_IVB_GT2(devid))

#define IS_BAYTRAIL(devid) ((devid) == PCI_CHIP_BAYTRAIL_M_1 || \
                            (devid) == PCI_CHIP_BAYTRAIL_M_2 || \
                            (devid) == PCI_CHIP_BAYTRAIL_M_3 || \
                            (devid) == PCI_CHIP_BAYTRAIL_M_4 || \
                            (devid) == PCI_CHIP_BAYTRAIL_D)

/* IS_HASWELL is defined below; that is fine for a macro, since names are
 * only resolved when IS_GEN7 is expanded at a use site.
 */
#define IS_GEN7(devid)     (IS_IVYBRIDGE(devid) || \
                            IS_BAYTRAIL(devid) || \
                            IS_HASWELL(devid))

#define IS_HSW_GT1(devid)  ((devid) == PCI_CHIP_HASWELL_GT1 || \
                            (devid) == PCI_CHIP_HASWELL_M_GT1 || \
                            (devid) == PCI_CHIP_HASWELL_S_GT1 || \
                            (devid) == PCI_CHIP_HASWELL_SDV_GT1 || \
                            (devid) == PCI_CHIP_HASWELL_SDV_M_GT1 || \
                            (devid) == PCI_CHIP_HASWELL_SDV_S_GT1 || \
                            (devid) == PCI_CHIP_HASWELL_ULT_GT1 || \
                            (devid) == PCI_CHIP_HASWELL_ULT_M_GT1 || \
                            (devid) == PCI_CHIP_HASWELL_ULT_S_GT1 || \
                            (devid) == PCI_CHIP_HASWELL_CRW_GT1 || \
                            (devid) == PCI_CHIP_HASWELL_CRW_M_GT1 || \
                            (devid) == PCI_CHIP_HASWELL_CRW_S_GT1)

#define IS_HSW_GT2(devid)  ((devid) == PCI_CHIP_HASWELL_GT2 || \
                            (devid) == PCI_CHIP_HASWELL_M_GT2 || \
                            (devid) == PCI_CHIP_HASWELL_S_GT2 || \
                            (devid) == PCI_CHIP_HASWELL_SDV_GT2 || \
                            (devid) == PCI_CHIP_HASWELL_SDV_M_GT2 || \
                            (devid) == PCI_CHIP_HASWELL_SDV_S_GT2 || \
                            (devid) == PCI_CHIP_HASWELL_ULT_GT2 || \
                            (devid) == PCI_CHIP_HASWELL_ULT_M_GT2 || \
                            (devid) == PCI_CHIP_HASWELL_ULT_S_GT2 || \
                            (devid) == PCI_CHIP_HASWELL_CRW_GT2 || \
                            (devid) == PCI_CHIP_HASWELL_CRW_M_GT2 || \
                            (devid) == PCI_CHIP_HASWELL_CRW_S_GT2)

#define IS_HSW_GT3(devid)  ((devid) == PCI_CHIP_HASWELL_GT3 || \
                            (devid) == PCI_CHIP_HASWELL_M_GT3 || \
                            (devid) == PCI_CHIP_HASWELL_S_GT3 || \
                            (devid) == PCI_CHIP_HASWELL_SDV_GT3 || \
                            (devid) == PCI_CHIP_HASWELL_SDV_M_GT3 || \
                            (devid) == PCI_CHIP_HASWELL_SDV_S_GT3 || \
                            (devid) == PCI_CHIP_HASWELL_ULT_GT3 || \
                            (devid) == PCI_CHIP_HASWELL_ULT_M_GT3 || \
                            (devid) == PCI_CHIP_HASWELL_ULT_S_GT3 || \
                            (devid) == PCI_CHIP_HASWELL_CRW_GT3 || \
                            (devid) == PCI_CHIP_HASWELL_CRW_M_GT3 || \
                            (devid) == PCI_CHIP_HASWELL_CRW_S_GT3)

#define IS_HASWELL(devid)  (IS_HSW_GT1(devid) || \
                            IS_HSW_GT2(devid) || \
                            IS_HSW_GT3(devid))

#define IS_965(devid)      (IS_GEN4(devid) || \
                            IS_G4X(devid) || \
                            IS_GEN5(devid) || \
                            IS_GEN6(devid) || \
                            IS_GEN7(devid))

#define IS_9XX(devid)      (IS_915(devid) || \
                            IS_945(devid) || \
                            IS_965(devid))

#define IS_GEN3(devid)     (IS_915(devid) || \
                            IS_945(devid))

#define IS_GEN2(devid)     ((devid) == PCI_CHIP_I830_M || \
                            (devid) == PCI_CHIP_845_G || \
                            (devid) == PCI_CHIP_I855_GM || \
                            (devid) == PCI_CHIP_I865_G)
/contrib/sdk/sources/Mesa/src/gallium/drivers/ilo/include/intel_reg.h |
---|
0,0 → 1,298 |
/************************************************************************** |
* |
* Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. |
* All Rights Reserved. |
* |
* Permission is hereby granted, free of charge, to any person obtaining a |
* copy of this software and associated documentation files (the |
* "Software"), to deal in the Software without restriction, including |
* without limitation the rights to use, copy, modify, merge, publish, |
* distribute, sub license, and/or sell copies of the Software, and to |
* permit persons to whom the Software is furnished to do so, subject to |
* the following conditions: |
* |
* The above copyright notice and this permission notice (including the |
* next paragraph) shall be included in all copies or substantial portions |
* of the Software. |
* |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS |
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF |
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. |
* IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR |
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, |
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE |
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. |
* |
**************************************************************************/ |
/* Intel GPU command and register definitions (MI / 2D / 3D command
 * headers, PIPE_CONTROL bits, i915/i830 immediate state words S0-S7,
 * blitter commands, and pipeline statistics counter registers).
 *
 * Fix: the function-like macros I1_LOAD_S() and S2_TEXCOORD_FMT() now
 * parenthesize their parameters (CERT PRE01-C); previously
 * S2_TEXCOORD_FMT(1 + 1, t) expanded to "t << (1 + 1*4)", shifting by 5
 * instead of 8.
 */
#define CMD_MI                                  (0x0 << 29)
#define CMD_2D                                  (0x2 << 29)
#define CMD_3D                                  (0x3 << 29)

#define MI_NOOP                                 (CMD_MI | 0)

#define MI_BATCH_BUFFER_END                     (CMD_MI | (0xA << 23))

#define MI_FLUSH                                (CMD_MI | (4 << 23))
#define FLUSH_MAP_CACHE                         (1 << 0)
#define INHIBIT_FLUSH_RENDER_CACHE              (1 << 2)

#define MI_LOAD_REGISTER_IMM                    (CMD_MI | (0x22 << 23))

#define MI_FLUSH_DW                             (CMD_MI | (0x26 << 23) | 2)

/* Stalls command execution waiting for the given events to have occurred. */
#define MI_WAIT_FOR_EVENT                       (CMD_MI | (0x3 << 23))
#define MI_WAIT_FOR_PLANE_B_FLIP                (1 << 6)
#define MI_WAIT_FOR_PLANE_A_FLIP                (1 << 2)

#define MI_STORE_REGISTER_MEM                   (CMD_MI | (0x24 << 23))
# define MI_STORE_REGISTER_MEM_USE_GGTT         (1 << 22)

/* p189 */
#define _3DSTATE_LOAD_STATE_IMMEDIATE_1         (CMD_3D | (0x1d << 24) | (0x04 << 16))
#define I1_LOAD_S(n)                            (1 << (4 + (n)))

#define _3DSTATE_DRAWRECT_INFO                  (CMD_3D | (0x1d << 24) | (0x80 << 16) | 0x3)

/** @{
 *
 * PIPE_CONTROL operation, a combination MI_FLUSH and register write with
 * additional flushing control.
 */
#define _3DSTATE_PIPE_CONTROL                   (CMD_3D | (3 << 27) | (2 << 24))
#define PIPE_CONTROL_CS_STALL                   (1 << 20)
#define PIPE_CONTROL_GLOBAL_SNAPSHOT_COUNT_RESET (1 << 19)
#define PIPE_CONTROL_TLB_INVALIDATE             (1 << 18)
#define PIPE_CONTROL_SYNC_GFDT                  (1 << 17)
#define PIPE_CONTROL_MEDIA_STATE_CLEAR          (1 << 16)
#define PIPE_CONTROL_NO_WRITE                   (0 << 14)
#define PIPE_CONTROL_WRITE_IMMEDIATE            (1 << 14)
#define PIPE_CONTROL_WRITE_DEPTH_COUNT          (2 << 14)
#define PIPE_CONTROL_WRITE_TIMESTAMP            (3 << 14)
#define PIPE_CONTROL_DEPTH_STALL                (1 << 13)
#define PIPE_CONTROL_WRITE_FLUSH                (1 << 12)
#define PIPE_CONTROL_INSTRUCTION_FLUSH          (1 << 11)
#define PIPE_CONTROL_TC_FLUSH                   (1 << 10) /* GM45+ only */
#define PIPE_CONTROL_ISP_DIS                    (1 << 9)
#define PIPE_CONTROL_INTERRUPT_ENABLE           (1 << 8)
/* GT */
#define PIPE_CONTROL_VF_CACHE_INVALIDATE        (1 << 4)
#define PIPE_CONTROL_CONST_CACHE_INVALIDATE     (1 << 3)
#define PIPE_CONTROL_STATE_CACHE_INVALIDATE     (1 << 2)
#define PIPE_CONTROL_STALL_AT_SCOREBOARD        (1 << 1)
#define PIPE_CONTROL_DEPTH_CACHE_FLUSH          (1 << 0)
#define PIPE_CONTROL_PPGTT_WRITE                (0 << 2)
#define PIPE_CONTROL_GLOBAL_GTT_WRITE           (1 << 2)
/** @} */

/** @{
 * 915 definitions
 *
 * 915 documents say that bits 31:28 and 1 are "undefined, must be zero."
 */
#define S0_VB_OFFSET_MASK                       0x0ffffffc
#define S0_AUTO_CACHE_INV_DISABLE               (1 << 0)
/** @} */

/** @{
 * 830 definitions
 */
#define S0_VB_OFFSET_MASK_830                   0xffffff80
#define S0_VB_PITCH_SHIFT_830                   1
#define S0_VB_ENABLE_830                        (1 << 0)
/** @} */

#define S1_VERTEX_WIDTH_SHIFT                   24
#define S1_VERTEX_WIDTH_MASK                    (0x3f << 24)
#define S1_VERTEX_PITCH_SHIFT                   16
#define S1_VERTEX_PITCH_MASK                    (0x3f << 16)

#define TEXCOORDFMT_2D                          0x0
#define TEXCOORDFMT_3D                          0x1
#define TEXCOORDFMT_4D                          0x2
#define TEXCOORDFMT_1D                          0x3
#define TEXCOORDFMT_2D_16                       0x4
#define TEXCOORDFMT_4D_16                       0x5
#define TEXCOORDFMT_NOT_PRESENT                 0xf
#define S2_TEXCOORD_FMT0_MASK                   0xf
#define S2_TEXCOORD_FMT1_SHIFT                  4
#define S2_TEXCOORD_FMT(unit, type)             ((type) << ((unit) * 4))
#define S2_TEXCOORD_NONE                        (~0)
#define S2_TEX_COUNT_SHIFT_830                  12
#define S2_VERTEX_1_WIDTH_SHIFT_830             0
#define S2_VERTEX_0_WIDTH_SHIFT_830             6

/* S3 not interesting */

#define S4_POINT_WIDTH_SHIFT                    23
#define S4_POINT_WIDTH_MASK                     (0x1ff << 23)
#define S4_LINE_WIDTH_SHIFT                     19
#define S4_LINE_WIDTH_ONE                       (0x2 << 19)
#define S4_LINE_WIDTH_MASK                      (0xf << 19)
#define S4_FLATSHADE_ALPHA                      (1 << 18)
#define S4_FLATSHADE_FOG                        (1 << 17)
#define S4_FLATSHADE_SPECULAR                   (1 << 16)
#define S4_FLATSHADE_COLOR                      (1 << 15)
#define S4_CULLMODE_BOTH                        (0 << 13)
#define S4_CULLMODE_NONE                        (1 << 13)
#define S4_CULLMODE_CW                          (2 << 13)
#define S4_CULLMODE_CCW                         (3 << 13)
#define S4_CULLMODE_MASK                        (3 << 13)
#define S4_VFMT_POINT_WIDTH                     (1 << 12)
#define S4_VFMT_SPEC_FOG                        (1 << 11)
#define S4_VFMT_COLOR                           (1 << 10)
#define S4_VFMT_DEPTH_OFFSET                    (1 << 9)
#define S4_VFMT_XYZ                             (1 << 6)
#define S4_VFMT_XYZW                            (2 << 6)
#define S4_VFMT_XY                              (3 << 6)
#define S4_VFMT_XYW                             (4 << 6)
#define S4_VFMT_XYZW_MASK                       (7 << 6)
#define S4_FORCE_DEFAULT_DIFFUSE                (1 << 5)
#define S4_FORCE_DEFAULT_SPECULAR               (1 << 4)
#define S4_LOCAL_DEPTH_OFFSET_ENABLE            (1 << 3)
#define S4_VFMT_FOG_PARAM                       (1 << 2)
#define S4_SPRITE_POINT_ENABLE                  (1 << 1)
#define S4_LINE_ANTIALIAS_ENABLE                (1 << 0)

#define S4_VFMT_MASK                            (S4_VFMT_POINT_WIDTH | \
                                                 S4_VFMT_SPEC_FOG | \
                                                 S4_VFMT_COLOR | \
                                                 S4_VFMT_DEPTH_OFFSET | \
                                                 S4_VFMT_XYZW_MASK | \
                                                 S4_VFMT_FOG_PARAM)

#define S5_WRITEDISABLE_ALPHA                   (1 << 31)
#define S5_WRITEDISABLE_RED                     (1 << 30)
#define S5_WRITEDISABLE_GREEN                   (1 << 29)
#define S5_WRITEDISABLE_BLUE                    (1 << 28)
#define S5_WRITEDISABLE_MASK                    (0xf << 28)
#define S5_FORCE_DEFAULT_POINT_SIZE             (1 << 27)
#define S5_LAST_PIXEL_ENABLE                    (1 << 26)
#define S5_GLOBAL_DEPTH_OFFSET_ENABLE           (1 << 25)
#define S5_FOG_ENABLE                           (1 << 24)
#define S5_STENCIL_REF_SHIFT                    16
#define S5_STENCIL_REF_MASK                     (0xff << 16)
#define S5_STENCIL_TEST_FUNC_SHIFT              13
#define S5_STENCIL_TEST_FUNC_MASK               (0x7 << 13)
#define S5_STENCIL_FAIL_SHIFT                   10
#define S5_STENCIL_FAIL_MASK                    (0x7 << 10)
#define S5_STENCIL_PASS_Z_FAIL_SHIFT            7
#define S5_STENCIL_PASS_Z_FAIL_MASK             (0x7 << 7)
#define S5_STENCIL_PASS_Z_PASS_SHIFT            4
#define S5_STENCIL_PASS_Z_PASS_MASK             (0x7 << 4)
#define S5_STENCIL_WRITE_ENABLE                 (1 << 3)
#define S5_STENCIL_TEST_ENABLE                  (1 << 2)
#define S5_COLOR_DITHER_ENABLE                  (1 << 1)
#define S5_LOGICOP_ENABLE                       (1 << 0)

#define S6_ALPHA_TEST_ENABLE                    (1 << 31)
#define S6_ALPHA_TEST_FUNC_SHIFT                28
#define S6_ALPHA_TEST_FUNC_MASK                 (0x7 << 28)
#define S6_ALPHA_REF_SHIFT                      20
#define S6_ALPHA_REF_MASK                       (0xff << 20)
#define S6_DEPTH_TEST_ENABLE                    (1 << 19)
#define S6_DEPTH_TEST_FUNC_SHIFT                16
#define S6_DEPTH_TEST_FUNC_MASK                 (0x7 << 16)
#define S6_CBUF_BLEND_ENABLE                    (1 << 15)
#define S6_CBUF_BLEND_FUNC_SHIFT                12
#define S6_CBUF_BLEND_FUNC_MASK                 (0x7 << 12)
#define S6_CBUF_SRC_BLEND_FACT_SHIFT            8
#define S6_CBUF_SRC_BLEND_FACT_MASK             (0xf << 8)
#define S6_CBUF_DST_BLEND_FACT_SHIFT            4
#define S6_CBUF_DST_BLEND_FACT_MASK             (0xf << 4)
#define S6_DEPTH_WRITE_ENABLE                   (1 << 3)
#define S6_COLOR_WRITE_ENABLE                   (1 << 2)
#define S6_TRISTRIP_PV_SHIFT                    0
#define S6_TRISTRIP_PV_MASK                     (0x3 << 0)

#define S7_DEPTH_OFFSET_CONST_MASK              (~0)

/* p143 */
#define _3DSTATE_BUF_INFO_CMD                   (CMD_3D | (0x1d << 24) | (0x8e << 16) | 1)
/* Dword 1 */
#define BUF_3D_ID_COLOR_BACK                    (0x3 << 24)
#define BUF_3D_ID_DEPTH                         (0x7 << 24)
#define BUF_3D_USE_FENCE                        (1 << 23)
#define BUF_3D_TILED_SURFACE                    (1 << 22)
#define BUF_3D_TILE_WALK_X                      0
#define BUF_3D_TILE_WALK_Y                      (1 << 21)
#define BUF_3D_PITCH(x)                         (((x) / 4) << 2)
/* Dword 2 */
#define BUF_3D_ADDR(x)                          ((x) & ~0x3)

/* Primitive dispatch on 830-945 */
#define _3DPRIMITIVE                            (CMD_3D | (0x1f << 24))
#define PRIM_INDIRECT                           (1 << 23)
#define PRIM_INLINE                             (0 << 23)
#define PRIM_INDIRECT_SEQUENTIAL                (0 << 17)
#define PRIM_INDIRECT_ELTS                      (1 << 17)

#define PRIM3D_TRILIST                          (0x0 << 18)
#define PRIM3D_TRISTRIP                         (0x1 << 18)
#define PRIM3D_TRISTRIP_RVRSE                   (0x2 << 18)
#define PRIM3D_TRIFAN                           (0x3 << 18)
#define PRIM3D_POLY                             (0x4 << 18)
#define PRIM3D_LINELIST                         (0x5 << 18)
#define PRIM3D_LINESTRIP                        (0x6 << 18)
#define PRIM3D_RECTLIST                         (0x7 << 18)
#define PRIM3D_POINTLIST                        (0x8 << 18)
#define PRIM3D_DIB                              (0x9 << 18)
#define PRIM3D_MASK                             (0x1f << 18)

#define XY_SETUP_BLT_CMD                        (CMD_2D | (0x01 << 22))

#define XY_COLOR_BLT_CMD                        (CMD_2D | (0x50 << 22))

#define XY_SRC_COPY_BLT_CMD                     (CMD_2D | (0x53 << 22))

#define XY_TEXT_IMMEDIATE_BLIT_CMD              (CMD_2D | (0x31 << 22))
# define XY_TEXT_BYTE_PACKED                    (1 << 16)

/* BR00 */
#define XY_BLT_WRITE_ALPHA                      (1 << 21)
#define XY_BLT_WRITE_RGB                        (1 << 20)
#define XY_SRC_TILED                            (1 << 15)
#define XY_DST_TILED                            (1 << 11)

/* BR13 */
#define BR13_8                                  (0x0 << 24)
#define BR13_565                                (0x1 << 24)
#define BR13_8888                               (0x3 << 24)

#define FENCE_LINEAR                            0
#define FENCE_XMAJOR                            1
#define FENCE_YMAJOR                            2

/* Pipeline Statistics Counter Registers */
#define IA_VERTICES_COUNT                       0x2310
#define IA_PRIMITIVES_COUNT                     0x2318
#define VS_INVOCATION_COUNT                     0x2320
#define HS_INVOCATION_COUNT                     0x2300
#define DS_INVOCATION_COUNT                     0x2308
#define GS_INVOCATION_COUNT                     0x2328
#define GS_PRIMITIVES_COUNT                     0x2330
#define CL_INVOCATION_COUNT                     0x2338
#define CL_PRIMITIVES_COUNT                     0x2340
#define PS_INVOCATION_COUNT                     0x2348
#define PS_DEPTH_COUNT                          0x2350

#define SO_NUM_PRIM_STORAGE_NEEDED              0x2280
#define SO_PRIM_STORAGE_NEEDED0_IVB             0x5240
#define SO_PRIM_STORAGE_NEEDED1_IVB             0x5248
#define SO_PRIM_STORAGE_NEEDED2_IVB             0x5250
#define SO_PRIM_STORAGE_NEEDED3_IVB             0x5258

#define SO_NUM_PRIMS_WRITTEN                    0x2288
#define SO_NUM_PRIMS_WRITTEN0_IVB               0x5200
#define SO_NUM_PRIMS_WRITTEN1_IVB               0x5208
#define SO_NUM_PRIMS_WRITTEN2_IVB               0x5210
#define SO_NUM_PRIMS_WRITTEN3_IVB               0x5218

#define TIMESTAMP                               0x2358

#define BCS_SWCTRL                              0x22200
# define BCS_SWCTRL_SRC_Y                       (1 << 0)
# define BCS_SWCTRL_DST_Y                       (1 << 1)
/contrib/sdk/sources/Mesa/src/gallium/drivers/ilo/shader/ilo_shader_cs.c |
---|
0,0 → 1,38 |
/* |
* Mesa 3-D graphics library |
* |
* Copyright (C) 2012-2013 LunarG, Inc. |
* |
* Permission is hereby granted, free of charge, to any person obtaining a |
* copy of this software and associated documentation files (the "Software"), |
* to deal in the Software without restriction, including without limitation |
* the rights to use, copy, modify, merge, publish, distribute, sublicense, |
* and/or sell copies of the Software, and to permit persons to whom the |
* Software is furnished to do so, subject to the following conditions: |
* |
* The above copyright notice and this permission notice shall be included |
* in all copies or substantial portions of the Software. |
* |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING |
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER |
* DEALINGS IN THE SOFTWARE. |
* |
* Authors: |
* Chia-I Wu <olv@lunarg.com> |
*/ |
#include "ilo_shader_internal.h" |
/**
 * Compile the compute shader.
 *
 * Compute shader compilation is not implemented; this always returns
 * NULL so callers treat CS compilation as unsupported.
 */
struct ilo_shader *
ilo_shader_compile_cs(const struct ilo_shader_state *state,
                      const struct ilo_shader_variant *variant)
{
   /* unimplemented stub; silence unused-parameter warnings */
   (void) state;
   (void) variant;

   return NULL;
}
/contrib/sdk/sources/Mesa/src/gallium/drivers/ilo/shader/ilo_shader_fs.c |
---|
0,0 → 1,1799 |
/* |
* Mesa 3-D graphics library |
* |
* Copyright (C) 2012-2013 LunarG, Inc. |
* |
* Permission is hereby granted, free of charge, to any person obtaining a |
* copy of this software and associated documentation files (the "Software"), |
* to deal in the Software without restriction, including without limitation |
* the rights to use, copy, modify, merge, publish, distribute, sublicense, |
* and/or sell copies of the Software, and to permit persons to whom the |
* Software is furnished to do so, subject to the following conditions: |
* |
* The above copyright notice and this permission notice shall be included |
* in all copies or substantial portions of the Software. |
* |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING |
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER |
* DEALINGS IN THE SOFTWARE. |
* |
* Authors: |
* Chia-I Wu <olv@lunarg.com> |
*/ |
#include "tgsi/tgsi_dump.h" |
#include "tgsi/tgsi_util.h" |
#include "toy_compiler.h" |
#include "toy_tgsi.h" |
#include "toy_legalize.h" |
#include "toy_optimize.h" |
#include "toy_helpers.h" |
#include "ilo_context.h" |
#include "ilo_shader_internal.h" |
/**
 * Context used while compiling a TGSI fragment shader into native code.
 */
struct fs_compile_context {
   struct ilo_shader *shader;                /* the shader being generated */
   const struct ilo_shader_variant *variant; /* variant-specific state */

   struct toy_compiler tc;   /* instruction emitter / IR container */
   struct toy_tgsi tgsi;     /* translated TGSI information */

   enum brw_message_target const_cache; /* SFID used for constant reads */
   int dispatch_mode;                   /* SIMD dispatch mode */

   /*
    * GRF locations of values delivered in the thread payload.  Two sets
    * are kept -- NOTE(review): presumably one per SIMD8 half of a SIMD16
    * dispatch; only payloads[0] is used in this chunk.  Confirm against
    * the payload setup code.
    */
   struct {
      int barycentric_interps[BRW_WM_BARYCENTRIC_INTERP_MODE_COUNT];
      int source_depth;
      int source_w;
      int pos_offset;
   } payloads[2];

   int first_const_grf;  /* start of the constant section */
   int first_attr_grf;   /* start of the attribute section (see fetch_attr) */
   int first_free_grf;   /* first GRF available for allocation */
   int last_free_grf;    /* last GRF available for allocation */
   int num_grf_per_vrf;  /* GRFs backing one virtual register */

   int first_free_mrf;   /* first MRF available for messages */
   int last_free_mrf;    /* last MRF available for messages */
};
/**
 * Emit instructions to fetch the fragment position
 * (TGSI_SEMANTIC_POSITION).
 *
 * X and Y are reconstructed from the subspan coordinates in the thread
 * payload, honoring the shader's coordinate-origin and pixel-center
 * properties; Z comes from the source depth payload and W is the
 * reciprocal of the source W payload.
 */
static void
fetch_position(struct fs_compile_context *fcc, struct toy_dst dst)
{
   struct toy_compiler *tc = &fcc->tc;
   const struct toy_src src_z =
      tsrc(TOY_FILE_GRF, fcc->payloads[0].source_depth, 0);
   const struct toy_src src_w =
      tsrc(TOY_FILE_GRF, fcc->payloads[0].source_w, 0);
   /* fall back to 1 so that the Y flip below never uses a bogus height */
   const int fb_height =
      (fcc->variant->u.fs.fb_height) ? fcc->variant->u.fs.fb_height : 1;
   const bool origin_upper_left =
      (fcc->tgsi.props.fs_coord_origin == TGSI_FS_COORD_ORIGIN_UPPER_LEFT);
   const bool pixel_center_integer =
      (fcc->tgsi.props.fs_coord_pixel_center ==
       TGSI_FS_COORD_PIXEL_CENTER_INTEGER);
   struct toy_src subspan_x, subspan_y;
   struct toy_dst tmp, tmp_uw;
   struct toy_dst real_dst[4];

   tdst_transpose(dst, real_dst);

   /* subspan coordinates start at r1.2:uw; X and Y are interleaved
    * (NOTE(review): layout assumed from the rect/offset usage below --
    * confirm against the hardware payload documentation) */
   subspan_x = tsrc_uw(tsrc(TOY_FILE_GRF, 1, 2 * 4));
   subspan_x = tsrc_rect(subspan_x, TOY_RECT_240);
   subspan_y = tsrc_offset(subspan_x, 0, 1);

   tmp_uw = tdst_uw(tc_alloc_tmp(tc));
   tmp = tc_alloc_tmp(tc);

   /* X: add the per-pixel X offsets within each 2x2 subspan (packed
    * vector immediate), then convert to float */
   tc_ADD(tc, tmp_uw, subspan_x, tsrc_imm_v(0x10101010));
   tc_MOV(tc, tmp, tsrc_from(tmp_uw));
   if (pixel_center_integer)
      tc_MOV(tc, real_dst[0], tsrc_from(tmp));
   else
      tc_ADD(tc, real_dst[0], tsrc_from(tmp), tsrc_imm_f(0.5f));

   /* Y: same, with the per-pixel Y offsets */
   tc_ADD(tc, tmp_uw, subspan_y, tsrc_imm_v(0x11001100));
   tc_MOV(tc, tmp, tsrc_from(tmp_uw));
   if (origin_upper_left && pixel_center_integer) {
      tc_MOV(tc, real_dst[1], tsrc_from(tmp));
   }
   else {
      struct toy_src y = tsrc_from(tmp);
      float offset = 0.0f;

      if (!pixel_center_integer)
         offset += 0.5f;

      /* flip Y when the origin is not the upper-left corner */
      if (!origin_upper_left) {
         offset += (float) (fb_height - 1);
         y = tsrc_negate(y);
      }

      tc_ADD(tc, real_dst[1], y, tsrc_imm_f(offset));
   }

   /* Z and W (W is delivered as 1/W, hence the INV) */
   tc_MOV(tc, real_dst[2], src_z);
   tc_INV(tc, real_dst[3], src_w);
}
static void |
fetch_face(struct fs_compile_context *fcc, struct toy_dst dst) |
{ |
struct toy_compiler *tc = &fcc->tc; |
const struct toy_src r0 = tsrc_d(tsrc(TOY_FILE_GRF, 0, 0)); |
struct toy_dst tmp_f, tmp; |
struct toy_dst real_dst[4]; |
tdst_transpose(dst, real_dst); |
tmp_f = tc_alloc_tmp(tc); |
tmp = tdst_d(tmp_f); |
tc_SHR(tc, tmp, tsrc_rect(r0, TOY_RECT_010), tsrc_imm_d(15)); |
tc_AND(tc, tmp, tsrc_from(tmp), tsrc_imm_d(1)); |
tc_MOV(tc, tmp_f, tsrc_from(tmp)); |
/* convert to 1.0 and -1.0 */ |
tc_MUL(tc, tmp_f, tsrc_from(tmp_f), tsrc_imm_f(-2.0f)); |
tc_ADD(tc, real_dst[0], tsrc_from(tmp_f), tsrc_imm_f(1.0f)); |
tc_MOV(tc, real_dst[1], tsrc_imm_f(0.0f)); |
tc_MOV(tc, real_dst[2], tsrc_imm_f(0.0f)); |
tc_MOV(tc, real_dst[3], tsrc_imm_f(1.0f)); |
} |
/**
 * Emit instructions to fetch an FS input attribute, either as a constant
 * or by evaluating its plane equations at the barycentric coordinates
 * delivered in the payload.
 */
static void
fetch_attr(struct fs_compile_context *fcc, struct toy_dst dst, int slot)
{
   struct toy_compiler *tc = &fcc->tc;
   struct toy_dst real_dst[4];
   bool is_const = false;
   int grf, mode, ch;

   tdst_transpose(dst, real_dst);

   /* each attribute occupies two GRFs in the payload */
   grf = fcc->first_attr_grf + slot * 2;

   /* pick constant interpolation or a barycentric mode */
   switch (fcc->tgsi.inputs[slot].interp) {
   case TGSI_INTERPOLATE_CONSTANT:
      is_const = true;
      break;
   case TGSI_INTERPOLATE_LINEAR:
      if (fcc->tgsi.inputs[slot].centroid)
         mode = BRW_WM_NONPERSPECTIVE_CENTROID_BARYCENTRIC;
      else
         mode = BRW_WM_NONPERSPECTIVE_PIXEL_BARYCENTRIC;
      break;
   case TGSI_INTERPOLATE_COLOR:
      /* colors become constant when the variant requests flat shading */
      if (fcc->variant->u.fs.flatshade) {
         is_const = true;
         break;
      }
      /* fall through */
   case TGSI_INTERPOLATE_PERSPECTIVE:
      if (fcc->tgsi.inputs[slot].centroid)
         mode = BRW_WM_PERSPECTIVE_CENTROID_BARYCENTRIC;
      else
         mode = BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC;
      break;
   default:
      assert(!"unexpected FS interpolation");
      mode = BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC;
      break;
   }

   if (is_const) {
      /* replicate one scalar per channel from the payload
       * (NOTE(review): offsets 3 and 7 look like the constant terms of
       * the per-channel plane equations -- confirm against the SBE/WM
       * payload layout) */
      struct toy_src a0[4];

      a0[0] = tsrc(TOY_FILE_GRF, grf + 0, 3 * 4);
      a0[1] = tsrc(TOY_FILE_GRF, grf + 0, 7 * 4);
      a0[2] = tsrc(TOY_FILE_GRF, grf + 1, 3 * 4);
      a0[3] = tsrc(TOY_FILE_GRF, grf + 1, 7 * 4);

      for (ch = 0; ch < 4; ch++)
         tc_MOV(tc, real_dst[ch], tsrc_rect(a0[ch], TOY_RECT_010));
   }
   else {
      /* evaluate the per-channel plane equations at the barycentric
       * coordinates with PLN */
      struct toy_src attr[4], uv;

      attr[0] = tsrc(TOY_FILE_GRF, grf + 0, 0);
      attr[1] = tsrc(TOY_FILE_GRF, grf + 0, 4 * 4);
      attr[2] = tsrc(TOY_FILE_GRF, grf + 1, 0);
      attr[3] = tsrc(TOY_FILE_GRF, grf + 1, 4 * 4);

      uv = tsrc(TOY_FILE_GRF, fcc->payloads[0].barycentric_interps[mode], 0);

      for (ch = 0; ch < 4; ch++) {
         tc_add2(tc, BRW_OPCODE_PLN, real_dst[ch],
               tsrc_rect(attr[ch], TOY_RECT_010), uv);
      }
   }

   /* fog carries a single channel; force (x, 0, 0, 1) */
   if (fcc->tgsi.inputs[slot].semantic_name == TGSI_SEMANTIC_FOG) {
      tc_MOV(tc, real_dst[1], tsrc_imm_f(0.0f));
      tc_MOV(tc, real_dst[2], tsrc_imm_f(0.0f));
      tc_MOV(tc, real_dst[3], tsrc_imm_f(1.0f));
   }
}
static void |
fs_lower_opcode_tgsi_in(struct fs_compile_context *fcc, |
struct toy_dst dst, int dim, int idx) |
{ |
int slot; |
assert(!dim); |
slot = toy_tgsi_find_input(&fcc->tgsi, idx); |
if (slot < 0) |
return; |
switch (fcc->tgsi.inputs[slot].semantic_name) { |
case TGSI_SEMANTIC_POSITION: |
fetch_position(fcc, dst); |
break; |
case TGSI_SEMANTIC_FACE: |
fetch_face(fcc, dst); |
break; |
default: |
fetch_attr(fcc, dst, slot); |
break; |
} |
} |
/**
 * Emit instructions to fetch a constant vec4 with a non-uniform index, by
 * sending a per-fragment "ld" message to the sampler with the constant
 * buffer bound as the surface.
 */
static void
fs_lower_opcode_tgsi_indirect_const(struct fs_compile_context *fcc,
                                    struct toy_dst dst, int dim,
                                    struct toy_src idx)
{
   const struct toy_dst offset =
      tdst_ud(tdst(TOY_FILE_MRF, fcc->first_free_mrf, 0));
   struct toy_compiler *tc = &fcc->tc;
   unsigned simd_mode, param_size;
   struct toy_inst *inst;
   struct toy_src desc, real_src[4];
   struct toy_dst tmp, real_dst[4];
   int i;

   tsrc_transpose(idx, real_src);

   /* set offset; only the X channel of the index is used */
   inst = tc_MOV(tc, offset, real_src[0]);
   inst->mask_ctrl = BRW_MASK_DISABLE;

   /* derive the SIMD mode and per-parameter size from the exec size */
   switch (inst->exec_size) {
   case BRW_EXECUTE_8:
      simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD8;
      param_size = 1;
      break;
   case BRW_EXECUTE_16:
      simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD16;
      param_size = 2;
      break;
   default:
      assert(!"unsupported execution size");
      tc_MOV(tc, dst, tsrc_imm_f(0.0f));
      return;
      break;
   }

   desc = tsrc_imm_mdesc_sampler(tc, param_size, param_size * 4, false,
         simd_mode,
         GEN5_SAMPLER_MESSAGE_SAMPLE_LD,
         0,
         ILO_WM_CONST_SURFACE(dim));

   tmp = tdst(TOY_FILE_VRF, tc_alloc_vrf(tc, param_size * 4), 0);
   inst = tc_SEND(tc, tmp, tsrc_from(offset), desc, BRW_SFID_SAMPLER);
   inst->mask_ctrl = BRW_MASK_DISABLE;

   /* copy the four returned channels to the destination */
   tdst_transpose(dst, real_dst);
   for (i = 0; i < 4; i++) {
      const struct toy_src src =
         tsrc_offset(tsrc_from(tmp), param_size * i, 0);

      /* cast to type D to make sure these are raw moves */
      tc_MOV(tc, tdst_d(real_dst[i]), tsrc_d(src));
   }
}
/**
 * Emit instructions to fetch a vec4 from the constant buffer on GEN6,
 * using an OWord block read through the data port.
 */
static void
fs_lower_opcode_tgsi_const_gen6(struct fs_compile_context *fcc,
                                struct toy_dst dst, int dim, struct toy_src idx)
{
   const struct toy_dst header =
      tdst_ud(tdst(TOY_FILE_MRF, fcc->first_free_mrf, 0));
   const struct toy_dst global_offset =
      tdst_ud(tdst(TOY_FILE_MRF, fcc->first_free_mrf, 2 * 4));
   const struct toy_src r0 = tsrc_ud(tsrc(TOY_FILE_GRF, 0, 0));
   struct toy_compiler *tc = &fcc->tc;
   unsigned msg_type, msg_ctrl, msg_len;
   struct toy_inst *inst;
   struct toy_src desc;
   struct toy_dst tmp, real_dst[4];
   int i;

   /* set message header (copied from r0) */
   inst = tc_MOV(tc, header, r0);
   inst->mask_ctrl = BRW_MASK_DISABLE;

   /* set global offset; a scalar write into the header */
   inst = tc_MOV(tc, global_offset, idx);
   inst->mask_ctrl = BRW_MASK_DISABLE;
   inst->exec_size = BRW_EXECUTE_1;
   inst->src[0].rect = TOY_RECT_010;

   /* read one (low) OWord -- a single vec4 */
   msg_type = BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ;
   msg_ctrl = BRW_DATAPORT_OWORD_BLOCK_1_OWORDLOW << 8;
   msg_len = 1;

   desc = tsrc_imm_mdesc_data_port(tc, false, msg_len, 1, true, false,
         msg_type, msg_ctrl, ILO_WM_CONST_SURFACE(dim));

   tmp = tc_alloc_tmp(tc);

   tc_SEND(tc, tmp, tsrc_from(header), desc, fcc->const_cache);

   /* broadcast each returned channel to the whole destination */
   tdst_transpose(dst, real_dst);
   for (i = 0; i < 4; i++) {
      const struct toy_src src =
         tsrc_offset(tsrc_rect(tsrc_from(tmp), TOY_RECT_010), 0, i);

      /* cast to type D to make sure these are raw moves */
      tc_MOV(tc, tdst_d(real_dst[i]), tsrc_d(src));
   }
}
/**
 * Emit instructions to fetch a vec4 from the constant buffer on GEN7,
 * using a SIMD4x2 "ld" message to the sampler.
 */
static void
fs_lower_opcode_tgsi_const_gen7(struct fs_compile_context *fcc,
                                struct toy_dst dst, int dim, struct toy_src idx)
{
   struct toy_compiler *tc = &fcc->tc;
   const struct toy_dst offset =
      tdst_ud(tdst(TOY_FILE_MRF, fcc->first_free_mrf, 0));
   struct toy_src desc;
   struct toy_inst *inst;
   struct toy_dst tmp, real_dst[4];
   int i;

   /*
    * In 4c1fdae0a01b3f92ec03b61aac1d3df500d51fc6, pull constant load was
    * changed from OWord Block Read to ld to increase performance in the
    * classic driver.  Since we use the constant cache instead of the data
    * cache, I wonder if we still want to follow the classic driver.
    */

   /* set offset (replicated scalar index) */
   inst = tc_MOV(tc, offset, tsrc_rect(idx, TOY_RECT_010));
   inst->exec_size = BRW_EXECUTE_8;
   inst->mask_ctrl = BRW_MASK_DISABLE;

   desc = tsrc_imm_mdesc_sampler(tc, 1, 1, false,
         BRW_SAMPLER_SIMD_MODE_SIMD4X2,
         GEN5_SAMPLER_MESSAGE_SAMPLE_LD,
         0,
         ILO_WM_CONST_SURFACE(dim));

   tmp = tc_alloc_tmp(tc);

   inst = tc_SEND(tc, tmp, tsrc_from(offset), desc, BRW_SFID_SAMPLER);
   inst->exec_size = BRW_EXECUTE_8;
   inst->mask_ctrl = BRW_MASK_DISABLE;

   /* broadcast each returned channel to the whole destination */
   tdst_transpose(dst, real_dst);
   for (i = 0; i < 4; i++) {
      const struct toy_src src =
         tsrc_offset(tsrc_rect(tsrc_from(tmp), TOY_RECT_010), 0, i);

      /* cast to type D to make sure these are raw moves */
      tc_MOV(tc, tdst_d(real_dst[i]), tsrc_d(src));
   }
}
static void |
fs_lower_opcode_tgsi_imm(struct fs_compile_context *fcc, |
struct toy_dst dst, int idx) |
{ |
const uint32_t *imm; |
struct toy_dst real_dst[4]; |
int ch; |
imm = toy_tgsi_get_imm(&fcc->tgsi, idx, NULL); |
tdst_transpose(dst, real_dst); |
/* raw moves */ |
for (ch = 0; ch < 4; ch++) |
tc_MOV(&fcc->tc, tdst_ud(real_dst[ch]), tsrc_imm_ud(imm[ch])); |
} |
static void |
fs_lower_opcode_tgsi_sv(struct fs_compile_context *fcc, |
struct toy_dst dst, int dim, int idx) |
{ |
struct toy_compiler *tc = &fcc->tc; |
const struct toy_tgsi *tgsi = &fcc->tgsi; |
int slot; |
assert(!dim); |
slot = toy_tgsi_find_system_value(tgsi, idx); |
if (slot < 0) |
return; |
switch (tgsi->system_values[slot].semantic_name) { |
case TGSI_SEMANTIC_PRIMID: |
case TGSI_SEMANTIC_INSTANCEID: |
case TGSI_SEMANTIC_VERTEXID: |
default: |
tc_fail(tc, "unhandled system value"); |
tc_MOV(tc, dst, tsrc_imm_d(0)); |
break; |
} |
} |
/**
 * Lower a direct (non-indirect) TGSI fetch.  src[0] and src[1] carry the
 * dimension and index as immediates; the instruction is replaced by the
 * code emitted for the fetch and then discarded.
 */
static void
fs_lower_opcode_tgsi_direct(struct fs_compile_context *fcc,
                            struct toy_inst *inst)
{
   struct toy_compiler *tc = &fcc->tc;
   int dim, idx;

   assert(inst->src[0].file == TOY_FILE_IMM);
   dim = inst->src[0].val32;

   assert(inst->src[1].file == TOY_FILE_IMM);
   idx = inst->src[1].val32;

   switch (inst->opcode) {
   case TOY_OPCODE_TGSI_IN:
      fs_lower_opcode_tgsi_in(fcc, inst->dst, dim, idx);
      break;
   case TOY_OPCODE_TGSI_CONST:
      /* the const fetchers take the index as a toy_src, not an int */
      if (tc->dev->gen >= ILO_GEN(7))
         fs_lower_opcode_tgsi_const_gen7(fcc, inst->dst, dim, inst->src[1]);
      else
         fs_lower_opcode_tgsi_const_gen6(fcc, inst->dst, dim, inst->src[1]);
      break;
   case TOY_OPCODE_TGSI_SV:
      fs_lower_opcode_tgsi_sv(fcc, inst->dst, dim, idx);
      break;
   case TOY_OPCODE_TGSI_IMM:
      assert(!dim);
      fs_lower_opcode_tgsi_imm(fcc, inst->dst, idx);
      break;
   default:
      tc_fail(tc, "unhandled TGSI fetch");
      break;
   }

   tc_discard_inst(tc, inst);
}
/**
 * Lower an indirect TGSI fetch/store.  Only indirect fetches from the
 * constant file are supported; everything else records a failure.  The
 * instruction is discarded afterwards.
 */
static void
fs_lower_opcode_tgsi_indirect(struct fs_compile_context *fcc,
                              struct toy_inst *inst)
{
   struct toy_compiler *tc = &fcc->tc;
   enum tgsi_file_type file;
   int dim, idx;
   struct toy_src indirect_dim, indirect_idx;

   assert(inst->src[0].file == TOY_FILE_IMM);
   file = inst->src[0].val32;

   assert(inst->src[1].file == TOY_FILE_IMM);
   dim = inst->src[1].val32;
   indirect_dim = inst->src[2];

   assert(inst->src[3].file == TOY_FILE_IMM);
   idx = inst->src[3].val32;
   indirect_idx = inst->src[4];

   /* no dimension indirection */
   assert(indirect_dim.file == TOY_FILE_IMM);
   dim += indirect_dim.val32;

   switch (inst->opcode) {
   case TOY_OPCODE_TGSI_INDIRECT_FETCH:
      if (file == TGSI_FILE_CONSTANT) {
         /* fold the constant base index into the indirect index */
         if (idx) {
            struct toy_dst tmp = tc_alloc_tmp(tc);

            tc_ADD(tc, tmp, indirect_idx, tsrc_imm_d(idx));
            indirect_idx = tsrc_from(tmp);
         }

         fs_lower_opcode_tgsi_indirect_const(fcc, inst->dst, dim, indirect_idx);
         break;
      }
      /* fall through */
   case TOY_OPCODE_TGSI_INDIRECT_STORE:
   default:
      tc_fail(tc, "unhandled TGSI indirection");
      break;
   }

   tc_discard_inst(tc, inst);
}
/**
 * Emit instructions to move sampling parameters to the message registers.
 *
 * Lays out the parameters starting at \p base_mrf in the order the GEN6
 * sampler expects for \p msg_type, with \p param_size MRFs per parameter.
 * Returns the resulting message length in MRFs.
 */
static int
fs_add_sampler_params_gen6(struct toy_compiler *tc, int msg_type,
                           int base_mrf, int param_size,
                           struct toy_src *coords, int num_coords,
                           struct toy_src bias_or_lod, struct toy_src ref_or_si,
                           struct toy_src *ddx, struct toy_src *ddy,
                           int num_derivs)
{
   int num_params, i;

   assert(num_coords <= 4);
   assert(num_derivs <= 3 && num_derivs <= num_coords);

#define SAMPLER_PARAM(p) (tdst(TOY_FILE_MRF, base_mrf + (p) * param_size, 0))
   switch (msg_type) {
   case GEN5_SAMPLER_MESSAGE_SAMPLE:
      /* coordinates only */
      for (i = 0; i < num_coords; i++)
         tc_MOV(tc, SAMPLER_PARAM(i), coords[i]);
      num_params = num_coords;
      break;
   case GEN5_SAMPLER_MESSAGE_SAMPLE_BIAS:
   case GEN5_SAMPLER_MESSAGE_SAMPLE_LOD:
      /* coordinates, then bias/lod in slot 4 */
      for (i = 0; i < num_coords; i++)
         tc_MOV(tc, SAMPLER_PARAM(i), coords[i]);
      tc_MOV(tc, SAMPLER_PARAM(4), bias_or_lod);
      num_params = 5;
      break;
   case GEN5_SAMPLER_MESSAGE_SAMPLE_COMPARE:
      /* coordinates, then the shadow reference in slot 4 */
      for (i = 0; i < num_coords; i++)
         tc_MOV(tc, SAMPLER_PARAM(i), coords[i]);
      tc_MOV(tc, SAMPLER_PARAM(4), ref_or_si);
      num_params = 5;
      break;
   case GEN5_SAMPLER_MESSAGE_SAMPLE_DERIVS:
      /* coordinates, then interleaved (ddx, ddy) pairs */
      for (i = 0; i < num_coords; i++)
         tc_MOV(tc, SAMPLER_PARAM(i), coords[i]);
      for (i = 0; i < num_derivs; i++) {
         tc_MOV(tc, SAMPLER_PARAM(4 + i * 2), ddx[i]);
         tc_MOV(tc, SAMPLER_PARAM(5 + i * 2), ddy[i]);
      }
      num_params = 4 + num_derivs * 2;
      break;
   case GEN5_SAMPLER_MESSAGE_SAMPLE_BIAS_COMPARE:
   case GEN5_SAMPLER_MESSAGE_SAMPLE_LOD_COMPARE:
      /* coordinates, reference in slot 4, bias/lod in slot 5 */
      for (i = 0; i < num_coords; i++)
         tc_MOV(tc, SAMPLER_PARAM(i), coords[i]);
      tc_MOV(tc, SAMPLER_PARAM(4), ref_or_si);
      tc_MOV(tc, SAMPLER_PARAM(5), bias_or_lod);
      num_params = 6;
      break;
   case GEN5_SAMPLER_MESSAGE_SAMPLE_LD:
      /* integer coordinates, lod in slot 3, sample index in slot 4 */
      assert(num_coords <= 3);
      for (i = 0; i < num_coords; i++)
         tc_MOV(tc, tdst_d(SAMPLER_PARAM(i)), coords[i]);
      tc_MOV(tc, tdst_d(SAMPLER_PARAM(3)), bias_or_lod);
      tc_MOV(tc, tdst_d(SAMPLER_PARAM(4)), ref_or_si);
      num_params = 5;
      break;
   case GEN5_SAMPLER_MESSAGE_SAMPLE_RESINFO:
      /* lod only */
      tc_MOV(tc, tdst_d(SAMPLER_PARAM(0)), bias_or_lod);
      num_params = 1;
      break;
   default:
      tc_fail(tc, "unknown sampler opcode");
      num_params = 0;
      break;
   }
#undef SAMPLER_PARAM

   return num_params * param_size;
}
/**
 * GEN7 variant of fs_add_sampler_params_gen6().  The parameter order
 * differs from GEN6: bias/lod and the shadow reference come first, and
 * derivatives are interleaved with the coordinates.  Returns the message
 * length in MRFs.
 */
static int
fs_add_sampler_params_gen7(struct toy_compiler *tc, int msg_type,
                           int base_mrf, int param_size,
                           struct toy_src *coords, int num_coords,
                           struct toy_src bias_or_lod, struct toy_src ref_or_si,
                           struct toy_src *ddx, struct toy_src *ddy,
                           int num_derivs)
{
   int num_params, i;

   assert(num_coords <= 4);
   assert(num_derivs <= 3 && num_derivs <= num_coords);

#define SAMPLER_PARAM(p) (tdst(TOY_FILE_MRF, base_mrf + (p) * param_size, 0))
   switch (msg_type) {
   case GEN5_SAMPLER_MESSAGE_SAMPLE:
      /* coordinates only */
      for (i = 0; i < num_coords; i++)
         tc_MOV(tc, SAMPLER_PARAM(i), coords[i]);
      num_params = num_coords;
      break;
   case GEN5_SAMPLER_MESSAGE_SAMPLE_BIAS:
   case GEN5_SAMPLER_MESSAGE_SAMPLE_LOD:
      /* bias/lod first, then the coordinates */
      tc_MOV(tc, SAMPLER_PARAM(0), bias_or_lod);
      for (i = 0; i < num_coords; i++)
         tc_MOV(tc, SAMPLER_PARAM(1 + i), coords[i]);
      num_params = 1 + num_coords;
      break;
   case GEN5_SAMPLER_MESSAGE_SAMPLE_COMPARE:
      /* shadow reference first, then the coordinates */
      tc_MOV(tc, SAMPLER_PARAM(0), ref_or_si);
      for (i = 0; i < num_coords; i++)
         tc_MOV(tc, SAMPLER_PARAM(1 + i), coords[i]);
      num_params = 1 + num_coords;
      break;
   case GEN5_SAMPLER_MESSAGE_SAMPLE_DERIVS:
      /* (coord, ddx, ddy) triplets per dimension */
      for (i = 0; i < num_coords; i++) {
         tc_MOV(tc, SAMPLER_PARAM(i * 3), coords[i]);
         if (i < num_derivs) {
            tc_MOV(tc, SAMPLER_PARAM(i * 3 + 1), ddx[i]);
            tc_MOV(tc, SAMPLER_PARAM(i * 3 + 2), ddy[i]);
         }
      }
      num_params = num_coords * 3 - ((num_coords > num_derivs) ? 2 : 0);
      break;
   case GEN5_SAMPLER_MESSAGE_SAMPLE_BIAS_COMPARE:
   case GEN5_SAMPLER_MESSAGE_SAMPLE_LOD_COMPARE:
      /* reference, bias/lod, then the coordinates */
      tc_MOV(tc, SAMPLER_PARAM(0), ref_or_si);
      tc_MOV(tc, SAMPLER_PARAM(1), bias_or_lod);
      for (i = 0; i < num_coords; i++)
         tc_MOV(tc, SAMPLER_PARAM(2 + i), coords[i]);
      num_params = 2 + num_coords;
      break;
   case GEN5_SAMPLER_MESSAGE_SAMPLE_LD:
      /* u, lod, then the remaining integer coordinates */
      assert(num_coords >= 1 && num_coords <= 3);
      tc_MOV(tc, tdst_d(SAMPLER_PARAM(0)), coords[0]);
      tc_MOV(tc, tdst_d(SAMPLER_PARAM(1)), bias_or_lod);
      for (i = 1; i < num_coords; i++)
         tc_MOV(tc, tdst_d(SAMPLER_PARAM(1 + i)), coords[i]);
      num_params = 1 + num_coords;
      break;
   case GEN5_SAMPLER_MESSAGE_SAMPLE_RESINFO:
      /* lod only */
      tc_MOV(tc, tdst_d(SAMPLER_PARAM(0)), bias_or_lod);
      num_params = 1;
      break;
   default:
      tc_fail(tc, "unknown sampler opcode");
      num_params = 0;
      break;
   }
#undef SAMPLER_PARAM

   return num_params * param_size;
}
/**
 * Set up message registers and return the message descriptor for sampling.
 *
 * Decodes the TGSI sampling opcode into a sampler message type, gathers
 * the coordinates/bias/lod/reference/derivative operands, applies
 * coordinate fix-ups (projection, cube-map normalization, GL_CLAMP
 * saturation), writes the parameters to the MRFs starting at \p base_mrf,
 * and returns an immediate message descriptor.  On failure, tc_fail() is
 * called and/or a null source is returned.  \p ret_sampler_index, if not
 * NULL, receives the sampler unit index.
 */
static struct toy_src
fs_prepare_tgsi_sampling(struct toy_compiler *tc, const struct toy_inst *inst,
                         int base_mrf, const uint32_t *saturate_coords,
                         unsigned *ret_sampler_index)
{
   unsigned simd_mode, msg_type, msg_len, sampler_index, binding_table_index;
   struct toy_src coords[4], ddx[4], ddy[4], bias_or_lod, ref_or_si;
   int num_coords, ref_pos, num_derivs;
   int sampler_src, param_size, i;

   /* the execution size fixes the SIMD mode and per-parameter size */
   switch (inst->exec_size) {
   case BRW_EXECUTE_8:
      simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD8;
      param_size = 1;
      break;
   case BRW_EXECUTE_16:
      simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD16;
      param_size = 2;
      break;
   default:
      tc_fail(tc, "unsupported execute size for sampling");
      return tsrc_null();
      break;
   }

   num_coords = tgsi_util_get_texture_coord_dim(inst->tex.target, &ref_pos);
   tsrc_transpose(inst->src[0], coords);
   bias_or_lod = tsrc_null();
   ref_or_si = tsrc_null();
   num_derivs = 0;
   sampler_src = 1;

   /*
    * For TXD,
    *
    *   src0 := (x, y, z, w)
    *   src1 := ddx
    *   src2 := ddy
    *   src3 := sampler
    *
    * For TEX2, TXB2, and TXL2,
    *
    *   src0 := (x, y, z, w)
    *   src1 := (v or bias or lod, ...)
    *   src2 := sampler
    *
    * For TEX, TXB, TXL, and TXP,
    *
    *   src0 := (x, y, z, w or bias or lod or projection)
    *   src1 := sampler
    *
    * For TXQ,
    *
    *   src0 := (lod, ...)
    *   src1 := sampler
    *
    * For TXQ_LZ,
    *
    *   src0 := sampler
    *
    * And for TXF,
    *
    *   src0 := (x, y, z, w or lod)
    *   src1 := sampler
    *
    * State trackers should not generate opcode+texture combinations with
    * which the two definitions conflict (e.g., TXB with SHADOW2DARRAY).
    */
   switch (inst->opcode) {
   case TOY_OPCODE_TGSI_TEX:
      if (ref_pos >= 0) {
         assert(ref_pos < 4);

         msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_COMPARE;
         ref_or_si = coords[ref_pos];
      }
      else {
         msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE;
      }
      break;
   case TOY_OPCODE_TGSI_TXD:
      if (ref_pos >= 0)
         tc_fail(tc, "TXD with shadow sampler not supported");

      msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_DERIVS;
      tsrc_transpose(inst->src[1], ddx);
      tsrc_transpose(inst->src[2], ddy);
      num_derivs = num_coords;
      sampler_src = 3;
      break;
   case TOY_OPCODE_TGSI_TXP:
      if (ref_pos >= 0) {
         assert(ref_pos < 3);

         msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_COMPARE;
         ref_or_si = coords[ref_pos];
      }
      else {
         msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE;
      }

      /* project the coordinates (and the reference) by 1/w */
      {
         struct toy_dst tmp[4];

         tc_alloc_tmp4(tc, tmp);

         tc_INV(tc, tmp[3], coords[3]);
         for (i = 0; i < num_coords && i < 3; i++) {
            tc_MUL(tc, tmp[i], coords[i], tsrc_from(tmp[3]));
            coords[i] = tsrc_from(tmp[i]);
         }

         if (ref_pos >= i) {
            tc_MUL(tc, tmp[ref_pos], ref_or_si, tsrc_from(tmp[3]));
            ref_or_si = tsrc_from(tmp[ref_pos]);
         }
      }
      break;
   case TOY_OPCODE_TGSI_TXB:
      if (ref_pos >= 0) {
         assert(ref_pos < 3);

         msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_BIAS_COMPARE;
         ref_or_si = coords[ref_pos];
      }
      else {
         msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_BIAS;
      }

      bias_or_lod = coords[3];
      break;
   case TOY_OPCODE_TGSI_TXL:
      if (ref_pos >= 0) {
         assert(ref_pos < 3);

         msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_LOD_COMPARE;
         ref_or_si = coords[ref_pos];
      }
      else {
         msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_LOD;
      }

      bias_or_lod = coords[3];
      break;
   case TOY_OPCODE_TGSI_TXF:
      msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_LD;

      switch (inst->tex.target) {
      case TGSI_TEXTURE_2D_MSAA:
      case TGSI_TEXTURE_2D_ARRAY_MSAA:
         /* ref_pos holds the sample index for MSAA targets */
         assert(ref_pos >= 0 && ref_pos < 4);
         /* lod is always 0 */
         bias_or_lod = tsrc_imm_d(0);
         ref_or_si = coords[ref_pos];
         break;
      default:
         bias_or_lod = coords[3];
         break;
      }

      /* offset the coordinates */
      if (!tsrc_is_null(inst->tex.offsets[0])) {
         struct toy_dst tmp[4];
         struct toy_src offsets[4];

         tc_alloc_tmp4(tc, tmp);
         tsrc_transpose(inst->tex.offsets[0], offsets);

         for (i = 0; i < num_coords; i++) {
            tc_ADD(tc, tmp[i], coords[i], offsets[i]);
            coords[i] = tsrc_from(tmp[i]);
         }
      }

      sampler_src = 1;
      break;
   case TOY_OPCODE_TGSI_TXQ:
      msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_RESINFO;
      num_coords = 0;
      bias_or_lod = coords[0];
      break;
   case TOY_OPCODE_TGSI_TXQ_LZ:
      msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_RESINFO;
      num_coords = 0;
      sampler_src = 0;
      break;
   case TOY_OPCODE_TGSI_TEX2:
      if (ref_pos >= 0) {
         assert(ref_pos < 5);

         msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_COMPARE;

         /* a reference beyond the fourth channel lives in src1 */
         if (ref_pos >= 4) {
            struct toy_src src1[4];
            tsrc_transpose(inst->src[1], src1);
            ref_or_si = src1[ref_pos - 4];
         }
         else {
            ref_or_si = coords[ref_pos];
         }
      }
      else {
         msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE;
      }

      sampler_src = 2;
      break;
   case TOY_OPCODE_TGSI_TXB2:
      if (ref_pos >= 0) {
         assert(ref_pos < 4);

         msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_BIAS_COMPARE;
         ref_or_si = coords[ref_pos];
      }
      else {
         msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_BIAS;
      }

      /* the bias comes from the first channel of src1 */
      {
         struct toy_src src1[4];
         tsrc_transpose(inst->src[1], src1);
         bias_or_lod = src1[0];
      }

      sampler_src = 2;
      break;
   case TOY_OPCODE_TGSI_TXL2:
      if (ref_pos >= 0) {
         assert(ref_pos < 4);

         msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_LOD_COMPARE;
         ref_or_si = coords[ref_pos];
      }
      else {
         msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_LOD;
      }

      /* the lod comes from the first channel of src1 */
      {
         struct toy_src src1[4];
         tsrc_transpose(inst->src[1], src1);
         bias_or_lod = src1[0];
      }

      sampler_src = 2;
      break;
   default:
      assert(!"unhandled sampling opcode");
      return tsrc_null();
      break;
   }

   assert(inst->src[sampler_src].file == TOY_FILE_IMM);
   sampler_index = inst->src[sampler_src].val32;
   binding_table_index = ILO_WM_TEXTURE_SURFACE(sampler_index);

   /*
    * From the Sandy Bridge PRM, volume 4 part 1, page 18:
    *
    *   "Note that the (cube map) coordinates delivered to the sampling
    *    engine must already have been divided by the component with the
    *    largest absolute value."
    */
   switch (inst->tex.target) {
   case TGSI_TEXTURE_CUBE:
   case TGSI_TEXTURE_SHADOWCUBE:
   case TGSI_TEXTURE_CUBE_ARRAY:
   case TGSI_TEXTURE_SHADOWCUBE_ARRAY:
      /* TXQ does not need coordinates */
      if (num_coords >= 3) {
         struct toy_dst tmp[4];

         tc_alloc_tmp4(tc, tmp);

         /* tmp[3] = 1 / max(|x|, |y|, |z|) */
         tc_SEL(tc, tmp[3], tsrc_absolute(coords[0]),
               tsrc_absolute(coords[1]), BRW_CONDITIONAL_GE);
         tc_SEL(tc, tmp[3], tsrc_from(tmp[3]),
               tsrc_absolute(coords[2]), BRW_CONDITIONAL_GE);
         tc_INV(tc, tmp[3], tsrc_from(tmp[3]));

         for (i = 0; i < 3; i++) {
            tc_MUL(tc, tmp[i], coords[i], tsrc_from(tmp[3]));
            coords[i] = tsrc_from(tmp[i]);
         }
      }
      break;
   }

   /*
    * Saturate (s, t, r).  saturate_coords is set for sampler and coordinate
    * that uses linear filtering and PIPE_TEX_WRAP_CLAMP respectively.  It is
    * so that sampling outside the border gets the correct colors.
    */
   for (i = 0; i < MIN2(num_coords, 3); i++) {
      bool is_rect;

      if (!(saturate_coords[i] & (1 << sampler_index)))
         continue;

      switch (inst->tex.target) {
      case TGSI_TEXTURE_RECT:
      case TGSI_TEXTURE_SHADOWRECT:
         is_rect = true;
         break;
      default:
         is_rect = false;
         break;
      }

      if (is_rect) {
         struct toy_src min, max;
         struct toy_dst tmp;

         tc_fail(tc, "GL_CLAMP with rectangle texture unsupported");
         tmp = tc_alloc_tmp(tc);

         /* saturate to [0, width] or [0, height] */
         /* TODO TXQ? */
         min = tsrc_imm_f(0.0f);
         max = tsrc_imm_f(2048.0f);

         tc_SEL(tc, tmp, coords[i], min, BRW_CONDITIONAL_G);
         tc_SEL(tc, tmp, tsrc_from(tmp), max, BRW_CONDITIONAL_L);

         coords[i] = tsrc_from(tmp);
      }
      else {
         struct toy_dst tmp;
         struct toy_inst *inst2;

         tmp = tc_alloc_tmp(tc);

         /* saturate to [0.0f, 1.0f] */
         inst2 = tc_MOV(tc, tmp, coords[i]);
         inst2->saturate = true;

         coords[i] = tsrc_from(tmp);
      }
   }

   /* set up sampler parameters */
   if (tc->dev->gen >= ILO_GEN(7)) {
      msg_len = fs_add_sampler_params_gen7(tc, msg_type, base_mrf, param_size,
            coords, num_coords, bias_or_lod, ref_or_si, ddx, ddy, num_derivs);
   }
   else {
      msg_len = fs_add_sampler_params_gen6(tc, msg_type, base_mrf, param_size,
            coords, num_coords, bias_or_lod, ref_or_si, ddx, ddy, num_derivs);
   }

   /*
    * From the Sandy Bridge PRM, volume 4 part 1, page 136:
    *
    *   "The maximum message length allowed to the sampler is 11.  This
    *    would disallow sample_d, sample_b_c, and sample_l_c with a SIMD
    *    Mode of SIMD16."
    */
   if (msg_len > 11)
      tc_fail(tc, "maximum length for messages to the sampler is 11");

   if (ret_sampler_index)
      *ret_sampler_index = sampler_index;

   return tsrc_imm_mdesc_sampler(tc, msg_len, 4 * param_size,
         false, simd_mode, msg_type, sampler_index, binding_table_index);
}
/**
 * Lower a TGSI sampling instruction to a SEND to the sampler, then apply
 * the sampler-view swizzles to the returned channels.
 */
static void
fs_lower_opcode_tgsi_sampling(struct fs_compile_context *fcc,
                              struct toy_inst *inst)
{
   struct toy_compiler *tc = &fcc->tc;
   struct toy_dst dst[4], tmp[4];
   struct toy_src desc;
   unsigned sampler_index;
   int swizzles[4], i;
   bool need_filter;

   desc = fs_prepare_tgsi_sampling(tc, inst,
         fcc->first_free_mrf,
         fcc->variant->saturate_tex_coords,
         &sampler_index);

   /* TXF/TXQ results bypass the sampler-view swizzles (identity is used) */
   switch (inst->opcode) {
   case TOY_OPCODE_TGSI_TXF:
   case TOY_OPCODE_TGSI_TXQ:
   case TOY_OPCODE_TGSI_TXQ_LZ:
      need_filter = false;
      break;
   default:
      need_filter = true;
      break;
   }

   toy_compiler_lower_to_send(tc, inst, false, BRW_SFID_SAMPLER);
   inst->src[0] = tsrc(TOY_FILE_MRF, fcc->first_free_mrf, 0);
   inst->src[1] = desc;
   for (i = 2; i < Elements(inst->src); i++)
      inst->src[i] = tsrc_null();

   /* write to temps first so the swizzling below cannot clobber sources */
   tc_alloc_tmp4(tc, tmp);
   for (i = 0; i < 4; i++)
      tmp[i].type = inst->dst.type;
   tdst_transpose(inst->dst, dst);
   inst->dst = tmp[0];

   tc_move_inst(tc, inst);

   if (need_filter) {
      assert(sampler_index < fcc->variant->num_sampler_views);
      swizzles[0] = fcc->variant->sampler_view_swizzles[sampler_index].r;
      swizzles[1] = fcc->variant->sampler_view_swizzles[sampler_index].g;
      swizzles[2] = fcc->variant->sampler_view_swizzles[sampler_index].b;
      swizzles[3] = fcc->variant->sampler_view_swizzles[sampler_index].a;
   }
   else {
      swizzles[0] = PIPE_SWIZZLE_RED;
      swizzles[1] = PIPE_SWIZZLE_GREEN;
      swizzles[2] = PIPE_SWIZZLE_BLUE;
      swizzles[3] = PIPE_SWIZZLE_ALPHA;
   }

   /* swizzle the results */
   for (i = 0; i < 4; i++) {
      switch (swizzles[i]) {
      case PIPE_SWIZZLE_ZERO:
         tc_MOV(tc, dst[i], tsrc_imm_f(0.0f));
         break;
      case PIPE_SWIZZLE_ONE:
         tc_MOV(tc, dst[i], tsrc_imm_f(1.0f));
         break;
      default:
         tc_MOV(tc, dst[i], tsrc_from(tmp[swizzles[i]]));
         break;
      }
   }
}
/**
 * Lower TOY_OPCODE_DDX and TOY_OPCODE_DDY by exploiting the 2x2-subspan
 * layout of fragments within a SIMD execution and the region parameters
 * available in BRW_ALIGN_1 mode.
 */
static void
fs_lower_opcode_derivative(struct toy_compiler *tc, struct toy_inst *inst)
{
   struct toy_dst dst[4];
   struct toy_src src[4];
   int i;

   tdst_transpose(inst->dst, dst);
   tsrc_transpose(inst->src[0], src);

   /*
    * Every four fragments are from a 2x2 subspan, with
    *
    *   fragment 1 on the top-left,
    *   fragment 2 on the top-right,
    *   fragment 3 on the bottom-left,
    *   fragment 4 on the bottom-right.
    *
    * DDX should thus produce
    *
    *   dst = src.yyww - src.xxzz
    *
    * and DDY should produce
    *
    *   dst = src.zzww - src.xxyy
    *
    * But since we are in BRW_ALIGN_1, swizzling does not work and we have to
    * play with the region parameters.
    */
   if (inst->opcode == TOY_OPCODE_DDX) {
      for (i = 0; i < 4; i++) {
         struct toy_src left, right;

         /* horizontal neighbor within the subspan */
         left = tsrc_rect(src[i], TOY_RECT_220);
         right = tsrc_offset(left, 0, 1);

         tc_ADD(tc, dst[i], right, tsrc_negate(left));
      }
   }
   else {
      for (i = 0; i < 4; i++) {
         struct toy_src top, bottom;

         /* approximate with dst = src.zzzz - src.xxxx */
         top = tsrc_rect(src[i], TOY_RECT_440);
         bottom = tsrc_offset(top, 0, 2);

         tc_ADD(tc, dst[i], bottom, tsrc_negate(top));
      }
   }

   tc_discard_inst(tc, inst);
}
static void |
fs_lower_opcode_fb_write(struct toy_compiler *tc, struct toy_inst *inst) |
{ |
/* fs_write_fb() has set up the message registers */ |
toy_compiler_lower_to_send(tc, inst, true, |
GEN6_SFID_DATAPORT_RENDER_CACHE); |
} |
/**
 * Lower TOY_OPCODE_KIL (both KILL and KILL_IF): fragments are discarded
 * by clearing their bits in the pixel mask held in the lower half of
 * r1.7:ud.
 */
static void
fs_lower_opcode_kil(struct toy_compiler *tc, struct toy_inst *inst)
{
   struct toy_dst pixel_mask_dst;
   struct toy_src f0, pixel_mask;
   struct toy_inst *tmp;

   /* lower half of r1.7:ud */
   pixel_mask_dst = tdst_uw(tdst(TOY_FILE_GRF, 1, 7 * 4));
   pixel_mask = tsrc_rect(tsrc_from(pixel_mask_dst), TOY_RECT_010);

   f0 = tsrc_rect(tsrc_uw(tsrc(TOY_FILE_ARF, BRW_ARF_FLAG, 0)), TOY_RECT_010);

   /* KILL or KILL_IF */
   if (tsrc_is_null(inst->src[0])) {
      /* unconditional KILL: discard every active pixel */
      struct toy_src dummy = tsrc_uw(tsrc(TOY_FILE_GRF, 0, 0));
      struct toy_dst f0_dst = tdst_uw(tdst(TOY_FILE_ARF, BRW_ARF_FLAG, 0));

      /* create a mask that masks out all pixels */
      tmp = tc_MOV(tc, f0_dst, tsrc_rect(tsrc_imm_uw(0xffff), TOY_RECT_010));
      tmp->exec_size = BRW_EXECUTE_1;
      tmp->mask_ctrl = BRW_MASK_DISABLE;

      /* dummy != dummy is false, clearing the flag bits of active pixels */
      tc_CMP(tc, tdst_null(), dummy, dummy, BRW_CONDITIONAL_NEQ);

      /* swapping the two src operands breaks glBitmap()!? */
      tmp = tc_AND(tc, pixel_mask_dst, f0, pixel_mask);
      tmp->exec_size = BRW_EXECUTE_1;
      tmp->mask_ctrl = BRW_MASK_DISABLE;
   }
   else {
      /* KILL_IF: discard pixels whose source is negative in any channel */
      struct toy_src src[4];
      int i;

      tsrc_transpose(inst->src[0], src);

      /* mask out killed pixels */
      for (i = 0; i < 4; i++) {
         tc_CMP(tc, tdst_null(), src[i], tsrc_imm_f(0.0f),
               BRW_CONDITIONAL_GE);

         /* swapping the two src operands breaks glBitmap()!? */
         tmp = tc_AND(tc, pixel_mask_dst, f0, pixel_mask);
         tmp->exec_size = BRW_EXECUTE_1;
         tmp->mask_ctrl = BRW_MASK_DISABLE;
      }
   }

   tc_discard_inst(tc, inst);
}
/*
 * Lower all virtual opcodes in the FS instruction stream to hardware
 * instructions, in two passes over the list.
 */
static void
fs_lower_virtual_opcodes(struct fs_compile_context *fcc)
{
   struct toy_compiler *tc = &fcc->tc;
   struct toy_inst *inst;

   /* lower TGSI's first, as they might be lowered to other virtual opcodes */
   tc_head(tc);
   while ((inst = tc_next(tc)) != NULL) {
      switch (inst->opcode) {
      case TOY_OPCODE_TGSI_IN:
      case TOY_OPCODE_TGSI_CONST:
      case TOY_OPCODE_TGSI_SV:
      case TOY_OPCODE_TGSI_IMM:
         fs_lower_opcode_tgsi_direct(fcc, inst);
         break;
      case TOY_OPCODE_TGSI_INDIRECT_FETCH:
      case TOY_OPCODE_TGSI_INDIRECT_STORE:
         fs_lower_opcode_tgsi_indirect(fcc, inst);
         break;
      case TOY_OPCODE_TGSI_TEX:
      case TOY_OPCODE_TGSI_TXB:
      case TOY_OPCODE_TGSI_TXD:
      case TOY_OPCODE_TGSI_TXL:
      case TOY_OPCODE_TGSI_TXP:
      case TOY_OPCODE_TGSI_TXF:
      case TOY_OPCODE_TGSI_TXQ:
      case TOY_OPCODE_TGSI_TXQ_LZ:
      case TOY_OPCODE_TGSI_TEX2:
      case TOY_OPCODE_TGSI_TXB2:
      case TOY_OPCODE_TGSI_TXL2:
      case TOY_OPCODE_TGSI_SAMPLE:
      case TOY_OPCODE_TGSI_SAMPLE_I:
      case TOY_OPCODE_TGSI_SAMPLE_I_MS:
      case TOY_OPCODE_TGSI_SAMPLE_B:
      case TOY_OPCODE_TGSI_SAMPLE_C:
      case TOY_OPCODE_TGSI_SAMPLE_C_LZ:
      case TOY_OPCODE_TGSI_SAMPLE_D:
      case TOY_OPCODE_TGSI_SAMPLE_L:
      case TOY_OPCODE_TGSI_GATHER4:
      case TOY_OPCODE_TGSI_SVIEWINFO:
      case TOY_OPCODE_TGSI_SAMPLE_POS:
      case TOY_OPCODE_TGSI_SAMPLE_INFO:
         fs_lower_opcode_tgsi_sampling(fcc, inst);
         break;
      }
   }

   /* second pass: lower the remaining (possibly newly-emitted) virtuals */
   tc_head(tc);
   while ((inst = tc_next(tc)) != NULL) {
      switch (inst->opcode) {
      case TOY_OPCODE_INV:
      case TOY_OPCODE_LOG:
      case TOY_OPCODE_EXP:
      case TOY_OPCODE_SQRT:
      case TOY_OPCODE_RSQ:
      case TOY_OPCODE_SIN:
      case TOY_OPCODE_COS:
      case TOY_OPCODE_FDIV:
      case TOY_OPCODE_POW:
      case TOY_OPCODE_INT_DIV_QUOTIENT:
      case TOY_OPCODE_INT_DIV_REMAINDER:
         toy_compiler_lower_math(tc, inst);
         break;
      case TOY_OPCODE_DDX:
      case TOY_OPCODE_DDY:
         fs_lower_opcode_derivative(tc, inst);
         break;
      case TOY_OPCODE_FB_WRITE:
         fs_lower_opcode_fb_write(tc, inst);
         break;
      case TOY_OPCODE_KIL:
         fs_lower_opcode_kil(tc, inst);
         break;
      default:
         /* opcodes above 127 are virtual and must have been handled */
         if (inst->opcode > 127)
            tc_fail(tc, "unhandled virtual opcode");
         break;
      }
   }
}
/** |
* Compile the shader. |
*/ |
static bool |
fs_compile(struct fs_compile_context *fcc) |
{ |
struct toy_compiler *tc = &fcc->tc; |
struct ilo_shader *sh = fcc->shader; |
fs_lower_virtual_opcodes(fcc); |
toy_compiler_legalize_for_ra(tc); |
toy_compiler_optimize(tc); |
toy_compiler_allocate_registers(tc, |
fcc->first_free_grf, |
fcc->last_free_grf, |
fcc->num_grf_per_vrf); |
toy_compiler_legalize_for_asm(tc); |
if (tc->fail) { |
ilo_err("failed to legalize FS instructions: %s\n", tc->reason); |
return false; |
} |
if (ilo_debug & ILO_DEBUG_FS) { |
ilo_printf("legalized instructions:\n"); |
toy_compiler_dump(tc); |
ilo_printf("\n"); |
} |
if (true) { |
sh->kernel = toy_compiler_assemble(tc, &sh->kernel_size); |
} |
else { |
static const uint32_t microcode[] = { |
/* fill in the microcode here */ |
0x0, 0x0, 0x0, 0x0, |
}; |
const bool swap = true; |
sh->kernel_size = sizeof(microcode); |
sh->kernel = MALLOC(sh->kernel_size); |
if (sh->kernel) { |
const int num_dwords = sizeof(microcode) / 4; |
const uint32_t *src = microcode; |
uint32_t *dst = (uint32_t *) sh->kernel; |
int i; |
for (i = 0; i < num_dwords; i += 4) { |
if (swap) { |
dst[i + 0] = src[i + 3]; |
dst[i + 1] = src[i + 2]; |
dst[i + 2] = src[i + 1]; |
dst[i + 3] = src[i + 0]; |
} |
else { |
memcpy(dst, src, 16); |
} |
} |
} |
} |
if (!sh->kernel) { |
ilo_err("failed to compile FS: %s\n", tc->reason); |
return false; |
} |
if (ilo_debug & ILO_DEBUG_FS) { |
ilo_printf("disassembly:\n"); |
toy_compiler_disassemble(tc, sh->kernel, sh->kernel_size); |
ilo_printf("\n"); |
} |
return true; |
} |
/**
 * Emit instructions to write the color buffers (and the depth buffer).
 *
 * For each color buffer, the output color (or (0, 0, 0, 0) when the
 * shader writes no matching output) is copied into MRFs and a virtual
 * FB_WRITE is appended; the last write carries EOT.
 */
static void
fs_write_fb(struct fs_compile_context *fcc)
{
   struct toy_compiler *tc = &fcc->tc;
   int base_mrf = fcc->first_free_mrf;
   const struct toy_dst header = tdst_ud(tdst(TOY_FILE_MRF, base_mrf, 0));
   bool header_present = false;
   struct toy_src desc;
   unsigned msg_type, ctrl;
   int color_slots[ILO_MAX_DRAW_BUFFERS], num_cbufs;
   int pos_slot = -1, cbuf, i;

   for (i = 0; i < Elements(color_slots); i++)
      color_slots[i] = -1;

   /* map semantic COLOR indices to TGSI output slots; remember POSITION */
   for (i = 0; i < fcc->tgsi.num_outputs; i++) {
      if (fcc->tgsi.outputs[i].semantic_name == TGSI_SEMANTIC_COLOR) {
         assert(fcc->tgsi.outputs[i].semantic_index < Elements(color_slots));
         color_slots[fcc->tgsi.outputs[i].semantic_index] = i;
      }
      else if (fcc->tgsi.outputs[i].semantic_name == TGSI_SEMANTIC_POSITION) {
         pos_slot = i;
      }
   }

   num_cbufs = fcc->variant->u.fs.num_cbufs;
   /* still need to send EOT (and probably depth) */
   if (!num_cbufs)
      num_cbufs = 1;

   /* we need the header to specify the pixel mask or render target */
   if (fcc->tgsi.uses_kill || num_cbufs > 1) {
      const struct toy_src r0 = tsrc_ud(tsrc(TOY_FILE_GRF, 0, 0));
      struct toy_inst *inst;

      inst = tc_MOV(tc, header, r0);
      inst->mask_ctrl = BRW_MASK_DISABLE;
      base_mrf += fcc->num_grf_per_vrf;

      /* this is a two-register header */
      if (fcc->dispatch_mode == GEN6_WM_8_DISPATCH_ENABLE) {
         inst = tc_MOV(tc, tdst_offset(header, 1, 0), tsrc_offset(r0, 1, 0));
         inst->mask_ctrl = BRW_MASK_DISABLE;
         base_mrf += fcc->num_grf_per_vrf;
      }

      header_present = true;
   }

   for (cbuf = 0; cbuf < num_cbufs; cbuf++) {
      const int slot =
         color_slots[(fcc->tgsi.props.fs_color0_writes_all_cbufs) ? 0 : cbuf];
      int mrf = base_mrf, vrf;
      struct toy_src src[4];

      if (slot >= 0) {
         const unsigned undefined_mask =
            fcc->tgsi.outputs[slot].undefined_mask;
         const int index = fcc->tgsi.outputs[slot].index;

         vrf = toy_tgsi_get_vrf(&fcc->tgsi, TGSI_FILE_OUTPUT, 0, index);
         if (vrf >= 0) {
            const struct toy_src tmp = tsrc(TOY_FILE_VRF, vrf, 0);
            tsrc_transpose(tmp, src);
         }
         else {
            /* use (0, 0, 0, 0) */
            tsrc_transpose(tsrc_imm_f(0.0f), src);
         }

         /* copy RGBA into consecutive MRFs; undefined channels become 0 */
         for (i = 0; i < 4; i++) {
            const struct toy_dst dst = tdst(TOY_FILE_MRF, mrf, 0);

            if (undefined_mask & (1 << i))
               src[i] = tsrc_imm_f(0.0f);

            tc_MOV(tc, dst, src[i]);
            mrf += fcc->num_grf_per_vrf;
         }
      }
      else {
         /* use (0, 0, 0, 0) */
         for (i = 0; i < 4; i++) {
            const struct toy_dst dst = tdst(TOY_FILE_MRF, mrf, 0);

            tc_MOV(tc, dst, tsrc_imm_f(0.0f));
            mrf += fcc->num_grf_per_vrf;
         }
      }

      /* select BLEND_STATE[rt] */
      if (cbuf > 0) {
         struct toy_inst *inst;

         inst = tc_MOV(tc, tdst_offset(header, 0, 2), tsrc_imm_ud(cbuf));
         inst->mask_ctrl = BRW_MASK_DISABLE;
         inst->exec_size = BRW_EXECUTE_1;
         inst->src[0].rect = TOY_RECT_010;
      }

      /* append source depth after the colors of the first write only */
      if (cbuf == 0 && pos_slot >= 0) {
         const int index = fcc->tgsi.outputs[pos_slot].index;
         const struct toy_dst dst = tdst(TOY_FILE_MRF, mrf, 0);
         struct toy_src src[4];
         int vrf;

         vrf = toy_tgsi_get_vrf(&fcc->tgsi, TGSI_FILE_OUTPUT, 0, index);
         if (vrf >= 0) {
            const struct toy_src tmp = tsrc(TOY_FILE_VRF, vrf, 0);
            tsrc_transpose(tmp, src);
         }
         else {
            /* use (0, 0, 0, 0) */
            tsrc_transpose(tsrc_imm_f(0.0f), src);
         }

         /* only Z */
         tc_MOV(tc, dst, src[2]);
         mrf += fcc->num_grf_per_vrf;
      }

      msg_type = (fcc->dispatch_mode == GEN6_WM_16_DISPATCH_ENABLE) ?
         BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE :
         BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_SINGLE_SOURCE_SUBSPAN01;

      /* bit 12: last render target select; bits 11:8: message type */
      ctrl = (cbuf == num_cbufs - 1) << 12 |
             msg_type << 8;

      desc = tsrc_imm_mdesc_data_port(tc, cbuf == num_cbufs - 1,
            mrf - fcc->first_free_mrf, 0,
            header_present, false,
            GEN6_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE,
            ctrl, ILO_WM_DRAW_SURFACE(cbuf));

      tc_add2(tc, TOY_OPCODE_FB_WRITE, tdst_null(),
            tsrc(TOY_FILE_MRF, fcc->first_free_mrf, 0), desc);
   }
}
/** |
* Set up shader outputs for fixed-function units. |
*/ |
static void |
fs_setup_shader_out(struct ilo_shader *sh, const struct toy_tgsi *tgsi) |
{ |
int i; |
sh->out.count = tgsi->num_outputs; |
for (i = 0; i < tgsi->num_outputs; i++) { |
sh->out.register_indices[i] = tgsi->outputs[i].index; |
sh->out.semantic_names[i] = tgsi->outputs[i].semantic_name; |
sh->out.semantic_indices[i] = tgsi->outputs[i].semantic_index; |
if (tgsi->outputs[i].semantic_name == TGSI_SEMANTIC_POSITION) |
sh->out.has_pos = true; |
} |
} |
/**
 * Set up shader inputs for fixed-function units.
 *
 * Records the semantics and interpolation of every TGSI input and
 * accumulates the constant-interpolation bitmask and the barycentric
 * interpolation modes the WM payload must provide.
 */
static void
fs_setup_shader_in(struct ilo_shader *sh, const struct toy_tgsi *tgsi,
                   bool flatshade)
{
   int i;

   sh->in.count = tgsi->num_inputs;
   for (i = 0; i < tgsi->num_inputs; i++) {
      sh->in.semantic_names[i] = tgsi->inputs[i].semantic_name;
      sh->in.semantic_indices[i] = tgsi->inputs[i].semantic_index;
      sh->in.interp[i] = tgsi->inputs[i].interp;
      sh->in.centroid[i] = tgsi->inputs[i].centroid;

      /* POSITION and FACE come from the payload, not attribute interpolation */
      if (tgsi->inputs[i].semantic_name == TGSI_SEMANTIC_POSITION) {
         sh->in.has_pos = true;
         continue;
      }
      else if (tgsi->inputs[i].semantic_name == TGSI_SEMANTIC_FACE) {
         continue;
      }

      switch (tgsi->inputs[i].interp) {
      case TGSI_INTERPOLATE_CONSTANT:
         sh->in.const_interp_enable |= 1 << i;
         break;
      case TGSI_INTERPOLATE_LINEAR:
         sh->in.has_linear_interp = true;

         if (tgsi->inputs[i].centroid) {
            sh->in.barycentric_interpolation_mode |=
               1 << BRW_WM_NONPERSPECTIVE_CENTROID_BARYCENTRIC;
         }
         else {
            sh->in.barycentric_interpolation_mode |=
               1 << BRW_WM_NONPERSPECTIVE_PIXEL_BARYCENTRIC;
         }
         break;
      case TGSI_INTERPOLATE_COLOR:
         /* COLOR becomes constant when flat shading, perspective otherwise */
         if (flatshade) {
            sh->in.const_interp_enable |= 1 << i;
            break;
         }
         /* fall through */
      case TGSI_INTERPOLATE_PERSPECTIVE:
         if (tgsi->inputs[i].centroid) {
            sh->in.barycentric_interpolation_mode |=
               1 << BRW_WM_PERSPECTIVE_CENTROID_BARYCENTRIC;
         }
         else {
            sh->in.barycentric_interpolation_mode |=
               1 << BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC;
         }
         break;
      default:
         break;
      }
   }
}
/*
 * Lay out the WM thread payload and record where each piece lands.
 * Returns the index of the first GRF after the payload.
 */
static int
fs_setup_payloads(struct fs_compile_context *fcc)
{
   const struct ilo_shader *sh = fcc->shader;
   int grf, i;

   grf = 0;

   /* r0: header */
   grf++;

   /* r1-r2: coordinates and etc. */
   grf += (fcc->dispatch_mode == GEN6_WM_32_DISPATCH_ENABLE) ? 2 : 1;

   for (i = 0; i < Elements(fcc->payloads); i++) {
      int interp;

      /* r3-r26 or r32-r55: barycentric interpolation parameters */
      for (interp = 0; interp < BRW_WM_BARYCENTRIC_INTERP_MODE_COUNT; interp++) {
         if (!(sh->in.barycentric_interpolation_mode & (1 << interp)))
            continue;

         fcc->payloads[i].barycentric_interps[interp] = grf;
         grf += (fcc->dispatch_mode == GEN6_WM_8_DISPATCH_ENABLE) ? 2 : 4;
      }

      /* r27-r28 or r56-r57: interpolated depth */
      if (sh->in.has_pos) {
         fcc->payloads[i].source_depth = grf;
         grf += (fcc->dispatch_mode == GEN6_WM_8_DISPATCH_ENABLE) ? 1 : 2;
      }

      /* r29-r30 or r58-r59: interpolated w */
      if (sh->in.has_pos) {
         fcc->payloads[i].source_w = grf;
         grf += (fcc->dispatch_mode == GEN6_WM_8_DISPATCH_ENABLE) ? 1 : 2;
      }

      /* r31 or r60: position offset (not used) */
      if (false) {
         fcc->payloads[i].pos_offset = grf;
         grf++;
      }

      /* only SIMD32 dispatch has a second payload half */
      if (fcc->dispatch_mode != GEN6_WM_32_DISPATCH_ENABLE)
         break;
   }

   return grf;
}
/** |
* Translate the TGSI tokens. |
*/ |
static bool |
fs_setup_tgsi(struct toy_compiler *tc, const struct tgsi_token *tokens, |
struct toy_tgsi *tgsi) |
{ |
if (ilo_debug & ILO_DEBUG_FS) { |
ilo_printf("dumping fragment shader\n"); |
ilo_printf("\n"); |
tgsi_dump(tokens, 0); |
ilo_printf("\n"); |
} |
toy_compiler_translate_tgsi(tc, tokens, false, tgsi); |
if (tc->fail) { |
ilo_err("failed to translate FS TGSI tokens: %s\n", tc->reason); |
return false; |
} |
if (ilo_debug & ILO_DEBUG_FS) { |
ilo_printf("TGSI translator:\n"); |
toy_tgsi_dump(tgsi); |
ilo_printf("\n"); |
toy_compiler_dump(tc); |
ilo_printf("\n"); |
} |
return true; |
} |
/**
 * Set up FS compile context.  This includes translating the TGSI tokens.
 *
 * Allocates fcc->shader and initializes the toy compiler, register
 * budget, and shader in/out tables.  Returns false (with everything
 * cleaned up) on failure.
 */
static bool
fs_setup(struct fs_compile_context *fcc,
         const struct ilo_shader_state *state,
         const struct ilo_shader_variant *variant)
{
   int num_consts;

   memset(fcc, 0, sizeof(*fcc));

   fcc->shader = CALLOC_STRUCT(ilo_shader);
   if (!fcc->shader)
      return false;

   fcc->variant = variant;

   toy_compiler_init(&fcc->tc, state->info.dev);

   /* this proof-of-concept compiler only emits SIMD8 dispatch */
   fcc->dispatch_mode = GEN6_WM_8_DISPATCH_ENABLE;

   fcc->tc.templ.access_mode = BRW_ALIGN_1;
   if (fcc->dispatch_mode == GEN6_WM_16_DISPATCH_ENABLE) {
      fcc->tc.templ.qtr_ctrl = GEN6_COMPRESSION_1H;
      fcc->tc.templ.exec_size = BRW_EXECUTE_16;
   }
   else {
      fcc->tc.templ.qtr_ctrl = GEN6_COMPRESSION_1Q;
      fcc->tc.templ.exec_size = BRW_EXECUTE_8;
   }

   fcc->tc.rect_linear_width = 8;

   /*
    * The classic driver uses the sampler cache (gen6) or the data cache
    * (gen7).  Why?
    */
   fcc->const_cache = GEN6_SFID_DATAPORT_CONSTANT_CACHE;

   if (!fs_setup_tgsi(&fcc->tc, state->info.tokens, &fcc->tgsi)) {
      toy_compiler_cleanup(&fcc->tc);
      FREE(fcc->shader);
      return false;
   }

   fs_setup_shader_in(fcc->shader, &fcc->tgsi, fcc->variant->u.fs.flatshade);
   fs_setup_shader_out(fcc->shader, &fcc->tgsi);

   /* we do not make use of push constant buffers yet */
   num_consts = 0;

   /* GRF layout: payload | push constants | attributes | free */
   fcc->first_const_grf = fs_setup_payloads(fcc);
   fcc->first_attr_grf = fcc->first_const_grf + num_consts;
   fcc->first_free_grf = fcc->first_attr_grf + fcc->shader->in.count * 2;
   fcc->last_free_grf = 127;

   /* m0 is reserved for system routines */
   fcc->first_free_mrf = 1;
   fcc->last_free_mrf = 15;

   /* instructions are compressed with BRW_EXECUTE_16 */
   fcc->num_grf_per_vrf =
      (fcc->dispatch_mode == GEN6_WM_16_DISPATCH_ENABLE) ? 2 : 1;

   /* gen7 has no MRFs; carve fake ones out of the top of the GRF space */
   if (fcc->tc.dev->gen >= ILO_GEN(7)) {
      fcc->last_free_grf -= 15;
      fcc->first_free_mrf = fcc->last_free_grf + 1;
      fcc->last_free_mrf = fcc->first_free_mrf + 14;
   }

   fcc->shader->in.start_grf = fcc->first_const_grf;
   fcc->shader->has_kill = fcc->tgsi.uses_kill;
   fcc->shader->dispatch_16 =
      (fcc->dispatch_mode == GEN6_WM_16_DISPATCH_ENABLE);

   return true;
}
/** |
* Compile the fragment shader. |
*/ |
struct ilo_shader * |
ilo_shader_compile_fs(const struct ilo_shader_state *state, |
const struct ilo_shader_variant *variant) |
{ |
struct fs_compile_context fcc; |
if (!fs_setup(&fcc, state, variant)) |
return NULL; |
fs_write_fb(&fcc); |
if (!fs_compile(&fcc)) { |
FREE(fcc.shader); |
fcc.shader = NULL; |
} |
toy_tgsi_cleanup(&fcc.tgsi); |
toy_compiler_cleanup(&fcc.tc); |
return fcc.shader; |
} |
/contrib/sdk/sources/Mesa/src/gallium/drivers/ilo/shader/ilo_shader_gs.c |
---|
0,0 → 1,1449 |
/* |
* Mesa 3-D graphics library |
* |
* Copyright (C) 2012-2013 LunarG, Inc. |
* |
* Permission is hereby granted, free of charge, to any person obtaining a |
* copy of this software and associated documentation files (the "Software"), |
* to deal in the Software without restriction, including without limitation |
* the rights to use, copy, modify, merge, publish, distribute, sublicense, |
* and/or sell copies of the Software, and to permit persons to whom the |
* Software is furnished to do so, subject to the following conditions: |
* |
* The above copyright notice and this permission notice shall be included |
* in all copies or substantial portions of the Software. |
* |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING |
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER |
* DEALINGS IN THE SOFTWARE. |
* |
* Authors: |
* Chia-I Wu <olv@lunarg.com> |
*/ |
#include "tgsi/tgsi_dump.h" |
#include "toy_compiler.h" |
#include "toy_tgsi.h" |
#include "toy_legalize.h" |
#include "toy_optimize.h" |
#include "toy_helpers.h" |
#include "ilo_shader_internal.h" |
/* XXX Below is proof-of-concept code. Skip this file! */ |
/* |
* TODO |
* - primitive id is in r0.1. FS receives PID as a flat attribute. |
* - set VUE header m0.1 for layered rendering |
*/ |
/* State shared across the proof-of-concept GS compilation passes. */
struct gs_compile_context {
   struct ilo_shader *shader;
   const struct ilo_shader_variant *variant;
   const struct pipe_stream_output_info *so_info;

   struct toy_compiler tc;
   struct toy_tgsi tgsi;
   /* maps TGSI output slots (indexed by register) */
   int output_map[PIPE_MAX_SHADER_OUTPUTS];

   bool write_so;      /* stream output is enabled */
   bool write_vue;     /* vertices are passed down the pipeline */

   int in_vue_size;
   int in_vue_count;   /* vertices per input primitive */

   int out_vue_size;
   int out_vue_min_count;  /* vertices per output primitive */

   /* true when vertex/primitive counts are known at compile time */
   bool is_static;

   struct {
      struct toy_src header;
      struct toy_src svbi;
      struct toy_src vues[6];
   } payload;

   struct {
      struct toy_dst urb_write_header;
      bool prim_start;
      bool prim_end;
      int prim_type;

      struct toy_dst tmp;

      /* buffered tgsi_outs */
      struct toy_dst buffers[3];
      int buffer_needed, buffer_cur;

      struct toy_dst so_written;
      struct toy_dst so_index;

      struct toy_src tgsi_outs[PIPE_MAX_SHADER_OUTPUTS];
   } vars;

   /* counters kept in registers when counts are not known statically */
   struct {
      struct toy_dst total_vertices;
      struct toy_dst total_prims;

      struct toy_dst num_vertices;
      struct toy_dst num_vertices_in_prim;
   } dynamic_data;

   struct {
      int total_vertices;
      int total_prims;

      /* this limits the max vertex count to be 256 */
      uint32_t last_vertex[8];

      int num_vertices;
      int num_vertices_in_prim;
   } static_data;

   int first_free_grf;
   int last_free_grf;
   int first_free_mrf;
   int last_free_mrf;
};
static void |
gs_COPY8(struct toy_compiler *tc, struct toy_dst dst, struct toy_src src) |
{ |
struct toy_inst *inst; |
inst = tc_MOV(tc, dst, src); |
inst->exec_size = BRW_EXECUTE_8; |
inst->mask_ctrl = BRW_MASK_DISABLE; |
} |
static void |
gs_COPY4(struct toy_compiler *tc, |
struct toy_dst dst, int dst_ch, |
struct toy_src src, int src_ch) |
{ |
struct toy_inst *inst; |
inst = tc_MOV(tc, |
tdst_offset(dst, 0, dst_ch), |
tsrc_offset(src, 0, src_ch)); |
inst->exec_size = BRW_EXECUTE_4; |
inst->mask_ctrl = BRW_MASK_DISABLE; |
} |
static void |
gs_COPY1(struct toy_compiler *tc, |
struct toy_dst dst, int dst_ch, |
struct toy_src src, int src_ch) |
{ |
struct toy_inst *inst; |
inst = tc_MOV(tc, |
tdst_offset(dst, 0, dst_ch), |
tsrc_rect(tsrc_offset(src, 0, src_ch), TOY_RECT_010)); |
inst->exec_size = BRW_EXECUTE_1; |
inst->mask_ctrl = BRW_MASK_DISABLE; |
} |
/*
 * Initialize the per-invocation variables: the URB_WRITE header, the
 * primitive bookkeeping flags, and (when stream output is enabled) the
 * so_written counter.
 */
static void
gs_init_vars(struct gs_compile_context *gcc)
{
   struct toy_compiler *tc = &gcc->tc;
   struct toy_dst dst;

   /* init URB_WRITE header */
   dst = gcc->vars.urb_write_header;

   gs_COPY8(tc, dst, gcc->payload.header);

   gcc->vars.prim_start = true;
   gcc->vars.prim_end = false;
   /*
    * NOTE(review): no default case — prim_type is left unset for other
    * counts; presumably out_vue_min_count is always 1, 2, or 3.  Confirm.
    */
   switch (gcc->out_vue_min_count) {
   case 1:
      gcc->vars.prim_type = _3DPRIM_POINTLIST;
      break;
   case 2:
      gcc->vars.prim_type = _3DPRIM_LINESTRIP;
      break;
   case 3:
      gcc->vars.prim_type = _3DPRIM_TRISTRIP;
      break;
   }

   if (gcc->write_so)
      tc_MOV(tc, gcc->vars.so_written, tsrc_imm_d(0));
}
static void |
gs_save_output(struct gs_compile_context *gcc, const struct toy_src *outs) |
{ |
struct toy_compiler *tc = &gcc->tc; |
const struct toy_dst buf = gcc->vars.buffers[gcc->vars.buffer_cur]; |
int i; |
for (i = 0; i < gcc->shader->out.count; i++) |
tc_MOV(tc, tdst_offset(buf, i, 0), outs[i]); |
/* advance the cursor */ |
gcc->vars.buffer_cur++; |
gcc->vars.buffer_cur %= gcc->vars.buffer_needed; |
} |
/*
 * Emit a streamed-VB write of one output (RGBA at m0.0-m0.3, destination
 * index at m0.5) through the render cache dataport.
 */
static void
gs_write_so(struct gs_compile_context *gcc,
            struct toy_dst dst,
            struct toy_src index, struct toy_src out,
            bool send_write_commit_message,
            int binding_table_index)
{
   struct toy_compiler *tc = &gcc->tc;
   struct toy_dst mrf_header;
   struct toy_src desc;

   mrf_header = tdst_d(tdst(TOY_FILE_MRF, gcc->first_free_mrf, 0));

   /* m0.5: destination index */
   gs_COPY1(tc, mrf_header, 5, index, 0);

   /* m0.0 - m0.3: RGBA */
   gs_COPY4(tc, mrf_header, 0, tsrc_type(out, mrf_header.type), 0);

   desc = tsrc_imm_mdesc_data_port(tc, false,
         1, send_write_commit_message,
         true, send_write_commit_message,
         GEN6_DATAPORT_WRITE_MESSAGE_STREAMED_VB_WRITE, 0,
         binding_table_index);

   tc_SEND(tc, dst, tsrc_from(mrf_header), desc,
         GEN6_SFID_DATAPORT_RENDER_CACHE);
}
/*
 * Write num_outs vertex attributes to the URB, two attributes per
 * message register, splitting into multiple URB_WRITEs when the MRF
 * space cannot hold them all.  The last write carries EOT when
 * requested.
 */
static void
gs_write_vue(struct gs_compile_context *gcc,
             struct toy_dst dst, struct toy_src msg_header,
             const struct toy_src *outs, int num_outs,
             bool eot)
{
   struct toy_compiler *tc = &gcc->tc;
   struct toy_dst mrf_header;
   struct toy_src desc;
   int sent = 0;

   mrf_header = tdst_d(tdst(TOY_FILE_MRF, gcc->first_free_mrf, 0));
   gs_COPY8(tc, mrf_header, msg_header);

   while (sent < num_outs) {
      int mrf = gcc->first_free_mrf + 1;
      const int mrf_avail = gcc->last_free_mrf - mrf + 1;
      int msg_len, num_entries, i;
      bool complete;

      /* each MRF holds two attributes (channels 0-3 and 4-7) */
      num_entries = (num_outs - sent + 1) / 2;
      complete = true;
      if (num_entries > mrf_avail) {
         num_entries = mrf_avail;
         complete = false;
      }

      /*
       * NOTE(review): the destination register is mrf + i / 2 while mrf
       * is also incremented every iteration, so entries land at
       * non-consecutive registers — looks suspicious; confirm against
       * a working ilo_shader_gs.c before relying on this path.
       */
      for (i = 0; i < num_entries; i++) {
         gs_COPY4(tc, tdst(TOY_FILE_MRF, mrf + i / 2, 0), 0,
               outs[sent + 2 * i], 0);
         if (sent + i * 2 + 1 < gcc->shader->out.count) {
            gs_COPY4(tc, tdst(TOY_FILE_MRF, mrf + i / 2, 0), 4,
                  outs[sent + 2 * i + 1], 0);
         }
         mrf++;
      }

      /* do not forget the header */
      msg_len = num_entries + 1;

      if (complete) {
         desc = tsrc_imm_mdesc_urb(tc,
               eot, msg_len, !eot, true, true, !eot,
               BRW_URB_SWIZZLE_NONE, sent, 0);
      }
      else {
         desc = tsrc_imm_mdesc_urb(tc,
               false, msg_len, 0, false, true, false,
               BRW_URB_SWIZZLE_NONE, sent, 0);
      }

      tc_add2(tc, TOY_OPCODE_URB_WRITE,
            (complete) ? dst : tdst_null(), tsrc_from(mrf_header), desc);

      sent += num_entries * 2;
   }
}
/*
 * Emit the FF_SYNC URB message that every GS thread must send before
 * writing vertices: it sets the SO vertex/primitive counts in m0.0, the
 * GS primitive count in m0.1, and allocates the initial URB handle.
 */
static void
gs_ff_sync(struct gs_compile_context *gcc, struct toy_dst dst,
           struct toy_src num_prims)
{
   struct toy_compiler *tc = &gcc->tc;
   struct toy_dst mrf_header =
      tdst_d(tdst(TOY_FILE_MRF, gcc->first_free_mrf, 0));
   struct toy_src desc;
   bool allocate;

   gs_COPY8(tc, mrf_header, gcc->payload.header);

   /* set NumSOVertsToWrite and NumSOPrimsNeeded */
   if (gcc->write_so) {
      if (num_prims.file == TOY_FILE_IMM) {
         /* both fields computed at compile time: verts in high word,
          * prims in low word */
         const uint32_t v =
            (num_prims.val32 * gcc->in_vue_count) << 16 | num_prims.val32;

         gs_COPY1(tc, mrf_header, 0, tsrc_imm_d(v), 0);
      }
      else {
         /* compute verts<<16 | prims at run time */
         struct toy_dst m0_0 = tdst_d(gcc->vars.tmp);

         tc_MUL(tc, m0_0, num_prims, tsrc_imm_d(gcc->in_vue_count << 16));
         tc_OR(tc, m0_0, tsrc_from(m0_0), num_prims);

         gs_COPY1(tc, mrf_header, 0, tsrc_from(m0_0), 0);
      }
   }

   /* set NumGSPrimsGenerated */
   if (gcc->write_vue)
      gs_COPY1(tc, mrf_header, 1, num_prims, 0);

   /*
    * From the Sandy Bridge PRM, volume 2 part 1, page 173:
    *
    *     "Programming Note: If the GS stage is enabled, software must always
    *      allocate at least one GS URB Entry. This is true even if the GS
    *      thread never needs to output vertices to the pipeline, e.g., when
    *      only performing stream output. This is an artifact of the need to
    *      pass the GS thread an initial destination URB handle."
    */
   allocate = true;
   desc = tsrc_imm_mdesc_urb(tc, false, 1, 1,
         false, false, allocate,
         BRW_URB_SWIZZLE_NONE, 0, 1);

   tc_SEND(tc, dst, tsrc_from(mrf_header), desc, BRW_SFID_URB);
}
static void |
gs_discard(struct gs_compile_context *gcc) |
{ |
struct toy_compiler *tc = &gcc->tc; |
struct toy_dst mrf_header; |
struct toy_src desc; |
mrf_header = tdst_d(tdst(TOY_FILE_MRF, gcc->first_free_mrf, 0)); |
gs_COPY8(tc, mrf_header, tsrc_from(gcc->vars.urb_write_header)); |
desc = tsrc_imm_mdesc_urb(tc, |
true, 1, 0, true, false, false, |
BRW_URB_SWIZZLE_NONE, 0, 0); |
tc_add2(tc, TOY_OPCODE_URB_WRITE, |
tdst_null(), tsrc_from(mrf_header), desc); |
} |
/*
 * Lower the ENDPRIM opcode.  Intentionally empty: in the static path
 * primitive boundaries are derived from static_data.last_vertex instead.
 */
static void
gs_lower_opcode_endprim(struct gs_compile_context *gcc, struct toy_inst *inst)
{
   /* if has control flow, set PrimEnd on the last vertex and URB_WRITE */
}
/* Unimplemented dynamic-count counterpart of gs_lower_opcode_emit_vue_static. */
static void
gs_lower_opcode_emit_vue_dynamic(struct gs_compile_context *gcc)
{
   /* TODO similar to the static version */

   /*
    * When SO is enabled and the inputs are lines or triangles, vertices are
    * always buffered.  we can defer the emission of the current vertex until
    * the next EMIT or ENDPRIM.  Or, we can emit two URB_WRITEs with the later
    * patching the former.
    */
}
/*
 * Dynamic-count stream-output emission.  Only the index bookkeeping is
 * implemented; the actual writes are still TODO.
 */
static void
gs_lower_opcode_emit_so_dynamic(struct gs_compile_context *gcc)
{
   struct toy_compiler *tc = &gcc->tc;

   /* emit only once a full output primitive has been seen */
   tc_IF(tc, tdst_null(),
         tsrc_from(gcc->dynamic_data.num_vertices_in_prim),
         tsrc_imm_d(gcc->out_vue_min_count),
         BRW_CONDITIONAL_GE);

   {
      /* compute per-vertex destination indices: so_index + (0, 1, 2, 3) */
      tc_ADD(tc, gcc->vars.tmp, tsrc_from(gcc->vars.so_index), tsrc_imm_d(0x03020100));

      /* TODO same as static version */
   }

   tc_ENDIF(tc);

   tc_ADD(tc, gcc->vars.so_index,
         tsrc_from(gcc->vars.so_index), tsrc_imm_d(gcc->out_vue_min_count));
}
/*
 * Emit one vertex to the URB in the static-count path.  Fills in the
 * PrimType/PrimStart/PrimEnd dword of the URB_WRITE header, writes the
 * VUE, and chains the returned URB handle into the header for the next
 * vertex (unless this was the last one, which carries EOT).
 */
static void
gs_lower_opcode_emit_vue_static(struct gs_compile_context *gcc)
{
   struct toy_compiler *tc = &gcc->tc;
   struct toy_inst *inst2;
   bool eot;

   eot = (gcc->static_data.num_vertices == gcc->static_data.total_vertices);

   /* PrimEnd comes from the per-vertex bitmask built at translation time */
   gcc->vars.prim_end =
      ((gcc->static_data.last_vertex[(gcc->static_data.num_vertices - 1) / 32] &
        1 << ((gcc->static_data.num_vertices - 1) % 32)) != 0);

   if (eot && gcc->write_so) {
      /* the final header also carries SONumPrimsWritten (in so_written) */
      inst2 = tc_OR(tc, tdst_offset(gcc->vars.urb_write_header, 0, 2),
            tsrc_from(gcc->vars.so_written),
            tsrc_imm_d(gcc->vars.prim_type << 2 |
                       gcc->vars.prim_start << 1 |
                       gcc->vars.prim_end));
      inst2->exec_size = BRW_EXECUTE_1;
      inst2->src[0] = tsrc_rect(inst2->src[0], TOY_RECT_010);
      inst2->src[1] = tsrc_rect(inst2->src[1], TOY_RECT_010);
   }
   else {
      gs_COPY1(tc, gcc->vars.urb_write_header, 2,
            tsrc_imm_d(gcc->vars.prim_type << 2 |
                       gcc->vars.prim_start << 1 |
                       gcc->vars.prim_end), 0);
   }

   gs_write_vue(gcc, tdst_d(gcc->vars.tmp),
         tsrc_from(gcc->vars.urb_write_header),
         gcc->vars.tgsi_outs,
         gcc->shader->out.count, eot);

   if (!eot) {
      /* chain the returned URB handle into the next header */
      gs_COPY1(tc, gcc->vars.urb_write_header, 0,
            tsrc_from(tdst_d(gcc->vars.tmp)), 0);
   }

   gcc->vars.prim_start = gcc->vars.prim_end;
   gcc->vars.prim_end = false;
}
/*
 * Emit stream-output writes in the static-count path.  Once a full
 * output primitive is available, each of its vertices' SO outputs is
 * written with a streamed-VB write, guarded by a bounds check against
 * the available SVBI range.
 */
static void
gs_lower_opcode_emit_so_static(struct gs_compile_context *gcc)
{
   struct toy_compiler *tc = &gcc->tc;
   struct toy_inst *inst;
   int i, j;

   if (gcc->static_data.num_vertices_in_prim < gcc->out_vue_min_count)
      return;

   /* tmp = so_index + (0, 1, 2, ...): one destination index per vertex */
   inst = tc_MOV(tc, tdst_w(gcc->vars.tmp), tsrc_imm_v(0x03020100));
   inst->exec_size = BRW_EXECUTE_8;
   inst->mask_ctrl = BRW_MASK_DISABLE;

   tc_ADD(tc, tdst_d(gcc->vars.tmp), tsrc_from(tdst_d(gcc->vars.tmp)),
         tsrc_rect(tsrc_from(gcc->vars.so_index), TOY_RECT_010));

   /* only write when the last vertex index still fits in the SO buffers */
   tc_IF(tc, tdst_null(),
         tsrc_rect(tsrc_offset(tsrc_from(tdst_d(gcc->vars.tmp)), 0, gcc->out_vue_min_count - 1), TOY_RECT_010),
         tsrc_rect(tsrc_offset(gcc->payload.svbi, 0, 4), TOY_RECT_010),
         BRW_CONDITIONAL_LE);
   {
      for (i = 0; i < gcc->out_vue_min_count; i++) {
         for (j = 0; j < gcc->so_info->num_outputs; j++) {
            const int idx = gcc->so_info->output[j].register_index;
            struct toy_src index, out;
            int binding_table_index;
            bool write_commit;

            index = tsrc_d(tsrc_offset(tsrc_from(gcc->vars.tmp), 0, i));

            if (i == gcc->out_vue_min_count - 1) {
               /* the newest vertex is still in tgsi_outs */
               out = gcc->vars.tgsi_outs[idx];
            }
            else {
               /* gcc->vars.buffer_cur also points to the first vertex */
               const int buf =
                  (gcc->vars.buffer_cur + i) % gcc->vars.buffer_needed;

               out = tsrc_offset(tsrc_from(gcc->vars.buffers[buf]), idx, 0);
            }

            out = tsrc_offset(out, 0, gcc->so_info->output[j].start_component);

            /*
             * From the Sandy Bridge PRM, volume 4 part 2, page 19:
             *
             *     "The Kernel must do a write commit on the last write to DAP
             *      prior to a URB_WRITE with End of Thread."
             */
            write_commit =
               (gcc->static_data.num_vertices == gcc->static_data.total_vertices &&
                i == gcc->out_vue_min_count - 1 &&
                j == gcc->so_info->num_outputs - 1);

            binding_table_index = ILO_GS_SO_SURFACE(j);

            gs_write_so(gcc, gcc->vars.tmp, index,
                  out, write_commit, binding_table_index);

            /*
             * From the Sandy Bridge PRM, volume 4 part 1, page 168:
             *
             *     "The write commit does not modify the destination register, but
             *      merely clears the dependency associated with the destination
             *      register. Thus, a simple "mov" instruction using the register as a
             *      source is sufficient to wait for the write commit to occur."
             */
            if (write_commit)
               tc_MOV(tc, gcc->vars.tmp, tsrc_from(gcc->vars.tmp));
         }
      }

      /* SONumPrimsWritten occupies the higher word of m0.2 of URB_WRITE */
      tc_ADD(tc, gcc->vars.so_written,
            tsrc_from(gcc->vars.so_written), tsrc_imm_d(1 << 16));
      tc_ADD(tc, gcc->vars.so_index,
            tsrc_from(gcc->vars.so_index), tsrc_imm_d(gcc->out_vue_min_count));
   }
   tc_ENDIF(tc);
}
static void |
gs_lower_opcode_emit_static(struct gs_compile_context *gcc, |
struct toy_inst *inst) |
{ |
gcc->static_data.num_vertices++; |
gcc->static_data.num_vertices_in_prim++; |
if (gcc->write_so) { |
gs_lower_opcode_emit_so_static(gcc); |
if (gcc->out_vue_min_count > 1 && |
gcc->static_data.num_vertices != gcc->static_data.total_vertices) |
gs_save_output(gcc, gcc->vars.tgsi_outs); |
} |
if (gcc->write_vue) |
gs_lower_opcode_emit_vue_static(gcc); |
} |
static void |
gs_lower_opcode_emit_dynamic(struct gs_compile_context *gcc, |
struct toy_inst *inst) |
{ |
struct toy_compiler *tc = &gcc->tc; |
tc_ADD(tc, gcc->dynamic_data.num_vertices, |
tsrc_from(gcc->dynamic_data.num_vertices), tsrc_imm_d(1)); |
tc_ADD(tc, gcc->dynamic_data.num_vertices_in_prim, |
tsrc_from(gcc->dynamic_data.num_vertices_in_prim), tsrc_imm_d(1)); |
if (gcc->write_so) { |
gs_lower_opcode_emit_so_dynamic(gcc); |
if (gcc->out_vue_min_count > 1) |
gs_save_output(gcc, gcc->vars.tgsi_outs); |
} |
if (gcc->write_vue) |
gs_lower_opcode_emit_vue_dynamic(gcc); |
} |
static void |
gs_lower_opcode_emit(struct gs_compile_context *gcc, struct toy_inst *inst) |
{ |
if (gcc->is_static) |
gs_lower_opcode_emit_static(gcc, inst); |
else |
gs_lower_opcode_emit_dynamic(gcc, inst); |
} |
/*
 * Fetch TGSI input idx of input vertex dim from the thread payload into
 * dst.  For triangle inputs, IN[0] and IN[1] are swapped at run time
 * when the incoming primitive is _3DPRIM_TRISTRIP_REVERSE.
 */
static void
gs_lower_opcode_tgsi_in(struct gs_compile_context *gcc,
                        struct toy_dst dst, int dim, int idx)
{
   struct toy_compiler *tc = &gcc->tc;
   struct toy_src attr;
   int slot, reg = -1, subreg;

   /* locate the payload register/subregister holding this attribute */
   slot = toy_tgsi_find_input(&gcc->tgsi, idx);
   if (slot >= 0) {
      int i;

      for (i = 0; i < gcc->variant->u.gs.num_inputs; i++) {
         if (gcc->variant->u.gs.semantic_names[i] ==
               gcc->tgsi.inputs[slot].semantic_name &&
             gcc->variant->u.gs.semantic_indices[i] ==
               gcc->tgsi.inputs[slot].semantic_index) {
            /* two attributes per register */
            reg = i / 2;
            subreg = (i % 2) * 4;
            break;
         }
      }
   }

   if (reg < 0) {
      tc_MOV(tc, dst, tsrc_imm_f(0.0f));
      return;
   }

   /* fix vertex ordering for _3DPRIM_TRISTRIP_REVERSE */
   if (gcc->in_vue_count == 3 && dim < 2) {
      struct toy_inst *inst;

      /* get PrimType */
      inst = tc_AND(tc, tdst_d(gcc->vars.tmp),
            tsrc_offset(gcc->payload.header, 0, 2), tsrc_imm_d(0x1f));
      inst->exec_size = BRW_EXECUTE_1;
      inst->src[0] = tsrc_rect(inst->src[0], TOY_RECT_010);
      inst->src[1] = tsrc_rect(inst->src[1], TOY_RECT_010);

      inst = tc_CMP(tc, tdst_null(), tsrc_from(tdst_d(gcc->vars.tmp)),
            tsrc_imm_d(_3DPRIM_TRISTRIP_REVERSE), BRW_CONDITIONAL_NEQ);
      inst->src[0] = tsrc_rect(inst->src[0], TOY_RECT_010);

      /* predicated MOV: normal order when PrimType != TRISTRIP_REVERSE */
      attr = tsrc_offset(gcc->payload.vues[dim], reg, subreg);
      inst = tc_MOV(tc, dst, attr);
      inst->pred_ctrl = BRW_PREDICATE_NORMAL;

      /* swap IN[0] and IN[1] for _3DPRIM_TRISTRIP_REVERSE */
      dim = !dim;

      attr = tsrc_offset(gcc->payload.vues[dim], reg, subreg);
      inst = tc_MOV(tc, dst, attr);
      inst->pred_ctrl = BRW_PREDICATE_NORMAL;
      inst->pred_inv = true;
   }
   else {
      attr = tsrc_offset(gcc->payload.vues[dim], reg, subreg);
      tc_MOV(tc, dst, attr);
   }
}
static void |
gs_lower_opcode_tgsi_imm(struct gs_compile_context *gcc, |
struct toy_dst dst, int idx) |
{ |
const uint32_t *imm; |
int ch; |
imm = toy_tgsi_get_imm(&gcc->tgsi, idx, NULL); |
for (ch = 0; ch < 4; ch++) { |
struct toy_inst *inst; |
/* raw moves */ |
inst = tc_MOV(&gcc->tc, |
tdst_writemask(tdst_ud(dst), 1 << ch), |
tsrc_imm_ud(imm[ch])); |
inst->access_mode = BRW_ALIGN_16; |
} |
} |
/*
 * Lower the directly-addressed TGSI fetch opcodes (IN and IMM).  CONST
 * and SV are not supported and fail the compilation.
 */
static void
gs_lower_opcode_tgsi_direct(struct gs_compile_context *gcc,
                            struct toy_inst *inst)
{
   struct toy_compiler *tc = &gcc->tc;
   int dim, idx;

   /* both operands are compile-time immediates: dimension and index */
   assert(inst->src[0].file == TOY_FILE_IMM);
   dim = inst->src[0].val32;

   assert(inst->src[1].file == TOY_FILE_IMM);
   idx = inst->src[1].val32;

   switch (inst->opcode) {
   case TOY_OPCODE_TGSI_IN:
      gs_lower_opcode_tgsi_in(gcc, inst->dst, dim, idx);
      /* fetch all dimensions */
      if (dim == 0) {
         int i;

         for (i = 1; i < gcc->in_vue_count; i++) {
            const int vrf = toy_tgsi_get_vrf(&gcc->tgsi, TGSI_FILE_INPUT, i, idx);
            struct toy_dst dst;

            if (vrf < 0)
               continue;

            dst = tdst(TOY_FILE_VRF, vrf, 0);
            gs_lower_opcode_tgsi_in(gcc, dst, i, idx);
         }
      }
      break;
   case TOY_OPCODE_TGSI_IMM:
      assert(!dim);
      gs_lower_opcode_tgsi_imm(gcc, inst->dst, idx);
      break;
   case TOY_OPCODE_TGSI_CONST:
   case TOY_OPCODE_TGSI_SV:
   default:
      tc_fail(tc, "unhandled TGSI fetch");
      break;
   }

   tc_discard_inst(tc, inst);
}
/**
 * Lower all virtual opcodes to hardware instructions in two passes:
 * first the TGSI-level opcodes (fetches, EMIT, ENDPRIM), then math
 * opcodes and URB writes.  Unsupported opcodes mark the compiler as
 * failed via tc_fail().
 */
static void
gs_lower_virtual_opcodes(struct gs_compile_context *gcc)
{
   struct toy_compiler *tc = &gcc->tc;
   struct toy_inst *inst;

   /* pass 1: TGSI-level virtual opcodes */
   tc_head(tc);
   while ((inst = tc_next(tc)) != NULL) {
      switch (inst->opcode) {
      case TOY_OPCODE_TGSI_IN:
      case TOY_OPCODE_TGSI_CONST:
      case TOY_OPCODE_TGSI_SV:
      case TOY_OPCODE_TGSI_IMM:
         gs_lower_opcode_tgsi_direct(gcc, inst);
         break;
      case TOY_OPCODE_TGSI_INDIRECT_FETCH:
      case TOY_OPCODE_TGSI_INDIRECT_STORE:
         /* TODO similar to VS */
         tc_fail(tc, "no indirection support");
         tc_discard_inst(tc, inst);
         break;
      case TOY_OPCODE_TGSI_TEX:
      case TOY_OPCODE_TGSI_TXB:
      case TOY_OPCODE_TGSI_TXD:
      case TOY_OPCODE_TGSI_TXL:
      case TOY_OPCODE_TGSI_TXP:
      case TOY_OPCODE_TGSI_TXF:
      case TOY_OPCODE_TGSI_TXQ:
      case TOY_OPCODE_TGSI_TXQ_LZ:
      case TOY_OPCODE_TGSI_TEX2:
      case TOY_OPCODE_TGSI_TXB2:
      case TOY_OPCODE_TGSI_TXL2:
      case TOY_OPCODE_TGSI_SAMPLE:
      case TOY_OPCODE_TGSI_SAMPLE_I:
      case TOY_OPCODE_TGSI_SAMPLE_I_MS:
      case TOY_OPCODE_TGSI_SAMPLE_B:
      case TOY_OPCODE_TGSI_SAMPLE_C:
      case TOY_OPCODE_TGSI_SAMPLE_C_LZ:
      case TOY_OPCODE_TGSI_SAMPLE_D:
      case TOY_OPCODE_TGSI_SAMPLE_L:
      case TOY_OPCODE_TGSI_GATHER4:
      case TOY_OPCODE_TGSI_SVIEWINFO:
      case TOY_OPCODE_TGSI_SAMPLE_POS:
      case TOY_OPCODE_TGSI_SAMPLE_INFO:
         /* TODO similar to VS */
         tc_fail(tc, "no sampling support");
         tc_discard_inst(tc, inst);
         break;
      case TOY_OPCODE_EMIT:
         gs_lower_opcode_emit(gcc, inst);
         tc_discard_inst(tc, inst);
         break;
      case TOY_OPCODE_ENDPRIM:
         gs_lower_opcode_endprim(gcc, inst);
         tc_discard_inst(tc, inst);
         break;
      default:
         break;
      }
   }

   /* pass 2: math opcodes and URB writes (which pass 1 may have emitted) */
   tc_head(tc);
   while ((inst = tc_next(tc)) != NULL) {
      switch (inst->opcode) {
      case TOY_OPCODE_INV:
      case TOY_OPCODE_LOG:
      case TOY_OPCODE_EXP:
      case TOY_OPCODE_SQRT:
      case TOY_OPCODE_RSQ:
      case TOY_OPCODE_SIN:
      case TOY_OPCODE_COS:
      case TOY_OPCODE_FDIV:
      case TOY_OPCODE_POW:
      case TOY_OPCODE_INT_DIV_QUOTIENT:
      case TOY_OPCODE_INT_DIV_REMAINDER:
         toy_compiler_lower_math(tc, inst);
         break;
      case TOY_OPCODE_URB_WRITE:
         toy_compiler_lower_to_send(tc, inst, false, BRW_SFID_URB);
         break;
      default:
         /* opcodes above 127 are virtual and must have been lowered */
         if (inst->opcode > 127)
            tc_fail(tc, "unhandled virtual opcode");
         break;
      }
   }
}
/** |
* Get the number of (tessellated) primitives generated by this shader. |
* Return false if that is unknown until runtime. |
*/ |
static void |
get_num_prims_static(struct gs_compile_context *gcc) |
{ |
struct toy_compiler *tc = &gcc->tc; |
const struct toy_inst *inst; |
int num_vertices_in_prim = 0, if_depth = 0, do_depth = 0; |
bool is_static = true; |
tc_head(tc); |
while ((inst = tc_next_no_skip(tc)) != NULL) { |
switch (inst->opcode) { |
case BRW_OPCODE_IF: |
if_depth++; |
break; |
case BRW_OPCODE_ENDIF: |
if_depth--; |
break; |
case BRW_OPCODE_DO: |
do_depth++; |
break; |
case BRW_OPCODE_WHILE: |
do_depth--; |
break; |
case TOY_OPCODE_EMIT: |
if (if_depth || do_depth) { |
is_static = false; |
} |
else { |
gcc->static_data.total_vertices++; |
num_vertices_in_prim++; |
if (num_vertices_in_prim >= gcc->out_vue_min_count) |
gcc->static_data.total_prims++; |
} |
break; |
case TOY_OPCODE_ENDPRIM: |
if (if_depth || do_depth) { |
is_static = false; |
} |
else { |
const int vertidx = gcc->static_data.total_vertices - 1; |
const int idx = vertidx / 32; |
const int subidx = vertidx % 32; |
gcc->static_data.last_vertex[idx] |= 1 << subidx; |
num_vertices_in_prim = 0; |
} |
break; |
default: |
break; |
} |
if (!is_static) |
break; |
} |
gcc->is_static = is_static; |
} |
/**
 * Compile the shader: emit the prologue, lower virtual opcodes, run the
 * backend passes, and assemble the kernel into sh->kernel.  Returns
 * false on failure (only static shaders are supported).
 */
static bool
gs_compile(struct gs_compile_context *gcc)
{
   struct toy_compiler *tc = &gcc->tc;
   struct ilo_shader *sh = gcc->shader;

   get_num_prims_static(gcc);

   if (gcc->is_static) {
      /* prepend the prologue: init vars, FF_SYNC, then seed the URB
       * write header (and SO index when streaming out) from the
       * FF_SYNC response */
      tc_head(tc);

      gs_init_vars(gcc);
      gs_ff_sync(gcc, tdst_d(gcc->vars.tmp), tsrc_imm_d(gcc->static_data.total_prims));
      gs_COPY1(tc, gcc->vars.urb_write_header, 0, tsrc_from(tdst_d(gcc->vars.tmp)), 0);
      if (gcc->write_so)
         gs_COPY4(tc, gcc->vars.so_index, 0, tsrc_from(tdst_d(gcc->vars.tmp)), 1);

      tc_tail(tc);
   }
   else {
      tc_fail(tc, "no control flow support");
      return false;
   }

   /* rasterizer discard: thread still runs for stream output only */
   if (!gcc->write_vue)
      gs_discard(gcc);

   gs_lower_virtual_opcodes(gcc);
   toy_compiler_legalize_for_ra(tc);
   toy_compiler_optimize(tc);
   toy_compiler_allocate_registers(tc,
         gcc->first_free_grf,
         gcc->last_free_grf,
         1);

   toy_compiler_legalize_for_asm(tc);

   if (tc->fail) {
      ilo_err("failed to legalize GS instructions: %s\n", tc->reason);
      return false;
   }

   if (ilo_debug & ILO_DEBUG_GS) {
      ilo_printf("legalized instructions:\n");
      toy_compiler_dump(tc);
      ilo_printf("\n");
   }

   sh->kernel = toy_compiler_assemble(tc, &sh->kernel_size);
   if (!sh->kernel)
      return false;

   if (ilo_debug & ILO_DEBUG_GS) {
      ilo_printf("disassembly:\n");
      toy_compiler_disassemble(tc, sh->kernel, sh->kernel_size);
      ilo_printf("\n");
   }

   return true;
}
/**
 * Compile a passthrough GS: it re-emits the incoming vertices unchanged
 * (used for VS stream output / rasterizer discard).  Returns false on
 * failure.
 */
static bool
gs_compile_passthrough(struct gs_compile_context *gcc)
{
   struct toy_compiler *tc = &gcc->tc;
   struct ilo_shader *sh = gcc->shader;

   /* counts are trivially static: one primitive of in_vue_count vertices */
   gcc->is_static = true;
   gcc->static_data.total_vertices = gcc->in_vue_count;
   gcc->static_data.total_prims = 1;
   gcc->static_data.last_vertex[0] = 1 << (gcc->in_vue_count - 1);

   gs_init_vars(gcc);
   gs_ff_sync(gcc, tdst_d(gcc->vars.tmp), tsrc_imm_d(gcc->static_data.total_prims));
   gs_COPY1(tc, gcc->vars.urb_write_header, 0, tsrc_from(tdst_d(gcc->vars.tmp)), 0);
   if (gcc->write_so)
      gs_COPY4(tc, gcc->vars.so_index, 0, tsrc_from(tdst_d(gcc->vars.tmp)), 1);

   {
      int vert, attr;

      /* copy each input VUE to the output registers and emit it;
       * attributes are packed two per GRF, hence attr / 2 and
       * (attr % 2) * 4 */
      for (vert = 0; vert < gcc->out_vue_min_count; vert++) {
         for (attr = 0; attr < gcc->shader->out.count; attr++) {
            tc_MOV(tc, tdst_from(gcc->vars.tgsi_outs[attr]),
                  tsrc_offset(gcc->payload.vues[vert], attr / 2, (attr % 2) * 4));
         }

         gs_lower_opcode_emit(gcc, NULL);
      }

      gs_lower_opcode_endprim(gcc, NULL);
   }

   /* rasterizer discard: thread still runs for stream output only */
   if (!gcc->write_vue)
      gs_discard(gcc);

   gs_lower_virtual_opcodes(gcc);

   toy_compiler_legalize_for_ra(tc);
   toy_compiler_optimize(tc);
   toy_compiler_allocate_registers(tc,
         gcc->first_free_grf,
         gcc->last_free_grf,
         1);

   toy_compiler_legalize_for_asm(tc);

   if (tc->fail) {
      ilo_err("failed to translate GS TGSI tokens: %s\n", tc->reason);
      return false;
   }

   if (ilo_debug & ILO_DEBUG_GS) {
      int i;

      ilo_printf("VUE count %d, VUE size %d\n",
            gcc->in_vue_count, gcc->in_vue_size);
      ilo_printf("%srasterizer discard\n",
            (gcc->variant->u.gs.rasterizer_discard) ? "" : "no ");

      for (i = 0; i < gcc->so_info->num_outputs; i++) {
         ilo_printf("SO[%d] = OUT[%d]\n", i,
               gcc->so_info->output[i].register_index);
      }

      ilo_printf("legalized instructions:\n");
      toy_compiler_dump(tc);
      ilo_printf("\n");
   }

   sh->kernel = toy_compiler_assemble(tc, &sh->kernel_size);
   if (!sh->kernel) {
      ilo_err("failed to compile GS: %s\n", tc->reason);
      return false;
   }

   if (ilo_debug & ILO_DEBUG_GS) {
      ilo_printf("disassembly:\n");
      toy_compiler_disassemble(tc, sh->kernel, sh->kernel_size);
      ilo_printf("\n");
   }

   return true;
}
/** |
* Translate the TGSI tokens. |
*/ |
static bool |
gs_setup_tgsi(struct toy_compiler *tc, const struct tgsi_token *tokens, |
struct toy_tgsi *tgsi) |
{ |
if (ilo_debug & ILO_DEBUG_GS) { |
ilo_printf("dumping geometry shader\n"); |
ilo_printf("\n"); |
tgsi_dump(tokens, 0); |
ilo_printf("\n"); |
} |
toy_compiler_translate_tgsi(tc, tokens, true, tgsi); |
if (tc->fail) |
return false; |
if (ilo_debug & ILO_DEBUG_GS) { |
ilo_printf("TGSI translator:\n"); |
toy_tgsi_dump(tgsi); |
ilo_printf("\n"); |
toy_compiler_dump(tc); |
ilo_printf("\n"); |
} |
return true; |
} |
/** |
* Set up shader inputs for fixed-function units. |
*/ |
static void |
gs_setup_shader_in(struct ilo_shader *sh, |
const struct ilo_shader_variant *variant) |
{ |
int i; |
for (i = 0; i < variant->u.gs.num_inputs; i++) { |
sh->in.semantic_names[i] = variant->u.gs.semantic_names[i]; |
sh->in.semantic_indices[i] = variant->u.gs.semantic_indices[i]; |
sh->in.interp[i] = TGSI_INTERPOLATE_CONSTANT; |
sh->in.centroid[i] = false; |
} |
sh->in.count = variant->u.gs.num_inputs; |
sh->in.has_pos = false; |
sh->in.has_linear_interp = false; |
sh->in.barycentric_interpolation_mode = 0; |
} |
/**
 * Set up shader outputs for fixed-function units, establishing the VUE
 * layout: PSIZE, POSITION, optional clip distances, COLOR/BCOLOR pairs,
 * then everything else.  output_map[] records, for each VUE slot, the
 * TGSI output slot it comes from (-1 when absent).
 *
 * XXX share the code with VS
 */
static void
gs_setup_shader_out(struct ilo_shader *sh, const struct toy_tgsi *tgsi,
                    bool output_clipdist, int *output_map)
{
   int psize_slot = -1, pos_slot = -1;
   int clipdist_slot[2] = { -1, -1 };
   int color_slot[4] = { -1, -1, -1, -1 };
   int num_outs, i;

   /* find out the slots of outputs that need special care */
   for (i = 0; i < tgsi->num_outputs; i++) {
      switch (tgsi->outputs[i].semantic_name) {
      case TGSI_SEMANTIC_PSIZE:
         psize_slot = i;
         break;
      case TGSI_SEMANTIC_POSITION:
         pos_slot = i;
         break;
      case TGSI_SEMANTIC_CLIPDIST:
         if (tgsi->outputs[i].semantic_index)
            clipdist_slot[1] = i;
         else
            clipdist_slot[0] = i;
         break;
      case TGSI_SEMANTIC_COLOR:
         if (tgsi->outputs[i].semantic_index)
            color_slot[2] = i;
         else
            color_slot[0] = i;
         break;
      case TGSI_SEMANTIC_BCOLOR:
         if (tgsi->outputs[i].semantic_index)
            color_slot[3] = i;
         else
            color_slot[1] = i;
         break;
      default:
         break;
      }
   }

   /* the first two VUEs are always PSIZE and POSITION */
   num_outs = 2;
   output_map[0] = psize_slot;
   output_map[1] = pos_slot;

   sh->out.register_indices[0] =
      (psize_slot >= 0) ? tgsi->outputs[psize_slot].index : -1;
   sh->out.semantic_names[0] = TGSI_SEMANTIC_PSIZE;
   sh->out.semantic_indices[0] = 0;

   sh->out.register_indices[1] =
      (pos_slot >= 0) ? tgsi->outputs[pos_slot].index : -1;
   sh->out.semantic_names[1] = TGSI_SEMANTIC_POSITION;
   sh->out.semantic_indices[1] = 0;

   sh->out.has_pos = true;

   /* followed by optional clip distances */
   if (output_clipdist) {
      sh->out.register_indices[num_outs] =
         (clipdist_slot[0] >= 0) ? tgsi->outputs[clipdist_slot[0]].index : -1;
      sh->out.semantic_names[num_outs] = TGSI_SEMANTIC_CLIPDIST;
      sh->out.semantic_indices[num_outs] = 0;
      output_map[num_outs++] = clipdist_slot[0];

      sh->out.register_indices[num_outs] =
         (clipdist_slot[1] >= 0) ? tgsi->outputs[clipdist_slot[1]].index : -1;
      sh->out.semantic_names[num_outs] = TGSI_SEMANTIC_CLIPDIST;
      sh->out.semantic_indices[num_outs] = 1;
      output_map[num_outs++] = clipdist_slot[1];
   }

   /*
    * make BCOLOR follow COLOR so that we can make use of
    * ATTRIBUTE_SWIZZLE_INPUTATTR_FACING in 3DSTATE_SF
    */
   for (i = 0; i < 4; i++) {
      const int slot = color_slot[i];

      if (slot < 0)
         continue;

      sh->out.register_indices[num_outs] = tgsi->outputs[slot].index;
      sh->out.semantic_names[num_outs] = tgsi->outputs[slot].semantic_name;
      sh->out.semantic_indices[num_outs] = tgsi->outputs[slot].semantic_index;

      output_map[num_outs++] = slot;
   }

   /* add the rest of the outputs */
   for (i = 0; i < tgsi->num_outputs; i++) {
      switch (tgsi->outputs[i].semantic_name) {
      case TGSI_SEMANTIC_PSIZE:
      case TGSI_SEMANTIC_POSITION:
      case TGSI_SEMANTIC_CLIPDIST:
      case TGSI_SEMANTIC_COLOR:
      case TGSI_SEMANTIC_BCOLOR:
         break;
      default:
         sh->out.register_indices[num_outs] = tgsi->outputs[i].index;
         sh->out.semantic_names[num_outs] = tgsi->outputs[i].semantic_name;
         sh->out.semantic_indices[num_outs] = tgsi->outputs[i].semantic_index;
         output_map[num_outs++] = i;
         break;
      }
   }

   sh->out.count = num_outs;
}
/**
 * Allocate GRFs for the compiler-internal variables (URB write header,
 * temp, SO buffers/counters) and resolve where each TGSI output lives.
 */
static void
gs_setup_vars(struct gs_compile_context *gcc)
{
   int grf = gcc->first_free_grf;
   int i;

   gcc->vars.urb_write_header = tdst_d(tdst(TOY_FILE_GRF, grf, 0));
   grf++;

   gcc->vars.tmp = tdst(TOY_FILE_GRF, grf, 0);
   grf++;

   if (gcc->write_so) {
      /* buffer out_vue_min_count - 1 vertices for stream output */
      gcc->vars.buffer_needed = gcc->out_vue_min_count - 1;
      for (i = 0; i < gcc->vars.buffer_needed; i++) {
         gcc->vars.buffers[i] = tdst(TOY_FILE_GRF, grf, 0);
         grf += gcc->shader->out.count;
      }

      gcc->vars.so_written = tdst_d(tdst(TOY_FILE_GRF, grf, 0));
      grf++;

      gcc->vars.so_index = tdst_d(tdst(TOY_FILE_GRF, grf, 0));
      grf++;
   }

   gcc->first_free_grf = grf;

   if (!gcc->tgsi.reg_mapping) {
      /* no TGSI register mapping (passthrough): allocate fresh GRFs for
       * the outputs */
      for (i = 0; i < gcc->shader->out.count; i++)
         gcc->vars.tgsi_outs[i] = tsrc(TOY_FILE_GRF, grf++, 0);

      gcc->first_free_grf = grf;
      return;
   }

   /* map each VUE slot to the VRF holding the TGSI output, or a zero
    * immediate when the shader does not write it */
   for (i = 0; i < gcc->shader->out.count; i++) {
      const int slot = gcc->output_map[i];
      const int vrf = (slot >= 0) ? toy_tgsi_get_vrf(&gcc->tgsi,
            TGSI_FILE_OUTPUT, 0, gcc->tgsi.outputs[slot].index) : -1;

      if (vrf >= 0)
         gcc->vars.tgsi_outs[i] = tsrc(TOY_FILE_VRF, vrf, 0);
      else
         gcc->vars.tgsi_outs[i] = (i == 0) ? tsrc_imm_d(0) : tsrc_imm_f(0.0f);
   }
}
static void |
gs_setup_payload(struct gs_compile_context *gcc) |
{ |
int grf, i; |
grf = 0; |
/* r0: payload header */ |
gcc->payload.header = tsrc_d(tsrc(TOY_FILE_GRF, grf, 0)); |
grf++; |
/* r1: SVBI */ |
if (gcc->write_so) { |
gcc->payload.svbi = tsrc_ud(tsrc(TOY_FILE_GRF, grf, 0)); |
grf++; |
} |
/* URB data */ |
gcc->shader->in.start_grf = grf; |
/* no pull constants */ |
/* VUEs */ |
for (i = 0; i < gcc->in_vue_count; i++) { |
gcc->payload.vues[i] = tsrc(TOY_FILE_GRF, grf, 0); |
grf += gcc->in_vue_size; |
} |
gcc->first_free_grf = grf; |
gcc->last_free_grf = 127; |
} |
/**
 * Set up GS compile context.  This includes translating the TGSI tokens
 * (when present), determining input/output primitive types, and laying
 * out the payload, variables, and MRFs.  @num_verts is only used when
 * there are no tokens (the passthrough case).  Returns false on failure.
 */
static bool
gs_setup(struct gs_compile_context *gcc,
         const struct ilo_shader_state *state,
         const struct ilo_shader_variant *variant,
         int num_verts)
{
   memset(gcc, 0, sizeof(*gcc));

   gcc->shader = CALLOC_STRUCT(ilo_shader);
   if (!gcc->shader)
      return false;

   gcc->variant = variant;
   gcc->so_info = &state->info.stream_output;

   toy_compiler_init(&gcc->tc, state->info.dev);

   gcc->write_so = (state->info.stream_output.num_outputs > 0);
   gcc->write_vue = !gcc->variant->u.gs.rasterizer_discard;

   /* translation operates in ALIGN16 mode with 4-wide execution */
   gcc->tc.templ.access_mode = BRW_ALIGN_16;
   gcc->tc.templ.exec_size = BRW_EXECUTE_4;
   gcc->tc.rect_linear_width = 4;

   if (state->info.tokens) {
      if (!gs_setup_tgsi(&gcc->tc, state->info.tokens, &gcc->tgsi)) {
         toy_compiler_cleanup(&gcc->tc);
         FREE(gcc->shader);
         return false;
      }

      /* derive the number of input vertices from the input primitive */
      switch (gcc->tgsi.props.gs_input_prim) {
      case PIPE_PRIM_POINTS:
         gcc->in_vue_count = 1;
         break;
      case PIPE_PRIM_LINES:
         gcc->in_vue_count = 2;
         gcc->shader->in.discard_adj = true;
         break;
      case PIPE_PRIM_TRIANGLES:
         gcc->in_vue_count = 3;
         gcc->shader->in.discard_adj = true;
         break;
      case PIPE_PRIM_LINES_ADJACENCY:
         gcc->in_vue_count = 4;
         break;
      case PIPE_PRIM_TRIANGLES_ADJACENCY:
         gcc->in_vue_count = 6;
         break;
      default:
         tc_fail(&gcc->tc, "unsupported GS input type");
         gcc->in_vue_count = 0;
         break;
      }

      /* the minimum number of vertices needed to form a primitive */
      switch (gcc->tgsi.props.gs_output_prim) {
      case PIPE_PRIM_POINTS:
         gcc->out_vue_min_count = 1;
         break;
      case PIPE_PRIM_LINE_STRIP:
         gcc->out_vue_min_count = 2;
         break;
      case PIPE_PRIM_TRIANGLE_STRIP:
         gcc->out_vue_min_count = 3;
         break;
      default:
         tc_fail(&gcc->tc, "unsupported GS output type");
         gcc->out_vue_min_count = 0;
         break;
      }
   }
   else {
      /* passthrough: outputs mirror the variant's inputs */
      int i;

      gcc->in_vue_count = num_verts;
      gcc->out_vue_min_count = num_verts;

      gcc->tgsi.num_outputs = gcc->variant->u.gs.num_inputs;
      for (i = 0; i < gcc->variant->u.gs.num_inputs; i++) {
         gcc->tgsi.outputs[i].semantic_name =
            gcc->variant->u.gs.semantic_names[i];
         gcc->tgsi.outputs[i].semantic_index =
            gcc->variant->u.gs.semantic_indices[i];
      }
   }

   /* instructions emitted from here on use ALIGN1 mode */
   gcc->tc.templ.access_mode = BRW_ALIGN_1;

   gs_setup_shader_in(gcc->shader, gcc->variant);
   gs_setup_shader_out(gcc->shader, &gcc->tgsi, false, gcc->output_map);

   /* two attributes are packed per GRF */
   gcc->in_vue_size = (gcc->shader->in.count + 1) / 2;

   gcc->out_vue_size = (gcc->shader->out.count + 1) / 2;

   gs_setup_payload(gcc);
   gs_setup_vars(gcc);

   /* m0 is reserved for system routines */
   gcc->first_free_mrf = 1;
   gcc->last_free_mrf = 15;

   return true;
}
/** |
* Compile the geometry shader. |
*/ |
struct ilo_shader * |
ilo_shader_compile_gs(const struct ilo_shader_state *state, |
const struct ilo_shader_variant *variant) |
{ |
struct gs_compile_context gcc; |
if (!gs_setup(&gcc, state, variant, 0)) |
return NULL; |
if (!gs_compile(&gcc)) { |
FREE(gcc.shader); |
gcc.shader = NULL; |
} |
toy_tgsi_cleanup(&gcc.tgsi); |
toy_compiler_cleanup(&gcc.tc); |
return gcc.shader;; |
} |
static bool |
append_gs_to_vs(struct ilo_shader *vs, struct ilo_shader *gs, int num_verts) |
{ |
void *combined; |
int gs_offset; |
if (!gs) |
return false; |
/* kernels must be aligned to 64-byte */ |
gs_offset = align(vs->kernel_size, 64); |
combined = REALLOC(vs->kernel, vs->kernel_size, |
gs_offset + gs->kernel_size); |
if (!combined) |
return false; |
memcpy(combined + gs_offset, gs->kernel, gs->kernel_size); |
vs->kernel = combined; |
vs->kernel_size = gs_offset + gs->kernel_size; |
vs->stream_output = true; |
vs->gs_offsets[num_verts - 1] = gs_offset; |
vs->gs_start_grf = gs->in.start_grf; |
ilo_shader_destroy_kernel(gs); |
return true; |
} |
bool |
ilo_shader_compile_gs_passthrough(const struct ilo_shader_state *vs_state, |
const struct ilo_shader_variant *vs_variant, |
const int *so_mapping, |
struct ilo_shader *vs) |
{ |
struct gs_compile_context gcc; |
struct ilo_shader_state state; |
struct ilo_shader_variant variant; |
const int num_verts = 3; |
int i; |
/* init GS state and variant */ |
state = *vs_state; |
state.info.tokens = NULL; |
for (i = 0; i < state.info.stream_output.num_outputs; i++) { |
const int reg = state.info.stream_output.output[i].register_index; |
state.info.stream_output.output[i].register_index = so_mapping[reg]; |
} |
variant = *vs_variant; |
variant.u.gs.rasterizer_discard = vs_variant->u.vs.rasterizer_discard; |
variant.u.gs.num_inputs = vs->out.count; |
for (i = 0; i < vs->out.count; i++) { |
variant.u.gs.semantic_names[i] = |
vs->out.semantic_names[i]; |
variant.u.gs.semantic_indices[i] = |
vs->out.semantic_indices[i]; |
} |
if (!gs_setup(&gcc, &state, &variant, num_verts)) |
return false; |
if (!gs_compile_passthrough(&gcc)) { |
FREE(gcc.shader); |
gcc.shader = NULL; |
} |
/* no need to call toy_tgsi_cleanup() */ |
toy_compiler_cleanup(&gcc.tc); |
return append_gs_to_vs(vs, gcc.shader, num_verts); |
} |
/contrib/sdk/sources/Mesa/src/gallium/drivers/ilo/shader/ilo_shader_internal.h |
---|
0,0 → 1,222 |
/* |
* Mesa 3-D graphics library |
* |
* Copyright (C) 2012-2013 LunarG, Inc. |
* |
* Permission is hereby granted, free of charge, to any person obtaining a |
* copy of this software and associated documentation files (the "Software"), |
* to deal in the Software without restriction, including without limitation |
* the rights to use, copy, modify, merge, publish, distribute, sublicense, |
* and/or sell copies of the Software, and to permit persons to whom the |
* Software is furnished to do so, subject to the following conditions: |
* |
* The above copyright notice and this permission notice shall be included |
* in all copies or substantial portions of the Software. |
* |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING |
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER |
* DEALINGS IN THE SOFTWARE. |
* |
* Authors: |
* Chia-I Wu <olv@lunarg.com> |
*/ |
#ifndef ILO_SHADER_INTERNAL_H |
#define ILO_SHADER_INTERNAL_H |
#include "ilo_common.h" |
#include "ilo_context.h" |
#include "ilo_shader.h" |
/* XXX The interface needs to be reworked */ |
/**
 * A shader variant.  It consists of non-orthogonal states of the pipe context
 * affecting the compilation of a shader.
 */
struct ilo_shader_variant {
   union {
      struct {
         bool rasterizer_discard;
         int num_ucps;
      } vs;

      struct {
         bool rasterizer_discard;
         /* GS inputs mirror the outputs of the preceding VS */
         int num_inputs;
         int semantic_names[PIPE_MAX_SHADER_INPUTS];
         int semantic_indices[PIPE_MAX_SHADER_INPUTS];
      } gs;

      struct {
         bool flatshade;
         int fb_height;
         int num_cbufs;
      } fs;
   } u;

   int num_sampler_views;
   /* per-view channel swizzles, 3 bits per channel */
   struct {
      unsigned r:3;
      unsigned g:3;
      unsigned b:3;
      unsigned a:3;
   } sampler_view_swizzles[ILO_MAX_SAMPLER_VIEWS];

   /* NOTE(review): presumably per-coordinate saturation masks, one word
    * per texture coordinate — confirm against users of this field */
   uint32_t saturate_tex_coords[3];
};
/**
 * A compiled shader.
 */
struct ilo_shader {
   struct ilo_shader_variant variant;
   struct ilo_shader_cso cso;

   /* inputs as seen by fixed-function units */
   struct {
      int semantic_names[PIPE_MAX_SHADER_INPUTS];
      int semantic_indices[PIPE_MAX_SHADER_INPUTS];
      int interp[PIPE_MAX_SHADER_INPUTS];
      bool centroid[PIPE_MAX_SHADER_INPUTS];
      int count;

      int start_grf;          /* first GRF holding URB data in the payload */
      bool has_pos;
      bool has_linear_interp;
      int barycentric_interpolation_mode;
      uint32_t const_interp_enable;
      bool discard_adj;       /* drop adjacency vertices before the GS */
   } in;

   /* outputs as seen by fixed-function units */
   struct {
      int register_indices[PIPE_MAX_SHADER_OUTPUTS];
      int semantic_names[PIPE_MAX_SHADER_OUTPUTS];
      int semantic_indices[PIPE_MAX_SHADER_OUTPUTS];
      int count;

      bool has_pos;
   } out;

   bool has_kill;
   bool dispatch_16;

   bool stream_output;
   int svbi_post_inc;
   struct pipe_stream_output_info so_info;

   /* for VS stream output / rasterizer discard: offsets of the appended
    * GS kernels, indexed by vertex count - 1 */
   int gs_offsets[3];
   int gs_start_grf;

   /* the assembled kernel code */
   void *kernel;
   int kernel_size;

   bool routing_initialized;
   int routing_src_semantics[PIPE_MAX_SHADER_OUTPUTS];
   int routing_src_indices[PIPE_MAX_SHADER_OUTPUTS];
   uint32_t routing_sprite_coord_enable;
   struct ilo_kernel_routing routing;

   /* what does the push constant buffer consist of? */
   struct {
      int clip_state_size;
   } pcb;

   struct list_head list;

   /* managed by shader cache */
   bool uploaded;
   uint32_t cache_offset;
};
/**
 * Information about a shader state, gathered from the pipe shader state
 * at creation time.
 */
struct ilo_shader_info {
   const struct ilo_dev_info *dev;
   int type;                  /* PIPE_SHADER_x */

   const struct tgsi_token *tokens;

   struct pipe_stream_output_info stream_output;
   struct {
      unsigned req_local_mem;
      unsigned req_private_mem;
      unsigned req_input_mem;
   } compute;

   /* which non-orthogonal pipe states affect compilation of this shader */
   uint32_t non_orthogonal_states;

   bool has_color_interp;
   bool has_pos;
   bool has_vertexid;
   bool has_instanceid;
   bool fs_color0_writes_all_cbufs;

   int edgeflag_in;
   int edgeflag_out;

   uint32_t shadow_samplers;
   int num_samplers;
};
/**
 * A shader state, owning all compiled variants of one pipe shader.
 */
struct ilo_shader_state {
   struct ilo_shader_info info;

   struct list_head variants;
   int num_variants, total_size;

   struct ilo_shader *shader;  /* the currently bound variant */

   /* managed by shader cache */
   struct ilo_shader_cache *cache;
   struct list_head list;
};
/* initialize a variant from the current context states */
void
ilo_shader_variant_init(struct ilo_shader_variant *variant,
                        const struct ilo_shader_info *info,
                        const struct ilo_context *ilo);

/* bind (compiling if needed) the variant matching @variant */
bool
ilo_shader_state_use_variant(struct ilo_shader_state *state,
                             const struct ilo_shader_variant *variant);

/* per-stage compilers; each returns a caller-owned shader or NULL */
struct ilo_shader *
ilo_shader_compile_vs(const struct ilo_shader_state *state,
                      const struct ilo_shader_variant *variant);

struct ilo_shader *
ilo_shader_compile_gs(const struct ilo_shader_state *state,
                      const struct ilo_shader_variant *variant);

/* compile a passthrough GS and append its kernel to the VS kernel */
bool
ilo_shader_compile_gs_passthrough(const struct ilo_shader_state *vs_state,
                                  const struct ilo_shader_variant *vs_variant,
                                  const int *so_mapping,
                                  struct ilo_shader *vs);

struct ilo_shader *
ilo_shader_compile_fs(const struct ilo_shader_state *state,
                      const struct ilo_shader_variant *variant);

struct ilo_shader *
ilo_shader_compile_cs(const struct ilo_shader_state *state,
                      const struct ilo_shader_variant *variant);
/**
 * Free a compiled shader together with its kernel code.
 */
static inline void
ilo_shader_destroy_kernel(struct ilo_shader *sh)
{
   FREE(sh->kernel);
   FREE(sh);
}
#endif /* ILO_SHADER_INTERNAL_H */ |
/contrib/sdk/sources/Mesa/src/gallium/drivers/ilo/shader/ilo_shader_vs.c |
---|
0,0 → 1,1289 |
/* |
* Mesa 3-D graphics library |
* |
* Copyright (C) 2012-2013 LunarG, Inc. |
* |
* Permission is hereby granted, free of charge, to any person obtaining a |
* copy of this software and associated documentation files (the "Software"), |
* to deal in the Software without restriction, including without limitation |
* the rights to use, copy, modify, merge, publish, distribute, sublicense, |
* and/or sell copies of the Software, and to permit persons to whom the |
* Software is furnished to do so, subject to the following conditions: |
* |
* The above copyright notice and this permission notice shall be included |
* in all copies or substantial portions of the Software. |
* |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING |
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER |
* DEALINGS IN THE SOFTWARE. |
* |
* Authors: |
* Chia-I Wu <olv@lunarg.com> |
*/ |
#include "tgsi/tgsi_dump.h" |
#include "tgsi/tgsi_util.h" |
#include "toy_compiler.h" |
#include "toy_tgsi.h" |
#include "toy_legalize.h" |
#include "toy_optimize.h" |
#include "toy_helpers.h" |
#include "ilo_context.h" |
#include "ilo_shader_internal.h" |
/* per-compilation state of the VS compiler */
struct vs_compile_context {
   struct ilo_shader *shader;
   const struct ilo_shader_variant *variant;

   struct toy_compiler tc;
   struct toy_tgsi tgsi;
   enum brw_message_target const_cache;  /* SFID used for constant reads */

   /* VUE slot -> TGSI output slot (-1 when absent) */
   int output_map[PIPE_MAX_SHADER_OUTPUTS];

   int num_grf_per_vrf;

   /* payload / register-file layout */
   int first_const_grf;
   int first_vue_grf;
   int first_free_grf;
   int last_free_grf;

   int first_free_mrf;
   int last_free_mrf;
};
static void |
vs_lower_opcode_tgsi_in(struct vs_compile_context *vcc, |
struct toy_dst dst, int dim, int idx) |
{ |
struct toy_compiler *tc = &vcc->tc; |
int slot; |
assert(!dim); |
slot = toy_tgsi_find_input(&vcc->tgsi, idx); |
if (slot >= 0) { |
const int first_in_grf = vcc->first_vue_grf + |
(vcc->shader->in.count - vcc->tgsi.num_inputs); |
const int grf = first_in_grf + vcc->tgsi.inputs[slot].semantic_index; |
const struct toy_src src = tsrc(TOY_FILE_GRF, grf, 0); |
tc_MOV(tc, dst, src); |
} |
else { |
/* undeclared input */ |
tc_MOV(tc, dst, tsrc_imm_f(0.0f)); |
} |
} |
static void |
vs_lower_opcode_tgsi_const_gen6(struct vs_compile_context *vcc, |
struct toy_dst dst, int dim, |
struct toy_src idx) |
{ |
const struct toy_dst header = |
tdst_ud(tdst(TOY_FILE_MRF, vcc->first_free_mrf, 0)); |
const struct toy_dst block_offsets = |
tdst_ud(tdst(TOY_FILE_MRF, vcc->first_free_mrf + 1, 0)); |
const struct toy_src r0 = tsrc_ud(tsrc(TOY_FILE_GRF, 0, 0)); |
struct toy_compiler *tc = &vcc->tc; |
unsigned msg_type, msg_ctrl, msg_len; |
struct toy_inst *inst; |
struct toy_src desc; |
/* set message header */ |
inst = tc_MOV(tc, header, r0); |
inst->mask_ctrl = BRW_MASK_DISABLE; |
/* set block offsets */ |
tc_MOV(tc, block_offsets, idx); |
msg_type = GEN6_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ; |
msg_ctrl = BRW_DATAPORT_OWORD_DUAL_BLOCK_1OWORD << 8;; |
msg_len = 2; |
desc = tsrc_imm_mdesc_data_port(tc, false, msg_len, 1, true, false, |
msg_type, msg_ctrl, ILO_VS_CONST_SURFACE(dim)); |
tc_SEND(tc, dst, tsrc_from(header), desc, vcc->const_cache); |
} |
/**
 * Lower a constant fetch on GEN7+ using a SIMD4x2 sampler LD message
 * instead of a data-port read.
 */
static void
vs_lower_opcode_tgsi_const_gen7(struct vs_compile_context *vcc,
                                struct toy_dst dst, int dim,
                                struct toy_src idx)
{
   struct toy_compiler *tc = &vcc->tc;
   const struct toy_dst offset =
      tdst_ud(tdst(TOY_FILE_MRF, vcc->first_free_mrf, 0));
   struct toy_src desc;

   /*
    * In 259b65e2e7938de4aab323033cfe2b33369ddb07, pull constant load was
    * changed from OWord Dual Block Read to ld to increase performance in the
    * classic driver.  Since we use the constant cache instead of the data
    * cache, I wonder if we still want to follow the classic driver.
    */

   /* set offset */
   tc_MOV(tc, offset, idx);

   desc = tsrc_imm_mdesc_sampler(tc, 1, 1, false,
         BRW_SAMPLER_SIMD_MODE_SIMD4X2,
         GEN5_SAMPLER_MESSAGE_SAMPLE_LD,
         0,
         ILO_VS_CONST_SURFACE(dim));

   tc_SEND(tc, dst, tsrc_from(offset), desc, BRW_SFID_SAMPLER);
}
static void |
vs_lower_opcode_tgsi_imm(struct vs_compile_context *vcc, |
struct toy_dst dst, int idx) |
{ |
const uint32_t *imm; |
int ch; |
imm = toy_tgsi_get_imm(&vcc->tgsi, idx, NULL); |
for (ch = 0; ch < 4; ch++) { |
/* raw moves */ |
tc_MOV(&vcc->tc, |
tdst_writemask(tdst_ud(dst), 1 << ch), |
tsrc_imm_ud(imm[ch])); |
} |
} |
/**
 * Lower a TGSI system-value fetch.  Only VertexID and InstanceID are
 * supported; anything else fails the compilation and reads as zero.
 */
static void
vs_lower_opcode_tgsi_sv(struct vs_compile_context *vcc,
                        struct toy_dst dst, int dim, int idx)
{
   struct toy_compiler *tc = &vcc->tc;
   const struct toy_tgsi *tgsi = &vcc->tgsi;
   int slot;

   assert(!dim);

   slot = toy_tgsi_find_system_value(tgsi, idx);
   if (slot < 0)
      return;

   switch (tgsi->system_values[slot].semantic_name) {
   case TGSI_SEMANTIC_INSTANCEID:
   case TGSI_SEMANTIC_VERTEXID:
      /*
       * In 3DSTATE_VERTEX_ELEMENTS, we prepend an extra vertex element for
       * the generated IDs, with VID in the X channel and IID in the Y
       * channel.
       */
      {
         const int grf = vcc->first_vue_grf;
         const struct toy_src src = tsrc(TOY_FILE_GRF, grf, 0);
         const enum toy_swizzle swizzle =
            (tgsi->system_values[slot].semantic_name ==
             TGSI_SEMANTIC_INSTANCEID) ? TOY_SWIZZLE_Y : TOY_SWIZZLE_X;

         tc_MOV(tc, tdst_d(dst), tsrc_d(tsrc_swizzle1(src, swizzle)));
      }
      break;
   case TGSI_SEMANTIC_PRIMID:
   default:
      tc_fail(tc, "unhandled system value");
      tc_MOV(tc, dst, tsrc_imm_d(0));
      break;
   }
}
static void |
vs_lower_opcode_tgsi_direct(struct vs_compile_context *vcc, |
struct toy_inst *inst) |
{ |
struct toy_compiler *tc = &vcc->tc; |
int dim, idx; |
assert(inst->src[0].file == TOY_FILE_IMM); |
dim = inst->src[0].val32; |
assert(inst->src[1].file == TOY_FILE_IMM); |
idx = inst->src[1].val32; |
switch (inst->opcode) { |
case TOY_OPCODE_TGSI_IN: |
vs_lower_opcode_tgsi_in(vcc, inst->dst, dim, idx); |
break; |
case TOY_OPCODE_TGSI_CONST: |
if (tc->dev->gen >= ILO_GEN(7)) |
vs_lower_opcode_tgsi_const_gen7(vcc, inst->dst, dim, inst->src[1]); |
else |
vs_lower_opcode_tgsi_const_gen6(vcc, inst->dst, dim, inst->src[1]); |
break; |
case TOY_OPCODE_TGSI_SV: |
vs_lower_opcode_tgsi_sv(vcc, inst->dst, dim, idx); |
break; |
case TOY_OPCODE_TGSI_IMM: |
assert(!dim); |
vs_lower_opcode_tgsi_imm(vcc, inst->dst, idx); |
break; |
default: |
tc_fail(tc, "unhandled TGSI fetch"); |
break; |
} |
tc_discard_inst(tc, inst); |
} |
/**
 * Lower an indirect TGSI fetch/store.  Only indirect fetches from the
 * constant file are supported; dimension indirection must be immediate.
 */
static void
vs_lower_opcode_tgsi_indirect(struct vs_compile_context *vcc,
                              struct toy_inst *inst)
{
   struct toy_compiler *tc = &vcc->tc;
   enum tgsi_file_type file;
   int dim, idx;
   struct toy_src indirect_dim, indirect_idx;

   assert(inst->src[0].file == TOY_FILE_IMM);
   file = inst->src[0].val32;

   assert(inst->src[1].file == TOY_FILE_IMM);
   dim = inst->src[1].val32;
   indirect_dim = inst->src[2];

   assert(inst->src[3].file == TOY_FILE_IMM);
   idx = inst->src[3].val32;
   indirect_idx = inst->src[4];

   /* no dimension indirection */
   assert(indirect_dim.file == TOY_FILE_IMM);
   dim += indirect_dim.val32;

   switch (inst->opcode) {
   case TOY_OPCODE_TGSI_INDIRECT_FETCH:
      if (file == TGSI_FILE_CONSTANT) {
         /* fold the constant base index into the indirect index */
         if (idx) {
            struct toy_dst tmp = tc_alloc_tmp(tc);

            tc_ADD(tc, tmp, indirect_idx, tsrc_imm_d(idx));
            indirect_idx = tsrc_from(tmp);
         }

         if (tc->dev->gen >= ILO_GEN(7))
            vs_lower_opcode_tgsi_const_gen7(vcc, inst->dst, dim, indirect_idx);
         else
            vs_lower_opcode_tgsi_const_gen6(vcc, inst->dst, dim, indirect_idx);
         break;
      }
      /* fall through */
   case TOY_OPCODE_TGSI_INDIRECT_STORE:
   default:
      tc_fail(tc, "unhandled TGSI indirection");
      break;
   }

   tc_discard_inst(tc, inst);
}
/**
 * Emit instructions to move sampling parameters to the message registers.
 * Returns the message length in registers (parameters are packed four
 * per register).
 */
static int
vs_add_sampler_params(struct toy_compiler *tc, int msg_type, int base_mrf,
                      struct toy_src coords, int num_coords,
                      struct toy_src bias_or_lod, struct toy_src ref_or_si,
                      struct toy_src ddx, struct toy_src ddy, int num_derivs)
{
   const unsigned coords_writemask = (1 << num_coords) - 1;
   struct toy_dst m[3];
   int num_params, i;

   assert(num_coords <= 4);
   assert(num_derivs <= 3 && num_derivs <= num_coords);

   for (i = 0; i < Elements(m); i++)
      m[i] = tdst(TOY_FILE_MRF, base_mrf + i, 0);

   switch (msg_type) {
   case GEN5_SAMPLER_MESSAGE_SAMPLE_LOD:
      /* layout: coords in m0, LOD in m1.x */
      tc_MOV(tc, tdst_writemask(m[0], coords_writemask), coords);
      tc_MOV(tc, tdst_writemask(m[1], TOY_WRITEMASK_X), bias_or_lod);
      num_params = 5;
      break;
   case GEN5_SAMPLER_MESSAGE_SAMPLE_DERIVS:
      /* layout: coords in m0, derivatives interleaved (dudx, dudy,
       * dvdx, dvdy, ...) in m1/m2 */
      tc_MOV(tc, tdst_writemask(m[0], coords_writemask), coords);
      tc_MOV(tc, tdst_writemask(m[1], TOY_WRITEMASK_XZ),
            tsrc_swizzle(ddx, 0, 0, 1, 1));
      tc_MOV(tc, tdst_writemask(m[1], TOY_WRITEMASK_YW),
            tsrc_swizzle(ddy, 0, 0, 1, 1));
      if (num_derivs > 2) {
         tc_MOV(tc, tdst_writemask(m[2], TOY_WRITEMASK_X),
               tsrc_swizzle1(ddx, 2));
         tc_MOV(tc, tdst_writemask(m[2], TOY_WRITEMASK_Y),
               tsrc_swizzle1(ddy, 2));
      }
      num_params = 4 + num_derivs * 2;
      break;
   case GEN5_SAMPLER_MESSAGE_SAMPLE_LOD_COMPARE:
      /* layout: coords in m0, shadow ref in m1.x, LOD in m1.y */
      tc_MOV(tc, tdst_writemask(m[0], coords_writemask), coords);
      tc_MOV(tc, tdst_writemask(m[1], TOY_WRITEMASK_X), ref_or_si);
      tc_MOV(tc, tdst_writemask(m[1], TOY_WRITEMASK_Y), bias_or_lod);
      num_params = 6;
      break;
   case GEN5_SAMPLER_MESSAGE_SAMPLE_LD:
      /* layout: integer coords in m0.xyz, LOD in m0.w; GEN6 also takes
       * the sample index in m1.x */
      assert(num_coords <= 3);
      tc_MOV(tc, tdst_writemask(tdst_d(m[0]), coords_writemask), coords);
      tc_MOV(tc, tdst_writemask(tdst_d(m[0]), TOY_WRITEMASK_W), bias_or_lod);
      if (tc->dev->gen >= ILO_GEN(7)) {
         num_params = 4;
      }
      else {
         tc_MOV(tc, tdst_writemask(tdst_d(m[1]), TOY_WRITEMASK_X), ref_or_si);
         num_params = 5;
      }
      break;
   case GEN5_SAMPLER_MESSAGE_SAMPLE_RESINFO:
      /* layout: LOD in m0.x only */
      tc_MOV(tc, tdst_writemask(tdst_d(m[0]), TOY_WRITEMASK_X), bias_or_lod);
      num_params = 1;
      break;
   default:
      tc_fail(tc, "unknown sampler opcode");
      num_params = 0;
      break;
   }

   return (num_params + 3) / 4;
}
/** |
* Set up message registers and return the message descriptor for sampling. |
*/ |
static struct toy_src |
vs_prepare_tgsi_sampling(struct toy_compiler *tc, const struct toy_inst *inst, |
int base_mrf, unsigned *ret_sampler_index) |
{ |
unsigned simd_mode, msg_type, msg_len, sampler_index, binding_table_index; |
struct toy_src coords, ddx, ddy, bias_or_lod, ref_or_si; |
int num_coords, ref_pos, num_derivs; |
int sampler_src; |
simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD4X2; |
coords = inst->src[0]; |
ddx = tsrc_null(); |
ddy = tsrc_null(); |
bias_or_lod = tsrc_null(); |
ref_or_si = tsrc_null(); |
num_derivs = 0; |
sampler_src = 1; |
num_coords = tgsi_util_get_texture_coord_dim(inst->tex.target, &ref_pos); |
/* extract the parameters */ |
switch (inst->opcode) { |
case TOY_OPCODE_TGSI_TXD: |
if (ref_pos >= 0) |
tc_fail(tc, "TXD with shadow sampler not supported"); |
msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_DERIVS; |
ddx = inst->src[1]; |
ddy = inst->src[2]; |
num_derivs = num_coords; |
sampler_src = 3; |
break; |
case TOY_OPCODE_TGSI_TXL: |
if (ref_pos >= 0) { |
assert(ref_pos < 3); |
msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_LOD_COMPARE; |
ref_or_si = tsrc_swizzle1(coords, ref_pos); |
} |
else { |
msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_LOD; |
} |
bias_or_lod = tsrc_swizzle1(coords, TOY_SWIZZLE_W); |
break; |
case TOY_OPCODE_TGSI_TXF: |
msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_LD; |
switch (inst->tex.target) { |
case TGSI_TEXTURE_2D_MSAA: |
case TGSI_TEXTURE_2D_ARRAY_MSAA: |
assert(ref_pos >= 0 && ref_pos < 4); |
/* lod is always 0 */ |
bias_or_lod = tsrc_imm_d(0); |
ref_or_si = tsrc_swizzle1(coords, ref_pos); |
break; |
default: |
bias_or_lod = tsrc_swizzle1(coords, TOY_SWIZZLE_W); |
break; |
} |
/* offset the coordinates */ |
if (!tsrc_is_null(inst->tex.offsets[0])) { |
struct toy_dst tmp; |
tmp = tc_alloc_tmp(tc); |
tc_ADD(tc, tmp, coords, inst->tex.offsets[0]); |
coords = tsrc_from(tmp); |
} |
sampler_src = 1; |
break; |
case TOY_OPCODE_TGSI_TXQ: |
msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_RESINFO; |
num_coords = 0; |
bias_or_lod = tsrc_swizzle1(coords, TOY_SWIZZLE_X); |
break; |
case TOY_OPCODE_TGSI_TXQ_LZ: |
msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_RESINFO; |
num_coords = 0; |
sampler_src = 0; |
break; |
case TOY_OPCODE_TGSI_TXL2: |
if (ref_pos >= 0) { |
assert(ref_pos < 4); |
msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_LOD_COMPARE; |
ref_or_si = tsrc_swizzle1(coords, ref_pos); |
} |
else { |
msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_LOD; |
} |
bias_or_lod = tsrc_swizzle1(inst->src[1], TOY_SWIZZLE_X); |
sampler_src = 2; |
break; |
default: |
assert(!"unhandled sampling opcode"); |
if (ret_sampler_index) |
*ret_sampler_index = 0; |
return tsrc_null(); |
break; |
} |
assert(inst->src[sampler_src].file == TOY_FILE_IMM); |
sampler_index = inst->src[sampler_src].val32; |
binding_table_index = ILO_VS_TEXTURE_SURFACE(sampler_index); |
/* |
* From the Sandy Bridge PRM, volume 4 part 1, page 18: |
* |
* "Note that the (cube map) coordinates delivered to the sampling |
* engine must already have been divided by the component with the |
* largest absolute value." |
*/ |
switch (inst->tex.target) { |
case TGSI_TEXTURE_CUBE: |
case TGSI_TEXTURE_SHADOWCUBE: |
case TGSI_TEXTURE_CUBE_ARRAY: |
case TGSI_TEXTURE_SHADOWCUBE_ARRAY: |
/* TXQ does not need coordinates */ |
if (num_coords >= 3) { |
struct toy_dst tmp, max; |
struct toy_src abs_coords[3]; |
int i; |
tmp = tc_alloc_tmp(tc); |
max = tdst_writemask(tmp, TOY_WRITEMASK_W); |
for (i = 0; i < 3; i++) |
abs_coords[i] = tsrc_absolute(tsrc_swizzle1(coords, i)); |
tc_SEL(tc, max, abs_coords[0], abs_coords[0], BRW_CONDITIONAL_GE); |
tc_SEL(tc, max, tsrc_from(max), abs_coords[0], BRW_CONDITIONAL_GE); |
tc_INV(tc, max, tsrc_from(max)); |
for (i = 0; i < 3; i++) |
tc_MUL(tc, tdst_writemask(tmp, 1 << i), coords, tsrc_from(max)); |
coords = tsrc_from(tmp); |
} |
break; |
} |
/* set up sampler parameters */ |
msg_len = vs_add_sampler_params(tc, msg_type, base_mrf, |
coords, num_coords, bias_or_lod, ref_or_si, ddx, ddy, num_derivs); |
/* |
* From the Sandy Bridge PRM, volume 4 part 1, page 136: |
* |
* "The maximum message length allowed to the sampler is 11. This would |
* disallow sample_d, sample_b_c, and sample_l_c with a SIMD Mode of |
* SIMD16." |
*/ |
if (msg_len > 11) |
tc_fail(tc, "maximum length for messages to the sampler is 11"); |
if (ret_sampler_index) |
*ret_sampler_index = sampler_index; |
return tsrc_imm_mdesc_sampler(tc, msg_len, 1, |
false, simd_mode, msg_type, sampler_index, binding_table_index); |
} |
static void |
vs_lower_opcode_tgsi_sampling(struct vs_compile_context *vcc, |
struct toy_inst *inst) |
{ |
struct toy_compiler *tc = &vcc->tc; |
struct toy_src desc; |
struct toy_dst dst, tmp; |
unsigned sampler_index; |
int swizzles[4], i; |
unsigned swizzle_zero_mask, swizzle_one_mask, swizzle_normal_mask; |
bool need_filter; |
desc = vs_prepare_tgsi_sampling(tc, inst, |
vcc->first_free_mrf, &sampler_index); |
switch (inst->opcode) { |
case TOY_OPCODE_TGSI_TXF: |
case TOY_OPCODE_TGSI_TXQ: |
case TOY_OPCODE_TGSI_TXQ_LZ: |
need_filter = false; |
break; |
default: |
need_filter = true; |
break; |
} |
toy_compiler_lower_to_send(tc, inst, false, BRW_SFID_SAMPLER); |
inst->src[0] = tsrc(TOY_FILE_MRF, vcc->first_free_mrf, 0); |
inst->src[1] = desc; |
/* write to a temp first */ |
tmp = tc_alloc_tmp(tc); |
tmp.type = inst->dst.type; |
dst = inst->dst; |
inst->dst = tmp; |
tc_move_inst(tc, inst); |
if (need_filter) { |
assert(sampler_index < vcc->variant->num_sampler_views); |
swizzles[0] = vcc->variant->sampler_view_swizzles[sampler_index].r; |
swizzles[1] = vcc->variant->sampler_view_swizzles[sampler_index].g; |
swizzles[2] = vcc->variant->sampler_view_swizzles[sampler_index].b; |
swizzles[3] = vcc->variant->sampler_view_swizzles[sampler_index].a; |
} |
else { |
swizzles[0] = PIPE_SWIZZLE_RED; |
swizzles[1] = PIPE_SWIZZLE_GREEN; |
swizzles[2] = PIPE_SWIZZLE_BLUE; |
swizzles[3] = PIPE_SWIZZLE_ALPHA; |
} |
swizzle_zero_mask = 0; |
swizzle_one_mask = 0; |
swizzle_normal_mask = 0; |
for (i = 0; i < 4; i++) { |
switch (swizzles[i]) { |
case PIPE_SWIZZLE_ZERO: |
swizzle_zero_mask |= 1 << i; |
swizzles[i] = i; |
break; |
case PIPE_SWIZZLE_ONE: |
swizzle_one_mask |= 1 << i; |
swizzles[i] = i; |
break; |
default: |
swizzle_normal_mask |= 1 << i; |
break; |
} |
} |
/* swizzle the results */ |
if (swizzle_normal_mask) { |
tc_MOV(tc, tdst_writemask(dst, swizzle_normal_mask), |
tsrc_swizzle(tsrc_from(tmp), swizzles[0], |
swizzles[1], swizzles[2], swizzles[3])); |
} |
if (swizzle_zero_mask) |
tc_MOV(tc, tdst_writemask(dst, swizzle_zero_mask), tsrc_imm_f(0.0f)); |
if (swizzle_one_mask) |
tc_MOV(tc, tdst_writemask(dst, swizzle_one_mask), tsrc_imm_f(1.0f)); |
} |
/**
 * Lower TOY_OPCODE_URB_WRITE to a SEND message targeting the URB unit.
 */
static void
vs_lower_opcode_urb_write(struct toy_compiler *tc, struct toy_inst *inst)
{
   /* vs_write_vue() has set up the message registers */
   toy_compiler_lower_to_send(tc, inst, false, BRW_SFID_URB);
}
/**
 * Walk the instruction list and lower every virtual opcode to instructions
 * the hardware understands.
 */
static void
vs_lower_virtual_opcodes(struct vs_compile_context *vcc)
{
   struct toy_compiler *tc = &vcc->tc;
   struct toy_inst *inst;
   /* rewind to the head; the lowering helpers insert and discard entries */
   tc_head(tc);
   while ((inst = tc_next(tc)) != NULL) {
      switch (inst->opcode) {
      /* direct register-file accesses */
      case TOY_OPCODE_TGSI_IN:
      case TOY_OPCODE_TGSI_CONST:
      case TOY_OPCODE_TGSI_SV:
      case TOY_OPCODE_TGSI_IMM:
         vs_lower_opcode_tgsi_direct(vcc, inst);
         break;
      /* relative-addressed accesses */
      case TOY_OPCODE_TGSI_INDIRECT_FETCH:
      case TOY_OPCODE_TGSI_INDIRECT_STORE:
         vs_lower_opcode_tgsi_indirect(vcc, inst);
         break;
      /* texture sampling and resource queries */
      case TOY_OPCODE_TGSI_TEX:
      case TOY_OPCODE_TGSI_TXB:
      case TOY_OPCODE_TGSI_TXD:
      case TOY_OPCODE_TGSI_TXL:
      case TOY_OPCODE_TGSI_TXP:
      case TOY_OPCODE_TGSI_TXF:
      case TOY_OPCODE_TGSI_TXQ:
      case TOY_OPCODE_TGSI_TXQ_LZ:
      case TOY_OPCODE_TGSI_TEX2:
      case TOY_OPCODE_TGSI_TXB2:
      case TOY_OPCODE_TGSI_TXL2:
      case TOY_OPCODE_TGSI_SAMPLE:
      case TOY_OPCODE_TGSI_SAMPLE_I:
      case TOY_OPCODE_TGSI_SAMPLE_I_MS:
      case TOY_OPCODE_TGSI_SAMPLE_B:
      case TOY_OPCODE_TGSI_SAMPLE_C:
      case TOY_OPCODE_TGSI_SAMPLE_C_LZ:
      case TOY_OPCODE_TGSI_SAMPLE_D:
      case TOY_OPCODE_TGSI_SAMPLE_L:
      case TOY_OPCODE_TGSI_GATHER4:
      case TOY_OPCODE_TGSI_SVIEWINFO:
      case TOY_OPCODE_TGSI_SAMPLE_POS:
      case TOY_OPCODE_TGSI_SAMPLE_INFO:
         vs_lower_opcode_tgsi_sampling(vcc, inst);
         break;
      /* transcendental and division ops go to the shared math helper */
      case TOY_OPCODE_INV:
      case TOY_OPCODE_LOG:
      case TOY_OPCODE_EXP:
      case TOY_OPCODE_SQRT:
      case TOY_OPCODE_RSQ:
      case TOY_OPCODE_SIN:
      case TOY_OPCODE_COS:
      case TOY_OPCODE_FDIV:
      case TOY_OPCODE_POW:
      case TOY_OPCODE_INT_DIV_QUOTIENT:
      case TOY_OPCODE_INT_DIV_REMAINDER:
         toy_compiler_lower_math(tc, inst);
         break;
      case TOY_OPCODE_URB_WRITE:
         vs_lower_opcode_urb_write(tc, inst);
         break;
      default:
         /* opcodes above 127 are virtual and must have been handled above */
         if (inst->opcode > 127)
            tc_fail(tc, "unhandled virtual opcode");
         break;
      }
   }
}
/**
 * Compile the shader: lower virtual opcodes, optimize, allocate registers,
 * legalize, and assemble the kernel into vcc->shader.
 *
 * Returns false (after logging the reason) when legalization or assembly
 * fails.
 */
static bool
vs_compile(struct vs_compile_context *vcc)
{
   struct toy_compiler *tc = &vcc->tc;
   struct ilo_shader *sh = vcc->shader;
   vs_lower_virtual_opcodes(vcc);
   toy_compiler_legalize_for_ra(tc);
   toy_compiler_optimize(tc);
   toy_compiler_allocate_registers(tc,
         vcc->first_free_grf,
         vcc->last_free_grf,
         vcc->num_grf_per_vrf);
   toy_compiler_legalize_for_asm(tc);
   if (tc->fail) {
      ilo_err("failed to legalize VS instructions: %s\n", tc->reason);
      return false;
   }
   if (ilo_debug & ILO_DEBUG_VS) {
      ilo_printf("legalized instructions:\n");
      toy_compiler_dump(tc);
      ilo_printf("\n");
   }
   /*
    * The dead branch below is debugging scaffolding: flip the condition to
    * splice in hand-written microcode instead of the assembled kernel.
    */
   if (true) {
      sh->kernel = toy_compiler_assemble(tc, &sh->kernel_size);
   }
   else {
      static const uint32_t microcode[] = {
         /* fill in the microcode here */
         0x0, 0x0, 0x0, 0x0,
      };
      const bool swap = true;
      sh->kernel_size = sizeof(microcode);
      sh->kernel = MALLOC(sh->kernel_size);
      if (sh->kernel) {
         const int num_dwords = sizeof(microcode) / 4;
         const uint32_t *src = microcode;
         uint32_t *dst = (uint32_t *) sh->kernel;
         int i;
         /* reverse the dwords within each group of four when swapping */
         for (i = 0; i < num_dwords; i += 4) {
            if (swap) {
               dst[i + 0] = src[i + 3];
               dst[i + 1] = src[i + 2];
               dst[i + 2] = src[i + 1];
               dst[i + 3] = src[i + 0];
            }
            else {
               /* NOTE(review): this memcpy does not advance with i — it
                * always copies the first 16 bytes.  Harmless while the
                * branch is dead, but fix before enabling. */
               memcpy(dst, src, 16);
            }
         }
      }
   }
   if (!sh->kernel) {
      ilo_err("failed to compile VS: %s\n", tc->reason);
      return false;
   }
   if (ilo_debug & ILO_DEBUG_VS) {
      ilo_printf("disassembly:\n");
      toy_compiler_disassemble(tc, sh->kernel, sh->kernel_size);
      ilo_printf("\n");
   }
   return true;
}
/**
 * Collect the toy registers to be written to the VUE.
 *
 * Fills outs[] with one source per VUE attribute (vcc->shader->out.count
 * entries) and returns that count.  Attributes without a backing TGSI
 * output get an immediate zero; CLIPDIST attributes without a TGSI output
 * are computed here from the clip vertex (or position) and the user clip
 * planes stored in the constant GRFs.
 */
static int
vs_collect_outputs(struct vs_compile_context *vcc, struct toy_src *outs)
{
   const struct toy_tgsi *tgsi = &vcc->tgsi;
   int i;
   for (i = 0; i < vcc->shader->out.count; i++) {
      const int slot = vcc->output_map[i];
      /* VRF holding this output, or -1 when the attribute has no output */
      const int vrf = (slot >= 0) ? toy_tgsi_get_vrf(tgsi,
            TGSI_FILE_OUTPUT, 0, tgsi->outputs[slot].index) : -1;
      struct toy_src src;
      if (vrf >= 0) {
         struct toy_dst dst;
         dst = tdst(TOY_FILE_VRF, vrf, 0);
         src = tsrc_from(dst);
         if (i == 0) {
            /* PSIZE is at channel W */
            tc_MOV(&vcc->tc, tdst_writemask(dst, TOY_WRITEMASK_W),
                  tsrc_swizzle1(src, TOY_SWIZZLE_X));
            /* the other channels are for the header */
            dst = tdst_d(dst);
            tc_MOV(&vcc->tc, tdst_writemask(dst, TOY_WRITEMASK_XYZ),
                  tsrc_imm_d(0));
         }
         else {
            /* initialize unused channels to 0.0f */
            if (tgsi->outputs[slot].undefined_mask) {
               dst = tdst_writemask(dst, tgsi->outputs[slot].undefined_mask);
               tc_MOV(&vcc->tc, dst, tsrc_imm_f(0.0f));
            }
         }
      }
      else {
         /* XXX this is too ugly */
         if (vcc->shader->out.semantic_names[i] == TGSI_SEMANTIC_CLIPDIST &&
             slot < 0) {
            /* ok, we need to compute clip distance */
            int clipvert_slot = -1, clipvert_vrf, j;
            /* prefer CLIPVERTEX; fall back to POSITION */
            for (j = 0; j < tgsi->num_outputs; j++) {
               if (tgsi->outputs[j].semantic_name ==
                     TGSI_SEMANTIC_CLIPVERTEX) {
                  clipvert_slot = j;
                  break;
               }
               else if (tgsi->outputs[j].semantic_name ==
                     TGSI_SEMANTIC_POSITION) {
                  /* remember pos, but keep looking */
                  clipvert_slot = j;
               }
            }
            clipvert_vrf = (clipvert_slot >= 0) ? toy_tgsi_get_vrf(tgsi,
                  TGSI_FILE_OUTPUT, 0, tgsi->outputs[clipvert_slot].index) : -1;
            if (clipvert_vrf >= 0) {
               struct toy_dst tmp = tc_alloc_tmp(&vcc->tc);
               struct toy_src clipvert = tsrc(TOY_FILE_VRF, clipvert_vrf, 0);
               int first_ucp, last_ucp;
               /* CLIPDIST1 covers planes 4-7, CLIPDIST0 covers planes 0-3 */
               if (vcc->shader->out.semantic_indices[i]) {
                  first_ucp = 4;
                  last_ucp = MIN2(7, vcc->variant->u.vs.num_ucps - 1);
               }
               else {
                  first_ucp = 0;
                  last_ucp = MIN2(3, vcc->variant->u.vs.num_ucps - 1);
               }
               /* each constant GRF holds two packed clip planes */
               for (j = first_ucp; j <= last_ucp; j++) {
                  const int plane_grf = vcc->first_const_grf + j / 2;
                  const int plane_subreg = (j & 1) * 16;
                  const struct toy_src plane = tsrc_rect(tsrc(TOY_FILE_GRF,
                           plane_grf, plane_subreg), TOY_RECT_041);
                  const unsigned writemask = 1 << ((j >= 4) ? j - 4 : j);
                  /* clip distance is dot(clipvert, plane) */
                  tc_DP4(&vcc->tc, tdst_writemask(tmp, writemask),
                        clipvert, plane);
               }
               src = tsrc_from(tmp);
            }
            else {
               src = tsrc_imm_f(0.0f);
            }
         }
         else {
            /* missing output: the header slot gets integer 0, others 0.0f */
            src = (i == 0) ? tsrc_imm_d(0) : tsrc_imm_f(0.0f);
         }
      }
      outs[i] = src;
   }
   return i;
}
/**
 * Emit instructions to write the VUE.
 *
 * Builds the URB write header from r0, collects the output attributes, and
 * sends them in as many URB_WRITE messages as the free MRFs allow.  Only
 * the final message has EOT set.
 */
static void
vs_write_vue(struct vs_compile_context *vcc)
{
   struct toy_compiler *tc = &vcc->tc;
   struct toy_src outs[PIPE_MAX_SHADER_OUTPUTS];
   struct toy_dst header;
   struct toy_src r0;
   struct toy_inst *inst;
   int sent_attrs, total_attrs;
   /* copy r0 into the first free MRF as the message header */
   header = tdst_ud(tdst(TOY_FILE_MRF, vcc->first_free_mrf, 0));
   r0 = tsrc_ud(tsrc(TOY_FILE_GRF, 0, 0));
   inst = tc_MOV(tc, header, r0);
   inst->mask_ctrl = BRW_MASK_DISABLE;
   if (tc->dev->gen >= ILO_GEN(7)) {
      /* on Gen7+, OR 0xff00 into DWORD 5 of the header as a scalar op */
      inst = tc_OR(tc, tdst_offset(header, 0, 5),
            tsrc_rect(tsrc_offset(r0, 0, 5), TOY_RECT_010),
            tsrc_rect(tsrc_imm_ud(0xff00), TOY_RECT_010));
      inst->exec_size = BRW_EXECUTE_1;
      inst->access_mode = BRW_ALIGN_1;
      inst->mask_ctrl = BRW_MASK_DISABLE;
   }
   total_attrs = vs_collect_outputs(vcc, outs);
   sent_attrs = 0;
   while (sent_attrs < total_attrs) {
      struct toy_src desc;
      int mrf = vcc->first_free_mrf + 1, avail_mrf_for_attrs;
      int num_attrs, msg_len, i;
      bool eot;
      num_attrs = total_attrs - sent_attrs;
      eot = true;
      /* see if we need another message */
      avail_mrf_for_attrs = vcc->last_free_mrf - mrf + 1;
      if (num_attrs > avail_mrf_for_attrs) {
         /*
          * From the Sandy Bridge PRM, volume 4 part 2, page 22:
          *
          *     "Offset. This field specifies a destination offset (in 256-bit
          *      units) from the start of the URB entry(s), as referenced by
          *      URB Return Handle n, at which the data (if any) will be
          *      written."
          *
          * As we need to offset the following messages, we must make sure
          * this one writes an even number of attributes.
          */
         num_attrs = avail_mrf_for_attrs & ~1;
         eot = false;
      }
      if (tc->dev->gen >= ILO_GEN(7)) {
         /* do not forget about the header */
         msg_len = 1 + num_attrs;
      }
      else {
         /*
          * From the Sandy Bridge PRM, volume 4 part 2, page 26:
          *
          *     "At least 256 bits per vertex (512 bits total, M1 & M2) must
          *      be written.  Writing only 128 bits per vertex (256 bits
          *      total, M1 only) results in UNDEFINED operation."
          *
          *     "[DevSNB] Interleave writes must be in multiples of 256 per
          *      vertex."
          *
          * That is, we must write or appear to write an even number of
          * attributes, starting from two.
          */
         if (num_attrs % 2 && num_attrs == avail_mrf_for_attrs) {
            num_attrs--;
            eot = false;
         }
         msg_len = 1 + align(num_attrs, 2);
      }
      /* move this message's attributes into consecutive MRFs */
      for (i = 0; i < num_attrs; i++)
         tc_MOV(tc, tdst(TOY_FILE_MRF, mrf++, 0), outs[sent_attrs + i]);
      /* the URB offset below is in pairs, so the split must stay even */
      assert(sent_attrs % 2 == 0);
      desc = tsrc_imm_mdesc_urb(tc, eot, msg_len, 0,
            eot, true, false, BRW_URB_SWIZZLE_INTERLEAVE, sent_attrs / 2, 0);
      tc_add2(tc, TOY_OPCODE_URB_WRITE, tdst_null(), tsrc_from(header), desc);
      sent_attrs += num_attrs;
   }
}
/** |
* Set up shader inputs for fixed-function units. |
*/ |
static void |
vs_setup_shader_in(struct ilo_shader *sh, const struct toy_tgsi *tgsi) |
{ |
int num_attrs, i; |
/* vertex/instance id is the first VE if exists */ |
for (i = 0; i < tgsi->num_system_values; i++) { |
bool found = false; |
switch (tgsi->system_values[i].semantic_name) { |
case TGSI_SEMANTIC_INSTANCEID: |
case TGSI_SEMANTIC_VERTEXID: |
found = true; |
break; |
default: |
break; |
} |
if (found) { |
sh->in.semantic_names[sh->in.count] = |
tgsi->system_values[i].semantic_name; |
sh->in.semantic_indices[sh->in.count] = |
tgsi->system_values[i].semantic_index; |
sh->in.interp[sh->in.count] = TGSI_INTERPOLATE_CONSTANT; |
sh->in.centroid[sh->in.count] = false; |
sh->in.count++; |
break; |
} |
} |
num_attrs = 0; |
for (i = 0; i < tgsi->num_inputs; i++) { |
assert(tgsi->inputs[i].semantic_name == TGSI_SEMANTIC_GENERIC); |
if (tgsi->inputs[i].semantic_index >= num_attrs) |
num_attrs = tgsi->inputs[i].semantic_index + 1; |
} |
assert(num_attrs <= PIPE_MAX_ATTRIBS); |
/* VF cannot remap VEs. VE[i] must be used as GENERIC[i]. */ |
for (i = 0; i < num_attrs; i++) { |
sh->in.semantic_names[sh->in.count + i] = TGSI_SEMANTIC_GENERIC; |
sh->in.semantic_indices[sh->in.count + i] = i; |
sh->in.interp[sh->in.count + i] = TGSI_INTERPOLATE_CONSTANT; |
sh->in.centroid[sh->in.count + i] = false; |
} |
sh->in.count += num_attrs; |
sh->in.has_pos = false; |
sh->in.has_linear_interp = false; |
sh->in.barycentric_interpolation_mode = 0; |
} |
/**
 * Set up shader outputs for fixed-function units.
 *
 * Fills sh->out with the VUE layout and records, in output_map[i], the TGSI
 * output slot that backs VUE attribute i (-1 when there is none).  The
 * layout is PSIZE, POSITION, optional CLIPDIST0/1, COLOR/BCOLOR pairs, then
 * everything else in TGSI order.
 */
static void
vs_setup_shader_out(struct ilo_shader *sh, const struct toy_tgsi *tgsi,
                    bool output_clipdist, int *output_map)
{
   int psize_slot = -1, pos_slot = -1;
   int clipdist_slot[2] = { -1, -1 };
   /* [0]=COLOR0, [1]=BCOLOR0, [2]=COLOR1, [3]=BCOLOR1 */
   int color_slot[4] = { -1, -1, -1, -1 };
   int num_outs, i;
   /* find out the slots of outputs that need special care */
   for (i = 0; i < tgsi->num_outputs; i++) {
      switch (tgsi->outputs[i].semantic_name) {
      case TGSI_SEMANTIC_PSIZE:
         psize_slot = i;
         break;
      case TGSI_SEMANTIC_POSITION:
         pos_slot = i;
         break;
      case TGSI_SEMANTIC_CLIPDIST:
         if (tgsi->outputs[i].semantic_index)
            clipdist_slot[1] = i;
         else
            clipdist_slot[0] = i;
         break;
      case TGSI_SEMANTIC_COLOR:
         if (tgsi->outputs[i].semantic_index)
            color_slot[2] = i;
         else
            color_slot[0] = i;
         break;
      case TGSI_SEMANTIC_BCOLOR:
         if (tgsi->outputs[i].semantic_index)
            color_slot[3] = i;
         else
            color_slot[1] = i;
         break;
      default:
         break;
      }
   }
   /* the first two VUEs are always PSIZE and POSITION */
   num_outs = 2;
   output_map[0] = psize_slot;
   output_map[1] = pos_slot;
   sh->out.register_indices[0] =
      (psize_slot >= 0) ? tgsi->outputs[psize_slot].index : -1;
   sh->out.semantic_names[0] = TGSI_SEMANTIC_PSIZE;
   sh->out.semantic_indices[0] = 0;
   sh->out.register_indices[1] =
      (pos_slot >= 0) ? tgsi->outputs[pos_slot].index : -1;
   sh->out.semantic_names[1] = TGSI_SEMANTIC_POSITION;
   sh->out.semantic_indices[1] = 0;
   sh->out.has_pos = true;
   /* followed by optional clip distances */
   if (output_clipdist) {
      sh->out.register_indices[num_outs] =
         (clipdist_slot[0] >= 0) ? tgsi->outputs[clipdist_slot[0]].index : -1;
      sh->out.semantic_names[num_outs] = TGSI_SEMANTIC_CLIPDIST;
      sh->out.semantic_indices[num_outs] = 0;
      output_map[num_outs++] = clipdist_slot[0];
      sh->out.register_indices[num_outs] =
         (clipdist_slot[1] >= 0) ? tgsi->outputs[clipdist_slot[1]].index : -1;
      sh->out.semantic_names[num_outs] = TGSI_SEMANTIC_CLIPDIST;
      sh->out.semantic_indices[num_outs] = 1;
      output_map[num_outs++] = clipdist_slot[1];
   }
   /*
    * make BCOLOR follow COLOR so that we can make use of
    * ATTRIBUTE_SWIZZLE_INPUTATTR_FACING in 3DSTATE_SF
    */
   for (i = 0; i < 4; i++) {
      const int slot = color_slot[i];
      if (slot < 0)
         continue;
      sh->out.register_indices[num_outs] = tgsi->outputs[slot].index;
      sh->out.semantic_names[num_outs] = tgsi->outputs[slot].semantic_name;
      sh->out.semantic_indices[num_outs] = tgsi->outputs[slot].semantic_index;
      output_map[num_outs++] = slot;
   }
   /* add the rest of the outputs */
   for (i = 0; i < tgsi->num_outputs; i++) {
      switch (tgsi->outputs[i].semantic_name) {
      case TGSI_SEMANTIC_PSIZE:
      case TGSI_SEMANTIC_POSITION:
      case TGSI_SEMANTIC_CLIPDIST:
      case TGSI_SEMANTIC_COLOR:
      case TGSI_SEMANTIC_BCOLOR:
         break;
      default:
         sh->out.register_indices[num_outs] = tgsi->outputs[i].index;
         sh->out.semantic_names[num_outs] = tgsi->outputs[i].semantic_name;
         sh->out.semantic_indices[num_outs] = tgsi->outputs[i].semantic_index;
         output_map[num_outs++] = i;
         break;
      }
   }
   sh->out.count = num_outs;
}
/** |
* Translate the TGSI tokens. |
*/ |
static bool |
vs_setup_tgsi(struct toy_compiler *tc, const struct tgsi_token *tokens, |
struct toy_tgsi *tgsi) |
{ |
if (ilo_debug & ILO_DEBUG_VS) { |
ilo_printf("dumping vertex shader\n"); |
ilo_printf("\n"); |
tgsi_dump(tokens, 0); |
ilo_printf("\n"); |
} |
toy_compiler_translate_tgsi(tc, tokens, true, tgsi); |
if (tc->fail) { |
ilo_err("failed to translate VS TGSI tokens: %s\n", tc->reason); |
return false; |
} |
if (ilo_debug & ILO_DEBUG_VS) { |
ilo_printf("TGSI translator:\n"); |
toy_tgsi_dump(tgsi); |
ilo_printf("\n"); |
toy_compiler_dump(tc); |
ilo_printf("\n"); |
} |
return true; |
} |
/**
 * Set up VS compile context.  This includes translating the TGSI tokens.
 *
 * On success vcc owns a freshly allocated vcc->shader and an initialized
 * toy compiler; on failure everything allocated here is released and false
 * is returned.
 */
static bool
vs_setup(struct vs_compile_context *vcc,
         const struct ilo_shader_state *state,
         const struct ilo_shader_variant *variant)
{
   int num_consts;
   memset(vcc, 0, sizeof(*vcc));
   vcc->shader = CALLOC_STRUCT(ilo_shader);
   if (!vcc->shader)
      return false;
   vcc->variant = variant;
   toy_compiler_init(&vcc->tc, state->info.dev);
   /* default instruction template: align16 access, execution size 8 */
   vcc->tc.templ.access_mode = BRW_ALIGN_16;
   vcc->tc.templ.exec_size = BRW_EXECUTE_8;
   vcc->tc.rect_linear_width = 4;
   /*
    * The classic driver uses the sampler cache (gen6) or the data cache
    * (gen7).  Why?
    */
   vcc->const_cache = GEN6_SFID_DATAPORT_CONSTANT_CACHE;
   if (!vs_setup_tgsi(&vcc->tc, state->info.tokens, &vcc->tgsi)) {
      toy_compiler_cleanup(&vcc->tc);
      FREE(vcc->shader);
      return false;
   }
   vs_setup_shader_in(vcc->shader, &vcc->tgsi);
   vs_setup_shader_out(vcc->shader, &vcc->tgsi,
         (vcc->variant->u.vs.num_ucps > 0), vcc->output_map);
   /* fit each pair of user clip planes into a register */
   num_consts = (vcc->variant->u.vs.num_ucps + 1) / 2;
   /* r0 is reserved for payload header */
   vcc->first_const_grf = 1;
   vcc->first_vue_grf = vcc->first_const_grf + num_consts;
   vcc->first_free_grf = vcc->first_vue_grf + vcc->shader->in.count;
   vcc->last_free_grf = 127;
   /* m0 is reserved for system routines */
   vcc->first_free_mrf = 1;
   vcc->last_free_mrf = 15;
   vcc->num_grf_per_vrf = 1;
   if (vcc->tc.dev->gen >= ILO_GEN(7)) {
      /* NOTE(review): the top 15 GRFs appear to be repurposed as the MRF
       * range on Gen7+ — confirm against the Gen7 message documentation */
      vcc->last_free_grf -= 15;
      vcc->first_free_mrf = vcc->last_free_grf + 1;
      vcc->last_free_mrf = vcc->first_free_mrf + 14;
   }
   vcc->shader->in.start_grf = vcc->first_const_grf;
   /* four floats per user clip plane go into the push constant buffer */
   vcc->shader->pcb.clip_state_size =
      vcc->variant->u.vs.num_ucps * (sizeof(float) * 4);
   return true;
}
/** |
* Compile the vertex shader. |
*/ |
struct ilo_shader * |
ilo_shader_compile_vs(const struct ilo_shader_state *state, |
const struct ilo_shader_variant *variant) |
{ |
struct vs_compile_context vcc; |
bool need_gs; |
if (!vs_setup(&vcc, state, variant)) |
return NULL; |
if (vcc.tc.dev->gen >= ILO_GEN(7)) { |
need_gs = false; |
} |
else { |
need_gs = variant->u.vs.rasterizer_discard || |
state->info.stream_output.num_outputs; |
} |
vs_write_vue(&vcc); |
if (!vs_compile(&vcc)) { |
FREE(vcc.shader); |
vcc.shader = NULL; |
} |
toy_tgsi_cleanup(&vcc.tgsi); |
toy_compiler_cleanup(&vcc.tc); |
if (need_gs) { |
int so_mapping[PIPE_MAX_SHADER_OUTPUTS]; |
int i, j; |
for (i = 0; i < vcc.tgsi.num_outputs; i++) { |
int attr = 0; |
for (j = 0; j < vcc.shader->out.count; j++) { |
if (vcc.tgsi.outputs[i].semantic_name == |
vcc.shader->out.semantic_names[j] && |
vcc.tgsi.outputs[i].semantic_index == |
vcc.shader->out.semantic_indices[j]) { |
attr = j; |
break; |
} |
} |
so_mapping[i] = attr; |
} |
if (!ilo_shader_compile_gs_passthrough(state, variant, |
so_mapping, vcc.shader)) { |
ilo_shader_destroy_kernel(vcc.shader); |
vcc.shader = NULL; |
} |
} |
return vcc.shader; |
} |
/contrib/sdk/sources/Mesa/src/gallium/drivers/ilo/shader/toy_compiler.c |
---|
0,0 → 1,556 |
/* |
* Mesa 3-D graphics library |
* |
* Copyright (C) 2012-2013 LunarG, Inc. |
* |
* Permission is hereby granted, free of charge, to any person obtaining a |
* copy of this software and associated documentation files (the "Software"), |
* to deal in the Software without restriction, including without limitation |
* the rights to use, copy, modify, merge, publish, distribute, sublicense, |
* and/or sell copies of the Software, and to permit persons to whom the |
* Software is furnished to do so, subject to the following conditions: |
* |
* The above copyright notice and this permission notice shall be included |
* in all copies or substantial portions of the Software. |
* |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING |
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER |
* DEALINGS IN THE SOFTWARE. |
* |
* Authors: |
* Chia-I Wu <olv@lunarg.com> |
*/ |
#include "toy_compiler.h" |
/**
 * Dump an operand.
 *
 * \param file             register file of the operand
 * \param type             data type; selects how immediates are printed and
 *                         the ":f"/":d"/... suffix
 * \param rect             region descriptor to print as
 *                         <vert_stride;width,horz_stride>
 * \param indirect         true for register-indirect GRF addressing
 * \param indirect_subreg  byte offset of the address subregister used
 * \param val32            register byte offset, or the raw immediate value
 * \param is_dst           true when dumping a destination operand
 */
static void
tc_dump_operand(struct toy_compiler *tc,
                enum toy_file file, enum toy_type type, enum toy_rect rect,
                bool indirect, unsigned indirect_subreg, uint32_t val32,
                bool is_dst)
{
   static const char *toy_file_names[TOY_FILE_COUNT] = {
      [TOY_FILE_VRF] = "v",
      [TOY_FILE_ARF] = "NOT USED",
      [TOY_FILE_GRF] = "r",
      [TOY_FILE_MRF] = "m",
      [TOY_FILE_IMM] = "NOT USED",
   };
   const char *name = toy_file_names[file];
   int reg, subreg;
   /* reg/subreg are only meaningful for register files, not immediates */
   if (file != TOY_FILE_IMM) {
      reg = val32 / TOY_REG_WIDTH;
      subreg = (val32 % TOY_REG_WIDTH) / toy_type_size(type);
   }
   switch (file) {
   case TOY_FILE_GRF:
      if (indirect) {
         const int addr_subreg = indirect_subreg / toy_type_size(TOY_TYPE_UW);
         /* printed as r[a0.N] with an optional signed byte offset */
         ilo_printf("%s[a0.%d", name, addr_subreg);
         if (val32)
            ilo_printf("%+d", (int) val32);
         ilo_printf("]");
         break;
      }
      /* fall through */
   case TOY_FILE_VRF:
   case TOY_FILE_MRF:
      ilo_printf("%s%d", name, reg);
      if (subreg)
         ilo_printf(".%d", subreg);
      break;
   case TOY_FILE_ARF:
      /* architecture registers have fixed names */
      switch (reg) {
      case BRW_ARF_NULL:
         ilo_printf("null");
         break;
      case BRW_ARF_ADDRESS:
         ilo_printf("a0.%d", subreg);
         break;
      case BRW_ARF_ACCUMULATOR:
      case BRW_ARF_ACCUMULATOR + 1:
         ilo_printf("acc%d.%d", (reg & 1), subreg);
         break;
      case BRW_ARF_FLAG:
         ilo_printf("f0.%d", subreg);
         break;
      case BRW_ARF_STATE:
         ilo_printf("sr0.%d", subreg);
         break;
      case BRW_ARF_CONTROL:
         ilo_printf("cr0.%d", subreg);
         break;
      case BRW_ARF_NOTIFICATION_COUNT:
      case BRW_ARF_NOTIFICATION_COUNT + 1:
         ilo_printf("n%d.%d", (reg & 1), subreg);
         break;
      case BRW_ARF_IP:
         ilo_printf("ip");
         break;
      }
      break;
   case TOY_FILE_IMM:
      /* print the raw 32-bit value according to its type */
      switch (type) {
      case TOY_TYPE_F:
         {
            union fi fi = { .ui = val32 };
            ilo_printf("%f", fi.f);
         }
         break;
      case TOY_TYPE_D:
         ilo_printf("%d", (int32_t) val32);
         break;
      case TOY_TYPE_UD:
         ilo_printf("%u", val32);
         break;
      case TOY_TYPE_W:
         ilo_printf("%d", (int16_t) (val32 & 0xffff));
         break;
      case TOY_TYPE_UW:
         ilo_printf("%u", val32 & 0xffff);
         break;
      case TOY_TYPE_V:
         ilo_printf("0x%08x", val32);
         break;
      default:
         assert(!"unknown imm type");
         break;
      }
      break;
   default:
      assert(!"unexpected file");
      break;
   }
   /* dump the region parameter */
   if (file != TOY_FILE_IMM) {
      int vert_stride, width, horz_stride;
      /* translate the rect enum into its stride/width triple */
      switch (rect) {
      case TOY_RECT_LINEAR:
         vert_stride = tc->rect_linear_width;
         width = tc->rect_linear_width;
         horz_stride = 1;
         break;
      case TOY_RECT_041:
         vert_stride = 0;
         width = 4;
         horz_stride = 1;
         break;
      case TOY_RECT_010:
         vert_stride = 0;
         width = 1;
         horz_stride = 0;
         break;
      case TOY_RECT_220:
         vert_stride = 2;
         width = 2;
         horz_stride = 0;
         break;
      case TOY_RECT_440:
         vert_stride = 4;
         width = 4;
         horz_stride = 0;
         break;
      case TOY_RECT_240:
         vert_stride = 2;
         width = 4;
         horz_stride = 0;
         break;
      default:
         assert(!"unknown rect parameter");
         vert_stride = 0;
         width = 0;
         horz_stride = 0;
         break;
      }
      /* destinations print only the horizontal stride */
      if (is_dst)
         ilo_printf("<%d>", horz_stride);
      else
         ilo_printf("<%d;%d,%d>", vert_stride, width, horz_stride);
   }
   /* finally, the type suffix */
   switch (type) {
   case TOY_TYPE_F:
      ilo_printf(":f");
      break;
   case TOY_TYPE_D:
      ilo_printf(":d");
      break;
   case TOY_TYPE_UD:
      ilo_printf(":ud");
      break;
   case TOY_TYPE_W:
      ilo_printf(":w");
      break;
   case TOY_TYPE_UW:
      ilo_printf(":uw");
      break;
   case TOY_TYPE_V:
      ilo_printf(":v");
      break;
   default:
      assert(!"unexpected type");
      break;
   }
}
/** |
* Dump a source operand. |
*/ |
static void |
tc_dump_src(struct toy_compiler *tc, struct toy_src src) |
{ |
if (src.negate) |
ilo_printf("-"); |
if (src.absolute) |
ilo_printf("|"); |
tc_dump_operand(tc, src.file, src.type, src.rect, |
src.indirect, src.indirect_subreg, src.val32, false); |
if (tsrc_is_swizzled(src)) { |
const char xyzw[] = "xyzw"; |
ilo_printf(".%c%c%c%c", |
xyzw[src.swizzle_x], |
xyzw[src.swizzle_y], |
xyzw[src.swizzle_z], |
xyzw[src.swizzle_w]); |
} |
if (src.absolute) |
ilo_printf("|"); |
} |
/** |
* Dump a destination operand. |
*/ |
static void |
tc_dump_dst(struct toy_compiler *tc, struct toy_dst dst) |
{ |
tc_dump_operand(tc, dst.file, dst.type, dst.rect, |
dst.indirect, dst.indirect_subreg, dst.val32, true); |
if (dst.writemask != TOY_WRITEMASK_XYZW) { |
ilo_printf("."); |
if (dst.writemask & TOY_WRITEMASK_X) |
ilo_printf("x"); |
if (dst.writemask & TOY_WRITEMASK_Y) |
ilo_printf("y"); |
if (dst.writemask & TOY_WRITEMASK_Z) |
ilo_printf("z"); |
if (dst.writemask & TOY_WRITEMASK_W) |
ilo_printf("w"); |
} |
} |
static const char * |
get_opcode_name(unsigned opcode) |
{ |
switch (opcode) { |
case BRW_OPCODE_MOV: return "mov"; |
case BRW_OPCODE_SEL: return "sel"; |
case BRW_OPCODE_NOT: return "not"; |
case BRW_OPCODE_AND: return "and"; |
case BRW_OPCODE_OR: return "or"; |
case BRW_OPCODE_XOR: return "xor"; |
case BRW_OPCODE_SHR: return "shr"; |
case BRW_OPCODE_SHL: return "shl"; |
case BRW_OPCODE_RSR: return "rsr"; |
case BRW_OPCODE_RSL: return "rsl"; |
case BRW_OPCODE_ASR: return "asr"; |
case BRW_OPCODE_CMP: return "cmp"; |
case BRW_OPCODE_CMPN: return "cmpn"; |
case BRW_OPCODE_JMPI: return "jmpi"; |
case BRW_OPCODE_IF: return "if"; |
case BRW_OPCODE_IFF: return "iff"; |
case BRW_OPCODE_ELSE: return "else"; |
case BRW_OPCODE_ENDIF: return "endif"; |
case BRW_OPCODE_DO: return "do"; |
case BRW_OPCODE_WHILE: return "while"; |
case BRW_OPCODE_BREAK: return "break"; |
case BRW_OPCODE_CONTINUE: return "continue"; |
case BRW_OPCODE_HALT: return "halt"; |
case BRW_OPCODE_MSAVE: return "msave"; |
case BRW_OPCODE_MRESTORE: return "mrestore"; |
case BRW_OPCODE_PUSH: return "push"; |
case BRW_OPCODE_POP: return "pop"; |
case BRW_OPCODE_WAIT: return "wait"; |
case BRW_OPCODE_SEND: return "send"; |
case BRW_OPCODE_SENDC: return "sendc"; |
case BRW_OPCODE_MATH: return "math"; |
case BRW_OPCODE_ADD: return "add"; |
case BRW_OPCODE_MUL: return "mul"; |
case BRW_OPCODE_AVG: return "avg"; |
case BRW_OPCODE_FRC: return "frc"; |
case BRW_OPCODE_RNDU: return "rndu"; |
case BRW_OPCODE_RNDD: return "rndd"; |
case BRW_OPCODE_RNDE: return "rnde"; |
case BRW_OPCODE_RNDZ: return "rndz"; |
case BRW_OPCODE_MAC: return "mac"; |
case BRW_OPCODE_MACH: return "mach"; |
case BRW_OPCODE_LZD: return "lzd"; |
case BRW_OPCODE_SAD2: return "sad2"; |
case BRW_OPCODE_SADA2: return "sada2"; |
case BRW_OPCODE_DP4: return "dp4"; |
case BRW_OPCODE_DPH: return "dph"; |
case BRW_OPCODE_DP3: return "dp3"; |
case BRW_OPCODE_DP2: return "dp2"; |
case BRW_OPCODE_DPA2: return "dpa2"; |
case BRW_OPCODE_LINE: return "line"; |
case BRW_OPCODE_PLN: return "pln"; |
case BRW_OPCODE_MAD: return "mad"; |
case BRW_OPCODE_NOP: return "nop"; |
/* TGSI */ |
case TOY_OPCODE_TGSI_IN: return "tgsi.in"; |
case TOY_OPCODE_TGSI_CONST: return "tgsi.const"; |
case TOY_OPCODE_TGSI_SV: return "tgsi.sv"; |
case TOY_OPCODE_TGSI_IMM: return "tgsi.imm"; |
case TOY_OPCODE_TGSI_INDIRECT_FETCH: return "tgsi.indirect_fetch"; |
case TOY_OPCODE_TGSI_INDIRECT_STORE: return "tgsi.indirect_store"; |
case TOY_OPCODE_TGSI_TEX: return "tgsi.tex"; |
case TOY_OPCODE_TGSI_TXB: return "tgsi.txb"; |
case TOY_OPCODE_TGSI_TXD: return "tgsi.txd"; |
case TOY_OPCODE_TGSI_TXL: return "tgsi.txl"; |
case TOY_OPCODE_TGSI_TXP: return "tgsi.txp"; |
case TOY_OPCODE_TGSI_TXF: return "tgsi.txf"; |
case TOY_OPCODE_TGSI_TXQ: return "tgsi.txq"; |
case TOY_OPCODE_TGSI_TXQ_LZ: return "tgsi.txq_lz"; |
case TOY_OPCODE_TGSI_TEX2: return "tgsi.tex2"; |
case TOY_OPCODE_TGSI_TXB2: return "tgsi.txb2"; |
case TOY_OPCODE_TGSI_TXL2: return "tgsi.txl2"; |
case TOY_OPCODE_TGSI_SAMPLE: return "tgsi.sample"; |
case TOY_OPCODE_TGSI_SAMPLE_I: return "tgsi.sample_i"; |
case TOY_OPCODE_TGSI_SAMPLE_I_MS: return "tgsi.sample_i_ms"; |
case TOY_OPCODE_TGSI_SAMPLE_B: return "tgsi.sample_b"; |
case TOY_OPCODE_TGSI_SAMPLE_C: return "tgsi.sample_c"; |
case TOY_OPCODE_TGSI_SAMPLE_C_LZ: return "tgsi.sample_c_lz"; |
case TOY_OPCODE_TGSI_SAMPLE_D: return "tgsi.sample_d"; |
case TOY_OPCODE_TGSI_SAMPLE_L: return "tgsi.sample_l"; |
case TOY_OPCODE_TGSI_GATHER4: return "tgsi.gather4"; |
case TOY_OPCODE_TGSI_SVIEWINFO: return "tgsi.sviewinfo"; |
case TOY_OPCODE_TGSI_SAMPLE_POS: return "tgsi.sample_pos"; |
case TOY_OPCODE_TGSI_SAMPLE_INFO: return "tgsi.sample_info"; |
/* math */ |
case TOY_OPCODE_INV: return "math.inv"; |
case TOY_OPCODE_LOG: return "math.log"; |
case TOY_OPCODE_EXP: return "math.exp"; |
case TOY_OPCODE_SQRT: return "math.sqrt"; |
case TOY_OPCODE_RSQ: return "math.rsq"; |
case TOY_OPCODE_SIN: return "math.sin"; |
case TOY_OPCODE_COS: return "math.cos"; |
case TOY_OPCODE_FDIV: return "math.fdiv"; |
case TOY_OPCODE_POW: return "math.pow"; |
case TOY_OPCODE_INT_DIV_QUOTIENT: return "math.int_div_quotient"; |
case TOY_OPCODE_INT_DIV_REMAINDER: return "math.int_div_remainer"; |
/* urb */ |
case TOY_OPCODE_URB_WRITE: return "urb.urb_write"; |
/* gs */ |
case TOY_OPCODE_EMIT: return "gs.emit"; |
case TOY_OPCODE_ENDPRIM: return "gs.endprim"; |
/* fs */ |
case TOY_OPCODE_DDX: return "fs.ddx"; |
case TOY_OPCODE_DDY: return "fs.ddy"; |
case TOY_OPCODE_FB_WRITE: return "fs.fb_write"; |
case TOY_OPCODE_KIL: return "fs.kil"; |
default: return "unk"; |
} |
} |
static const char * |
get_cond_modifier_name(unsigned opcode, unsigned cond_modifier) |
{ |
switch (opcode) { |
case BRW_OPCODE_SEND: |
case BRW_OPCODE_SENDC: |
/* SFID */ |
switch (cond_modifier) { |
case BRW_SFID_NULL: return "Null"; |
case BRW_SFID_SAMPLER: return "Sampling Engine"; |
case BRW_SFID_MESSAGE_GATEWAY: return "Message Gateway"; |
case GEN6_SFID_DATAPORT_SAMPLER_CACHE: return "Data Port Sampler Cache"; |
case GEN6_SFID_DATAPORT_RENDER_CACHE: return "Data Port Render Cache"; |
case BRW_SFID_URB: return "URB"; |
case BRW_SFID_THREAD_SPAWNER: return "Thread Spawner"; |
case GEN6_SFID_DATAPORT_CONSTANT_CACHE: return "Constant Cache"; |
default: return "Unknown"; |
} |
break; |
case BRW_OPCODE_MATH: |
/* FC */ |
switch (cond_modifier) { |
case BRW_MATH_FUNCTION_INV: return "INV"; |
case BRW_MATH_FUNCTION_LOG: return "LOG"; |
case BRW_MATH_FUNCTION_EXP: return "EXP"; |
case BRW_MATH_FUNCTION_SQRT: return "SQRT"; |
case BRW_MATH_FUNCTION_RSQ: return "RSQ"; |
case BRW_MATH_FUNCTION_SIN: return "SIN"; |
case BRW_MATH_FUNCTION_COS: return "COS"; |
case BRW_MATH_FUNCTION_FDIV: return "FDIV"; |
case BRW_MATH_FUNCTION_POW: return "POW"; |
case BRW_MATH_FUNCTION_INT_DIV_QUOTIENT: return "INT DIV (quotient)"; |
case BRW_MATH_FUNCTION_INT_DIV_REMAINDER: return "INT DIV (remainder)"; |
default: return "UNK"; |
} |
break; |
default: |
switch (cond_modifier) { |
case BRW_CONDITIONAL_NONE: return NULL; |
case BRW_CONDITIONAL_Z: return "z"; |
case BRW_CONDITIONAL_NZ: return "nz"; |
case BRW_CONDITIONAL_G: return "g"; |
case BRW_CONDITIONAL_GE: return "ge"; |
case BRW_CONDITIONAL_L: return "l"; |
case BRW_CONDITIONAL_LE: return "le"; |
default: return "unk"; |
} |
break; |
} |
} |
/** |
* Dump an instruction. |
*/ |
static void |
tc_dump_inst(struct toy_compiler *tc, const struct toy_inst *inst) |
{ |
const char *name; |
int i; |
name = get_opcode_name(inst->opcode); |
ilo_printf(" %s", name); |
if (inst->opcode == BRW_OPCODE_NOP) { |
ilo_printf("\n"); |
return; |
} |
if (inst->saturate) |
ilo_printf(".sat"); |
name = get_cond_modifier_name(inst->opcode, inst->cond_modifier); |
if (name) |
ilo_printf(".%s", name); |
ilo_printf(" "); |
tc_dump_dst(tc, inst->dst); |
for (i = 0; i < Elements(inst->src); i++) { |
if (tsrc_is_null(inst->src[i])) |
break; |
ilo_printf(", "); |
tc_dump_src(tc, inst->src[i]); |
} |
ilo_printf("\n"); |
} |
/** |
* Dump the instructions added to the compiler. |
*/ |
void |
toy_compiler_dump(struct toy_compiler *tc) |
{ |
struct toy_inst *inst; |
int pc; |
pc = 0; |
tc_head(tc); |
while ((inst = tc_next_no_skip(tc)) != NULL) { |
/* we do not generate code for markers */ |
if (inst->marker) |
ilo_printf("marker:"); |
else |
ilo_printf("%6d:", pc++); |
tc_dump_inst(tc, inst); |
} |
} |
/** |
* Clean up the toy compiler. |
*/ |
void |
toy_compiler_cleanup(struct toy_compiler *tc) |
{ |
struct toy_inst *inst, *next; |
LIST_FOR_EACH_ENTRY_SAFE(inst, next, &tc->instructions, list) |
util_slab_free(&tc->mempool, inst); |
util_slab_destroy(&tc->mempool); |
} |
/** |
* Initialize the instruction template, from which tc_add() initializes the |
* newly added instructions. |
*/ |
static void |
tc_init_inst_templ(struct toy_compiler *tc) |
{ |
struct toy_inst *templ = &tc->templ; |
int i; |
templ->opcode = BRW_OPCODE_NOP; |
templ->access_mode = BRW_ALIGN_1; |
templ->mask_ctrl = BRW_MASK_ENABLE; |
templ->dep_ctrl = BRW_DEPENDENCY_NORMAL; |
templ->qtr_ctrl = GEN6_COMPRESSION_1Q; |
templ->thread_ctrl = BRW_THREAD_NORMAL; |
templ->pred_ctrl = BRW_PREDICATE_NONE; |
templ->pred_inv = false; |
templ->exec_size = BRW_EXECUTE_1; |
templ->cond_modifier = BRW_CONDITIONAL_NONE; |
templ->acc_wr_ctrl = false; |
templ->saturate = false; |
templ->marker = false; |
templ->dst = tdst_null(); |
for (i = 0; i < Elements(templ->src); i++) |
templ->src[i] = tsrc_null(); |
for (i = 0; i < Elements(templ->tex.offsets); i++) |
templ->tex.offsets[i] = tsrc_null(); |
list_inithead(&templ->list); |
} |
/** |
* Initialize the toy compiler. |
*/ |
void |
toy_compiler_init(struct toy_compiler *tc, const struct ilo_dev_info *dev) |
{ |
memset(tc, 0, sizeof(*tc)); |
tc->dev = dev; |
tc_init_inst_templ(tc); |
util_slab_create(&tc->mempool, sizeof(struct toy_inst), |
64, UTIL_SLAB_SINGLETHREADED); |
list_inithead(&tc->instructions); |
/* instructions are added to the tail */ |
tc_tail(tc); |
tc->rect_linear_width = 1; |
/* skip 0 so that util_hash_table_get() never returns NULL */ |
tc->next_vrf = 1; |
} |
/contrib/sdk/sources/Mesa/src/gallium/drivers/ilo/shader/toy_compiler.h |
---|
0,0 → 1,473 |
/* |
* Mesa 3-D graphics library |
* |
* Copyright (C) 2012-2013 LunarG, Inc. |
* |
* Permission is hereby granted, free of charge, to any person obtaining a |
* copy of this software and associated documentation files (the "Software"), |
* to deal in the Software without restriction, including without limitation |
* the rights to use, copy, modify, merge, publish, distribute, sublicense, |
* and/or sell copies of the Software, and to permit persons to whom the |
* Software is furnished to do so, subject to the following conditions: |
* |
* The above copyright notice and this permission notice shall be included |
* in all copies or substantial portions of the Software. |
* |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING |
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER |
* DEALINGS IN THE SOFTWARE. |
* |
* Authors: |
* Chia-I Wu <olv@lunarg.com> |
*/ |
#ifndef TOY_COMPILER_H |
#define TOY_COMPILER_H |
#include "util/u_slab.h" |
#include "brw_defines.h" |
#include "ilo_common.h" |
#include "toy_compiler_reg.h" |
/**
 * Toy opcodes.
 *
 * Values 0..127 alias the hardware BRW_OPCODE_x opcodes; everything after
 * TOY_OPCODE_LAST_HW is a virtual opcode that must be lowered before
 * assembly.
 */
enum toy_opcode {
   /* 0..127 are reserved for BRW_OPCODE_x */
   TOY_OPCODE_LAST_HW = 127,
   /* TGSI register functions */
   TOY_OPCODE_TGSI_IN,
   TOY_OPCODE_TGSI_CONST,
   TOY_OPCODE_TGSI_SV,
   TOY_OPCODE_TGSI_IMM,
   TOY_OPCODE_TGSI_INDIRECT_FETCH,
   TOY_OPCODE_TGSI_INDIRECT_STORE,
   /* TGSI sampling functions */
   TOY_OPCODE_TGSI_TEX,
   TOY_OPCODE_TGSI_TXB,
   TOY_OPCODE_TGSI_TXD,
   TOY_OPCODE_TGSI_TXL,
   TOY_OPCODE_TGSI_TXP,
   TOY_OPCODE_TGSI_TXF,
   TOY_OPCODE_TGSI_TXQ,
   TOY_OPCODE_TGSI_TXQ_LZ,
   TOY_OPCODE_TGSI_TEX2,
   TOY_OPCODE_TGSI_TXB2,
   TOY_OPCODE_TGSI_TXL2,
   TOY_OPCODE_TGSI_SAMPLE,
   TOY_OPCODE_TGSI_SAMPLE_I,
   TOY_OPCODE_TGSI_SAMPLE_I_MS,
   TOY_OPCODE_TGSI_SAMPLE_B,
   TOY_OPCODE_TGSI_SAMPLE_C,
   TOY_OPCODE_TGSI_SAMPLE_C_LZ,
   TOY_OPCODE_TGSI_SAMPLE_D,
   TOY_OPCODE_TGSI_SAMPLE_L,
   TOY_OPCODE_TGSI_GATHER4,
   TOY_OPCODE_TGSI_SVIEWINFO,
   TOY_OPCODE_TGSI_SAMPLE_POS,
   TOY_OPCODE_TGSI_SAMPLE_INFO,
   /* math functions */
   TOY_OPCODE_INV,
   TOY_OPCODE_LOG,
   TOY_OPCODE_EXP,
   TOY_OPCODE_SQRT,
   TOY_OPCODE_RSQ,
   TOY_OPCODE_SIN,
   TOY_OPCODE_COS,
   TOY_OPCODE_FDIV,
   TOY_OPCODE_POW,
   TOY_OPCODE_INT_DIV_QUOTIENT,
   TOY_OPCODE_INT_DIV_REMAINDER,
   /* URB functions */
   TOY_OPCODE_URB_WRITE,
   /* GS-specific functions */
   TOY_OPCODE_EMIT,
   TOY_OPCODE_ENDPRIM,
   /* FS-specific functions */
   TOY_OPCODE_DDX,
   TOY_OPCODE_DDY,
   TOY_OPCODE_FB_WRITE,
   TOY_OPCODE_KIL,
};
/**
 * Toy instruction.
 *
 * The bitfields mirror the hardware instruction word fields; see the
 * BRW_* / GEN6_* defines in brw_defines.h for the encodings.  Do not
 * reorder the bitfields: the declaration order is part of the layout.
 */
struct toy_inst {
   unsigned opcode:8; /* enum toy_opcode */
   unsigned access_mode:1; /* BRW_ALIGN_x */
   unsigned mask_ctrl:1; /* BRW_MASK_x */
   unsigned dep_ctrl:2; /* BRW_DEPENDENCY_x */
   unsigned qtr_ctrl:2; /* GEN6_COMPRESSION_x */
   unsigned thread_ctrl:2; /* BRW_THREAD_x */
   unsigned pred_ctrl:4; /* BRW_PREDICATE_x */
   unsigned pred_inv:1; /* true or false */
   unsigned exec_size:3; /* BRW_EXECUTE_x */
   unsigned cond_modifier:4; /* BRW_CONDITIONAL_x */
   unsigned acc_wr_ctrl:1; /* true or false */
   unsigned saturate:1; /* true or false */
   /* true if the instruction should be ignored for instruction iteration */
   unsigned marker:1;
   unsigned pad:1;
   struct toy_dst dst;
   struct toy_src src[5]; /* match TGSI_FULL_MAX_SRC_REGISTERS */
   /* sampling-related payload, used by the TGSI sampling opcodes */
   struct {
      int target; /* TGSI_TEXTURE_x */
      struct toy_src offsets[1]; /* need to be 4 when GATHER4 is supported */
   } tex;
   /* links the instruction into toy_compiler::instructions */
   struct list_head list;
};
/**
 * Toy compiler.
 *
 * Holds the instruction list, the slab allocator backing it, and the
 * iterator (iter/iter_next) that marks the current insertion/iteration
 * point within the list.
 */
struct toy_compiler {
   const struct ilo_dev_info *dev;
   /* template copied into every instruction created by tc_add() */
   struct toy_inst templ;
   struct util_slab_mempool mempool;
   struct list_head instructions;
   /* current location; iter == &instructions means "at a sentinel" */
   struct list_head *iter, *iter_next;
   /* this is not set until toy_compiler_legalize_for_asm() */
   int num_instructions;
   int rect_linear_width;
   /* next free virtual register; starts at 1 (see toy_compiler_init()) */
   int next_vrf;
   /* set by tc_fail(); reason records the first failure only */
   bool fail;
   const char *reason;
};
/** |
* Allocate the given number of VRF registers. |
*/ |
static inline int |
tc_alloc_vrf(struct toy_compiler *tc, int count) |
{ |
const int vrf = tc->next_vrf; |
tc->next_vrf += count; |
return vrf; |
} |
/** |
* Allocate a temporary register. |
*/ |
static inline struct toy_dst |
tc_alloc_tmp(struct toy_compiler *tc) |
{ |
return tdst(TOY_FILE_VRF, tc_alloc_vrf(tc, 1), 0); |
} |
/** |
* Allocate four temporary registers. |
*/ |
static inline void |
tc_alloc_tmp4(struct toy_compiler *tc, struct toy_dst *tmp) |
{ |
tmp[0] = tc_alloc_tmp(tc); |
tmp[1] = tc_alloc_tmp(tc); |
tmp[2] = tc_alloc_tmp(tc); |
tmp[3] = tc_alloc_tmp(tc); |
} |
/** |
* Duplicate an instruction at the current location. |
*/ |
static inline struct toy_inst * |
tc_duplicate_inst(struct toy_compiler *tc, const struct toy_inst *inst) |
{ |
struct toy_inst *new_inst; |
new_inst = util_slab_alloc(&tc->mempool); |
if (!new_inst) |
return NULL; |
*new_inst = *inst; |
list_addtail(&new_inst->list, tc->iter_next); |
return new_inst; |
} |
/**
 * Move an instruction to the current location: unlink it from wherever it
 * is in the list and re-insert it before tc->iter_next.
 */
static inline void
tc_move_inst(struct toy_compiler *tc, struct toy_inst *inst)
{
   list_del(&inst->list);
   list_addtail(&inst->list, tc->iter_next);
}
/**
 * Discard an instruction: unlink it from the list and return its memory
 * to the slab.  The pointer must not be used afterwards.
 */
static inline void
tc_discard_inst(struct toy_compiler *tc, struct toy_inst *inst)
{
   list_del(&inst->list);
   util_slab_free(&tc->mempool, inst);
}
/**
 * Add a new instruction at the current location, using tc->templ as the
 * template.  Returns NULL when allocation fails.
 */
static inline struct toy_inst *
tc_add(struct toy_compiler *tc)
{
   return tc_duplicate_inst(tc, &tc->templ);
}
/** |
* A convenient version of tc_add() for instructions with 3 source operands. |
*/ |
static inline struct toy_inst * |
tc_add3(struct toy_compiler *tc, unsigned opcode, |
struct toy_dst dst, |
struct toy_src src0, |
struct toy_src src1, |
struct toy_src src2) |
{ |
struct toy_inst *inst; |
inst = tc_add(tc); |
if (!inst) |
return NULL; |
inst->opcode = opcode; |
inst->dst = dst; |
inst->src[0] = src0; |
inst->src[1] = src1; |
inst->src[2] = src2; |
return inst; |
} |
/** |
* A convenient version of tc_add() for instructions with 2 source operands. |
*/ |
static inline struct toy_inst * |
tc_add2(struct toy_compiler *tc, int opcode, |
struct toy_dst dst, |
struct toy_src src0, |
struct toy_src src1) |
{ |
return tc_add3(tc, opcode, dst, src0, src1, tsrc_null()); |
} |
/**
 * A convenient version of tc_add() for instructions with 1 source operand.
 * Returns NULL when allocation fails.
 */
static inline struct toy_inst *
tc_add1(struct toy_compiler *tc, unsigned opcode,
struct toy_dst dst,
struct toy_src src0)
{
   return tc_add2(tc, opcode, dst, src0, tsrc_null());
}
/**
 * A convenient version of tc_add() for instructions without source or
 * destination operands.  Returns NULL when allocation fails.
 */
static inline struct toy_inst *
tc_add0(struct toy_compiler *tc, unsigned opcode)
{
   return tc_add1(tc, opcode, tdst_null(), tsrc_null());
}
/*
 * Templates for per-opcode helper functions.  TC_ALUn(func, opcode)
 * defines "static inline struct toy_inst *func(tc, ...)" taking n source
 * operands and forwarding to tc_addn() with the given opcode.
 */
#define TC_ALU0(func, opcode)   \
static inline struct toy_inst * \
func(struct toy_compiler *tc)   \
{                               \
   return tc_add0(tc, opcode);  \
}

#define TC_ALU1(func, opcode)           \
static inline struct toy_inst *         \
func(struct toy_compiler *tc,           \
     struct toy_dst dst,                \
     struct toy_src src)                \
{                                       \
   return tc_add1(tc, opcode, dst, src); \
}

#define TC_ALU2(func, opcode)    \
static inline struct toy_inst *  \
func(struct toy_compiler *tc,    \
     struct toy_dst dst,         \
     struct toy_src src0,        \
     struct toy_src src1)        \
{                                \
   return tc_add2(tc, opcode,    \
         dst, src0, src1);       \
}

#define TC_ALU3(func, opcode)    \
static inline struct toy_inst *  \
func(struct toy_compiler *tc,    \
     struct toy_dst dst,         \
     struct toy_src src0,        \
     struct toy_src src1,        \
     struct toy_src src2)        \
{                                \
   return tc_add3(tc, opcode,    \
         dst, src0, src1, src2); \
}
/*
 * Like TC_ALU2, but the generated helper also takes a conditional
 * modifier (BRW_CONDITIONAL_x, or an SFID/math function for send/math).
 *
 * Fix: tc_add2() returns NULL when the underlying slab allocation fails;
 * the original expansion dereferenced the result unconditionally.
 */
#define TC_CND2(func, opcode)            \
static inline struct toy_inst *          \
func(struct toy_compiler *tc,            \
     struct toy_dst dst,                 \
     struct toy_src src0,                \
     struct toy_src src1,                \
     unsigned cond_modifier)             \
{                                        \
   struct toy_inst *inst;                \
   inst = tc_add2(tc, opcode,            \
         dst, src0, src1);               \
   if (inst)                             \
      inst->cond_modifier = cond_modifier; \
   return inst;                          \
}
/* Instantiate the helpers: tc_MOV(tc, dst, src), tc_ADD(tc, dst, s0, s1),
 * etc.  Each appends an instruction of the given opcode at the current
 * location. */
TC_ALU0(tc_NOP, BRW_OPCODE_NOP)
TC_ALU0(tc_ELSE, BRW_OPCODE_ELSE)
TC_ALU0(tc_ENDIF, BRW_OPCODE_ENDIF)
TC_ALU1(tc_MOV, BRW_OPCODE_MOV)
TC_ALU1(tc_RNDD, BRW_OPCODE_RNDD)
TC_ALU1(tc_INV, TOY_OPCODE_INV)
TC_ALU1(tc_FRC, BRW_OPCODE_FRC)
TC_ALU1(tc_EXP, TOY_OPCODE_EXP)
TC_ALU1(tc_LOG, TOY_OPCODE_LOG)
TC_ALU2(tc_ADD, BRW_OPCODE_ADD)
TC_ALU2(tc_MUL, BRW_OPCODE_MUL)
TC_ALU2(tc_AND, BRW_OPCODE_AND)
TC_ALU2(tc_OR, BRW_OPCODE_OR)
TC_ALU2(tc_DP2, BRW_OPCODE_DP2)
TC_ALU2(tc_DP3, BRW_OPCODE_DP3)
TC_ALU2(tc_DP4, BRW_OPCODE_DP4)
TC_ALU2(tc_SHL, BRW_OPCODE_SHL)
TC_ALU2(tc_SHR, BRW_OPCODE_SHR)
TC_ALU2(tc_POW, TOY_OPCODE_POW)
TC_ALU3(tc_MAC, BRW_OPCODE_MAC)
/* these take an extra cond_modifier argument */
TC_CND2(tc_SEL, BRW_OPCODE_SEL)
TC_CND2(tc_CMP, BRW_OPCODE_CMP)
TC_CND2(tc_IF, BRW_OPCODE_IF)
TC_CND2(tc_SEND, BRW_OPCODE_SEND)
/**
 * Upcast a list_head to the instruction that embeds it, via container_of.
 */
static inline struct toy_inst *
tc_list_to_inst(struct toy_compiler *tc, struct list_head *item)
{
   return container_of(item, (struct toy_inst *) NULL, list);
}
/** |
* Return the instruction at the current location. |
*/ |
static inline struct toy_inst * |
tc_current(struct toy_compiler *tc) |
{ |
return (tc->iter != &tc->instructions) ? |
tc_list_to_inst(tc, tc->iter) : NULL; |
} |
/** |
* Set the current location to the head. |
*/ |
static inline void |
tc_head(struct toy_compiler *tc) |
{ |
tc->iter = &tc->instructions; |
tc->iter_next = tc->iter->next; |
} |
/** |
* Set the current location to the tail. |
*/ |
static inline void |
tc_tail(struct toy_compiler *tc) |
{ |
tc->iter = &tc->instructions; |
tc->iter_next = tc->iter; |
} |
/** |
* Advance the current location. |
*/ |
static inline struct toy_inst * |
tc_next_no_skip(struct toy_compiler *tc) |
{ |
/* stay at the tail so that new instructions are added there */ |
if (tc->iter_next == &tc->instructions) { |
tc_tail(tc); |
return NULL; |
} |
tc->iter = tc->iter_next; |
tc->iter_next = tc->iter_next->next; |
return tc_list_to_inst(tc, tc->iter); |
} |
/** |
* Advance the current location, skipping markers. |
*/ |
static inline struct toy_inst * |
tc_next(struct toy_compiler *tc) |
{ |
struct toy_inst *inst; |
do { |
inst = tc_next_no_skip(tc); |
} while (inst && inst->marker); |
return inst; |
} |
static inline void |
tc_fail(struct toy_compiler *tc, const char *reason) |
{ |
if (!tc->fail) { |
tc->fail = true; |
tc->reason = reason; |
} |
} |
/* set up a compiler for the given device */
void
toy_compiler_init(struct toy_compiler *tc, const struct ilo_dev_info *dev);

/* free all instructions and the slab allocator */
void
toy_compiler_cleanup(struct toy_compiler *tc);

/* print the instruction list via ilo_printf */
void
toy_compiler_dump(struct toy_compiler *tc);

/* assemble to a malloc'ed kernel; *size receives the size in bytes */
void *
toy_compiler_assemble(struct toy_compiler *tc, int *size);

/* disassemble a previously assembled kernel */
void
toy_compiler_disassemble(struct toy_compiler *tc, const void *kernel, int size);

#endif /* TOY_COMPILER_H */
/contrib/sdk/sources/Mesa/src/gallium/drivers/ilo/shader/toy_compiler_asm.c |
---|
0,0 → 1,750 |
/* |
* Mesa 3-D graphics library |
* |
* Copyright (C) 2012-2013 LunarG, Inc. |
* |
* Permission is hereby granted, free of charge, to any person obtaining a |
* copy of this software and associated documentation files (the "Software"), |
* to deal in the Software without restriction, including without limitation |
* the rights to use, copy, modify, merge, publish, distribute, sublicense, |
* and/or sell copies of the Software, and to permit persons to whom the |
* Software is furnished to do so, subject to the following conditions: |
* |
* The above copyright notice and this permission notice shall be included |
* in all copies or substantial portions of the Software. |
* |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING |
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER |
* DEALINGS IN THE SOFTWARE. |
* |
* Authors: |
* Chia-I Wu <olv@lunarg.com> |
*/ |
#include "toy_compiler.h" |
/* an operand origin packs the register number above the sub-register byte
 * offset: (RegNum << CG_REG_SHIFT | SubRegNumInBytes) */
#define CG_REG_SHIFT 5
#define CG_REG_NUM(origin) ((origin) >> CG_REG_SHIFT)

/**
 * Per-instruction codegen state: the instruction being encoded plus its
 * destination and source operands translated into raw hardware fields.
 */
struct codegen {
   const struct toy_inst *inst;
   int pc;

   unsigned flag_sub_reg_num;

   struct codegen_dst {
      unsigned file;
      unsigned type;
      bool indirect;
      unsigned indirect_subreg;
      unsigned origin; /* (RegNum << 5 | SubRegNumInBytes) */

      unsigned horz_stride;
      unsigned writemask;
   } dst;

   struct codegen_src {
      unsigned file;
      unsigned type;
      bool indirect;
      unsigned indirect_subreg;
      unsigned origin; /* (RegNum << 5 | SubRegNumInBytes) */

      unsigned vert_stride;
      unsigned width;
      unsigned horz_stride;
      unsigned swizzle[4];
      bool absolute;
      bool negate;
   } src[3];
};
/** |
* Return true if the source operand is null. |
*/ |
static bool |
src_is_null(const struct codegen *cg, int idx) |
{ |
const struct codegen_src *src = &cg->src[idx]; |
return (src->file == BRW_ARCHITECTURE_REGISTER_FILE && |
src->origin == BRW_ARF_NULL << CG_REG_SHIFT); |
} |
/**
 * Translate a source operand to DW2 or DW3 of the 1-src/2-src format.
 *
 * idx selects src0 (DW2) or src1 (DW3).  Immediate operands need special
 * handling because the immediate value occupies all of DW3 while DW2 then
 * carries only the flag sub-register number.
 */
static uint32_t
translate_src(const struct codegen *cg, int idx)
{
   const struct codegen_src *src = &cg->src[idx];
   uint32_t dw;

   /* special treatment may be needed if any of the operand is immediate */
   if (cg->src[0].file == BRW_IMMEDIATE_VALUE) {
      assert(!cg->src[0].absolute && !cg->src[0].negate);
      /* only the last src operand can be an immediate */
      assert(src_is_null(cg, 1));

      /* DW2 holds just the flag subreg; DW3 holds the immediate bits */
      if (idx == 0)
         return cg->flag_sub_reg_num << 25;
      else
         return cg->src[0].origin;
   }
   else if (idx && cg->src[1].file == BRW_IMMEDIATE_VALUE) {
      assert(!cg->src[1].absolute && !cg->src[1].negate);
      return cg->src[1].origin;
   }

   assert(src->file != BRW_IMMEDIATE_VALUE);

   if (src->indirect) {
      /* with indirect addressing, origin is a signed address offset */
      const int offset = (int) src->origin;

      assert(src->file == BRW_GENERAL_REGISTER_FILE);
      assert(offset < 512 && offset >= -512);

      if (cg->inst->access_mode == BRW_ALIGN_16) {
         assert(src->width == BRW_WIDTH_4);
         assert(src->horz_stride == BRW_HORIZONTAL_STRIDE_1);

         /* the lower 4 bits are reserved for the swizzle_[xy] */
         assert(!(src->origin & 0xf));

         dw = src->vert_stride << 21 |
              src->swizzle[3] << 18 |
              src->swizzle[2] << 16 |
              BRW_ADDRESS_REGISTER_INDIRECT_REGISTER << 15 |
              src->negate << 14 |
              src->absolute << 13 |
              src->indirect_subreg << 10 |
              (src->origin & 0x3f0) |
              src->swizzle[1] << 2 |
              src->swizzle[0];
      }
      else {
         /* align1 has no swizzle field; require identity swizzle */
         assert(src->swizzle[0] == TOY_SWIZZLE_X &&
                src->swizzle[1] == TOY_SWIZZLE_Y &&
                src->swizzle[2] == TOY_SWIZZLE_Z &&
                src->swizzle[3] == TOY_SWIZZLE_W);

         dw = src->vert_stride << 21 |
              src->width << 18 |
              src->horz_stride << 16 |
              BRW_ADDRESS_REGISTER_INDIRECT_REGISTER << 15 |
              src->negate << 14 |
              src->absolute << 13 |
              src->indirect_subreg << 10 |
              (src->origin & 0x3ff);
      }
   }
   else {
      /* direct addressing: validate the register number per file */
      switch (src->file) {
      case BRW_ARCHITECTURE_REGISTER_FILE:
         break;
      case BRW_GENERAL_REGISTER_FILE:
         assert(CG_REG_NUM(src->origin) < 128);
         break;
      case BRW_MESSAGE_REGISTER_FILE:
         /* MRF sources are only legal for send/sendc */
         assert(cg->inst->opcode == BRW_OPCODE_SEND ||
                cg->inst->opcode == BRW_OPCODE_SENDC);
         assert(CG_REG_NUM(src->origin) < 16);
         break;
      case BRW_IMMEDIATE_VALUE:
      default:
         assert(!"invalid src file");
         break;
      }

      if (cg->inst->access_mode == BRW_ALIGN_16) {
         assert(src->width == BRW_WIDTH_4);
         assert(src->horz_stride == BRW_HORIZONTAL_STRIDE_1);

         /* the lower 4 bits are reserved for the swizzle_[xy] */
         assert(!(src->origin & 0xf));

         dw = src->vert_stride << 21 |
              src->swizzle[3] << 18 |
              src->swizzle[2] << 16 |
              BRW_ADDRESS_DIRECT << 15 |
              src->negate << 14 |
              src->absolute << 13 |
              src->origin |
              src->swizzle[1] << 2 |
              src->swizzle[0];
      }
      else {
         assert(src->swizzle[0] == TOY_SWIZZLE_X &&
                src->swizzle[1] == TOY_SWIZZLE_Y &&
                src->swizzle[2] == TOY_SWIZZLE_Z &&
                src->swizzle[3] == TOY_SWIZZLE_W);

         dw = src->vert_stride << 21 |
              src->width << 18 |
              src->horz_stride << 16 |
              BRW_ADDRESS_DIRECT << 15 |
              src->negate << 14 |
              src->absolute << 13 |
              src->origin;
      }
   }

   /* the flag sub-register number lives in DW2 (src0) only */
   if (idx == 0)
      dw |= cg->flag_sub_reg_num << 25;

   return dw;
}
/**
 * Translate the destination operand to the higher 16 bits of DW1 of the
 * 1-src/2-src format.
 *
 * Covers three encodings: an immediate JIP for conditional branches, an
 * indirect (address-register-relative) destination, and a direct
 * destination.
 */
static uint16_t
translate_dst_region(const struct codegen *cg)
{
   const struct codegen_dst *dst = &cg->dst;
   uint16_t dw1_region;

   if (dst->file == BRW_IMMEDIATE_VALUE) {
      /* dst is immediate (JIP) when the opcode is a conditional branch */
      switch (cg->inst->opcode) {
      case BRW_OPCODE_IF:
      case BRW_OPCODE_ELSE:
      case BRW_OPCODE_ENDIF:
      case BRW_OPCODE_WHILE:
         assert(dst->type == BRW_REGISTER_TYPE_W);
         dw1_region = (dst->origin & 0xffff);
         break;
      default:
         assert(!"dst cannot be immediate");
         dw1_region = 0;
         break;
      }

      return dw1_region;
   }

   if (dst->indirect) {
      /* with indirect addressing, origin is a signed address offset */
      const int offset = (int) dst->origin;

      assert(dst->file == BRW_GENERAL_REGISTER_FILE);
      assert(offset < 512 && offset >= -512);

      if (cg->inst->access_mode == BRW_ALIGN_16) {
         /*
          * From the Sandy Bridge PRM, volume 4 part 2, page 144:
          *
          *     "Allthough Dst.HorzStride is a don't care for Align16, HW
          *      needs this to be programmed as 01."
          */
         assert(dst->horz_stride == BRW_HORIZONTAL_STRIDE_1);

         /* the lower 4 bits are reserved for the writemask */
         assert(!(dst->origin & 0xf));

         dw1_region = BRW_ADDRESS_REGISTER_INDIRECT_REGISTER << 15 |
                      dst->horz_stride << 13 |
                      dst->indirect_subreg << 10 |
                      (dst->origin & 0x3f0) |
                      dst->writemask;
      }
      else {
         /* align1 has no writemask field; require full XYZW */
         assert(dst->writemask == TOY_WRITEMASK_XYZW);

         dw1_region = BRW_ADDRESS_REGISTER_INDIRECT_REGISTER << 15 |
                      dst->horz_stride << 13 |
                      dst->indirect_subreg << 10 |
                      (dst->origin & 0x3ff);
      }
   }
   else {
      assert((dst->file == BRW_GENERAL_REGISTER_FILE &&
              CG_REG_NUM(dst->origin) < 128) ||
             (dst->file == BRW_MESSAGE_REGISTER_FILE &&
              CG_REG_NUM(dst->origin) < 16) ||
             (dst->file == BRW_ARCHITECTURE_REGISTER_FILE));

      if (cg->inst->access_mode == BRW_ALIGN_16) {
         /* similar to the indirect case */
         assert(dst->horz_stride == BRW_HORIZONTAL_STRIDE_1);
         assert(!(dst->origin & 0xf));

         dw1_region = BRW_ADDRESS_DIRECT << 15 |
                      dst->horz_stride << 13 |
                      dst->origin |
                      dst->writemask;
      }
      else {
         assert(dst->writemask == TOY_WRITEMASK_XYZW);

         dw1_region = BRW_ADDRESS_DIRECT << 15 |
                      dst->horz_stride << 13 |
                      dst->origin;
      }
   }

   return dw1_region;
}
/** |
* Translate the destination operand to DW1 of the 1-src/2-src format. |
*/ |
static uint32_t |
translate_dst(const struct codegen *cg) |
{ |
return translate_dst_region(cg) << 16 | |
cg->src[1].type << 12 | |
cg->src[1].file << 10 | |
cg->src[0].type << 7 | |
cg->src[0].file << 5 | |
cg->dst.type << 2 | |
cg->dst.file; |
} |
/**
 * Translate the instruction to DW0 of the 1-src/2-src format.
 *
 * Packs the opcode and all of the instruction control fields; compaction
 * and debug control are always off here.
 */
static uint32_t
translate_inst(const struct codegen *cg)
{
   const bool debug_ctrl = false;
   const bool cmpt_ctrl = false;

   /* virtual opcodes (> TOY_OPCODE_LAST_HW) must have been lowered */
   assert(cg->inst->opcode < 128);

   return cg->inst->saturate << 31 |
          debug_ctrl << 30 |
          cmpt_ctrl << 29 |
          cg->inst->acc_wr_ctrl << 28 |
          cg->inst->cond_modifier << 24 |
          cg->inst->exec_size << 21 |
          cg->inst->pred_inv << 20 |
          cg->inst->pred_ctrl << 16 |
          cg->inst->thread_ctrl << 14 |
          cg->inst->qtr_ctrl << 12 |
          cg->inst->dep_ctrl << 10 |
          cg->inst->mask_ctrl << 9 |
          cg->inst->access_mode << 8 |
          cg->inst->opcode;
}
/**
 * Codegen an instruction in 1-src/2-src format, writing the four encoded
 * DWords into code[0..3].  The 2-src format cannot encode src2.
 */
static void
codegen_inst(const struct codegen *cg, uint32_t *code)
{
   assert(src_is_null(cg, 2));

   code[0] = translate_inst(cg);
   code[1] = translate_dst(cg);
   code[2] = translate_src(cg, 0);
   code[3] = translate_src(cg, 1);
}
/**
 * Codegen an instruction in 3-src format, writing the four encoded DWords
 * into code[0..3].
 */
static void
codegen_inst_3src(const struct codegen *cg, uint32_t *code)
{
   const struct codegen_dst *dst = &cg->dst;
   uint32_t dw0, dw1, dw_src[3];
   int i;

   dw0 = translate_inst(cg);

   /*
    * 3-src instruction restrictions
    *
    *  - align16 with direct addressing
    *  - GRF or MRF dst
    *  - GRF src
    *  - sub_reg_num is DWORD aligned
    *  - no regioning except replication control
    *    (vert_stride == 0 && horz_stride == 0)
    */
   assert(cg->inst->access_mode == BRW_ALIGN_16);

   assert(!dst->indirect);
   assert((dst->file == BRW_GENERAL_REGISTER_FILE &&
           CG_REG_NUM(dst->origin) < 128) ||
          (dst->file == BRW_MESSAGE_REGISTER_FILE &&
           CG_REG_NUM(dst->origin) < 16));
   /* DWORD-aligned sub-register */
   assert(!(dst->origin & 0x3));
   assert(dst->horz_stride == BRW_HORIZONTAL_STRIDE_1);

   /* DW1: dst register/writemask, per-src modifiers, flag subreg, MRF bit */
   dw1 = dst->origin << 19 |
         dst->writemask << 17 |
         cg->src[2].negate << 9 |
         cg->src[2].absolute << 8 |
         cg->src[1].negate << 7 |
         cg->src[1].absolute << 6 |
         cg->src[0].negate << 5 |
         cg->src[0].absolute << 4 |
         cg->flag_sub_reg_num << 1 |
         (dst->file == BRW_MESSAGE_REGISTER_FILE);

   for (i = 0; i < 3; i++) {
      const struct codegen_src *src = &cg->src[i];

      assert(!src->indirect);
      assert(src->file == BRW_GENERAL_REGISTER_FILE &&
             CG_REG_NUM(src->origin) < 128);
      /* DWORD-aligned sub-register */
      assert(!(src->origin & 0x3));

      /* either normal <4;4,1> regioning or <0;4,0> replication */
      assert((src->vert_stride == BRW_VERTICAL_STRIDE_4 &&
              src->horz_stride == BRW_HORIZONTAL_STRIDE_1) ||
             (src->vert_stride == BRW_VERTICAL_STRIDE_0 &&
              src->horz_stride == BRW_HORIZONTAL_STRIDE_0));
      assert(src->width == BRW_WIDTH_4);

      /* 20-bit src field: reg/subreg, swizzle, and the replicate bit;
       * origin << 7 and swizzle[3] << 7 do not collide because origin's
       * low two bits are zero (DWORD-aligned), placing its contribution
       * at bit 9 and above */
      dw_src[i] = src->origin << 7 |
                  src->swizzle[3] << 7 |
                  src->swizzle[2] << 5 |
                  src->swizzle[1] << 3 |
                  src->swizzle[0] << 1 |
                  (src->vert_stride == BRW_VERTICAL_STRIDE_0 &&
                   src->horz_stride == BRW_HORIZONTAL_STRIDE_0);

      /* only the lower 20 bits are used */
      assert((dw_src[i] & 0xfffff) == dw_src[i]);
   }

   code[0] = dw0;
   code[1] = dw1;
   /* concatenate the bits of dw_src */
   code[2] = (dw_src[1] & 0x7ff ) << 21 | dw_src[0];
   code[3] = dw_src[2] << 10 | (dw_src[1] >> 11);
}
/**
 * Sanity check the region parameters of the operands against the rules of
 * the Sandy Bridge PRM, volume 4 part 2, page 94 (the numbered comments
 * below refer to those rules).  Debug-build asserts only.
 */
static void
codegen_validate_region_restrictions(const struct codegen *cg)
{
   /* decode the encoded region fields into element counts */
   const int exec_size_map[] = {
      [BRW_EXECUTE_1] = 1,
      [BRW_EXECUTE_2] = 2,
      [BRW_EXECUTE_4] = 4,
      [BRW_EXECUTE_8] = 8,
      [BRW_EXECUTE_16] = 16,
      [BRW_EXECUTE_32] = 32,
   };
   const int width_map[] = {
      [BRW_WIDTH_1] = 1,
      [BRW_WIDTH_2] = 2,
      [BRW_WIDTH_4] = 4,
      [BRW_WIDTH_8] = 8,
      [BRW_WIDTH_16] = 16,
   };
   const int horz_stride_map[] = {
      [BRW_HORIZONTAL_STRIDE_0] = 0,
      [BRW_HORIZONTAL_STRIDE_1] = 1,
      [BRW_HORIZONTAL_STRIDE_2] = 2,
      [BRW_HORIZONTAL_STRIDE_4] = 4,
   };
   const int vert_stride_map[] = {
      [BRW_VERTICAL_STRIDE_0] = 0,
      [BRW_VERTICAL_STRIDE_1] = 1,
      [BRW_VERTICAL_STRIDE_2] = 2,
      [BRW_VERTICAL_STRIDE_4] = 4,
      [BRW_VERTICAL_STRIDE_8] = 8,
      [BRW_VERTICAL_STRIDE_16] = 16,
      [BRW_VERTICAL_STRIDE_32] = 32,
      [BRW_VERTICAL_STRIDE_64] = 64,
      [BRW_VERTICAL_STRIDE_128] = 128,
      [BRW_VERTICAL_STRIDE_256] = 256,
      [BRW_VERTICAL_STRIDE_ONE_DIMENSIONAL] = 0,
   };
   const int exec_size = exec_size_map[cg->inst->exec_size];
   int i;

   /* Sandy Bridge PRM, volume 4 part 2, page 94 */

   /* 1. (we don't do 32 anyway) */
   assert(exec_size <= 16);

   for (i = 0; i < Elements(cg->src); i++) {
      /* NOTE(review): the maps are indexed before the null-src check below;
       * this assumes null sources still carry in-range region fields */
      const int width = width_map[cg->src[i].width];
      const int horz_stride = horz_stride_map[cg->src[i].horz_stride];
      const int vert_stride = vert_stride_map[cg->src[i].vert_stride];

      /* sources after the first null one are unused */
      if (src_is_null(cg, i))
         break;

      /* 3. */
      assert(exec_size >= width);

      if (exec_size == width) {
         /* 4. & 5. */
         if (horz_stride)
            assert(vert_stride == width * horz_stride);
      }

      if (width == 1) {
         /* 6. */
         assert(horz_stride == 0);

         /* 7. */
         if (exec_size == 1)
            assert(vert_stride == 0);
      }

      /* 8. */
      if (!vert_stride && !horz_stride)
         assert(width == 1);
   }

   /* derived from 10.1.2. & 10.2. */
   assert(cg->dst.horz_stride != BRW_HORIZONTAL_STRIDE_0);
}
static unsigned |
translate_vfile(enum toy_file file) |
{ |
switch (file) { |
case TOY_FILE_ARF: return BRW_ARCHITECTURE_REGISTER_FILE; |
case TOY_FILE_GRF: return BRW_GENERAL_REGISTER_FILE; |
case TOY_FILE_MRF: return BRW_MESSAGE_REGISTER_FILE; |
case TOY_FILE_IMM: return BRW_IMMEDIATE_VALUE; |
default: |
assert(!"unhandled toy file"); |
return BRW_GENERAL_REGISTER_FILE; |
} |
} |
static unsigned |
translate_vtype(enum toy_type type) |
{ |
switch (type) { |
case TOY_TYPE_F: return BRW_REGISTER_TYPE_F; |
case TOY_TYPE_D: return BRW_REGISTER_TYPE_D; |
case TOY_TYPE_UD: return BRW_REGISTER_TYPE_UD; |
case TOY_TYPE_W: return BRW_REGISTER_TYPE_W; |
case TOY_TYPE_UW: return BRW_REGISTER_TYPE_UW; |
case TOY_TYPE_V: return BRW_REGISTER_TYPE_V; |
default: |
assert(!"unhandled toy type"); |
return BRW_REGISTER_TYPE_F; |
} |
} |
static unsigned |
translate_writemask(enum toy_writemask writemask) |
{ |
/* TOY_WRITEMASK_* are compatible with the hardware definitions */ |
assert(writemask <= 0xf); |
return writemask; |
} |
static unsigned |
translate_swizzle(enum toy_swizzle swizzle) |
{ |
/* TOY_SWIZZLE_* are compatible with the hardware definitions */ |
assert(swizzle <= 3); |
return swizzle; |
} |
/** |
* Prepare for generating an instruction. |
*/ |
static void |
codegen_prepare(struct codegen *cg, const struct toy_inst *inst, |
int pc, int rect_linear_width) |
{ |
int i; |
cg->inst = inst; |
cg->pc = pc; |
cg->flag_sub_reg_num = 0; |
cg->dst.file = translate_vfile(inst->dst.file); |
cg->dst.type = translate_vtype(inst->dst.type); |
cg->dst.indirect = inst->dst.indirect; |
cg->dst.indirect_subreg = inst->dst.indirect_subreg; |
cg->dst.origin = inst->dst.val32; |
/* |
* From the Sandy Bridge PRM, volume 4 part 2, page 81: |
* |
* "For a word or an unsigned word immediate data, software must |
* replicate the same 16-bit immediate value to both the lower word |
* and the high word of the 32-bit immediate field in an instruction." |
*/ |
if (inst->dst.file == TOY_FILE_IMM) { |
switch (inst->dst.type) { |
case TOY_TYPE_W: |
case TOY_TYPE_UW: |
cg->dst.origin &= 0xffff; |
cg->dst.origin |= cg->dst.origin << 16; |
break; |
default: |
break; |
} |
} |
cg->dst.writemask = translate_writemask(inst->dst.writemask); |
switch (inst->dst.rect) { |
case TOY_RECT_LINEAR: |
cg->dst.horz_stride = BRW_HORIZONTAL_STRIDE_1; |
break; |
default: |
assert(!"unsupported dst region"); |
cg->dst.horz_stride = BRW_HORIZONTAL_STRIDE_1; |
break; |
} |
for (i = 0; i < Elements(cg->src); i++) { |
struct codegen_src *src = &cg->src[i]; |
src->file = translate_vfile(inst->src[i].file); |
src->type = translate_vtype(inst->src[i].type); |
src->indirect = inst->src[i].indirect; |
src->indirect_subreg = inst->src[i].indirect_subreg; |
src->origin = inst->src[i].val32; |
/* do the same for src */ |
if (inst->dst.file == TOY_FILE_IMM) { |
switch (inst->src[i].type) { |
case TOY_TYPE_W: |
case TOY_TYPE_UW: |
src->origin &= 0xffff; |
src->origin |= src->origin << 16; |
break; |
default: |
break; |
} |
} |
src->swizzle[0] = translate_swizzle(inst->src[i].swizzle_x); |
src->swizzle[1] = translate_swizzle(inst->src[i].swizzle_y); |
src->swizzle[2] = translate_swizzle(inst->src[i].swizzle_z); |
src->swizzle[3] = translate_swizzle(inst->src[i].swizzle_w); |
src->absolute = inst->src[i].absolute; |
src->negate = inst->src[i].negate; |
switch (inst->src[i].rect) { |
case TOY_RECT_LINEAR: |
switch (rect_linear_width) { |
case 1: |
src->vert_stride = BRW_VERTICAL_STRIDE_1; |
src->width = BRW_WIDTH_1; |
break; |
case 2: |
src->vert_stride = BRW_VERTICAL_STRIDE_2; |
src->width = BRW_WIDTH_2; |
break; |
case 4: |
src->vert_stride = BRW_VERTICAL_STRIDE_4; |
src->width = BRW_WIDTH_4; |
break; |
case 8: |
src->vert_stride = BRW_VERTICAL_STRIDE_8; |
src->width = BRW_WIDTH_8; |
break; |
case 16: |
src->vert_stride = BRW_VERTICAL_STRIDE_16; |
src->width = BRW_WIDTH_16; |
break; |
default: |
assert(!"unsupported TOY_RECT_LINEAR width"); |
src->vert_stride = BRW_VERTICAL_STRIDE_1; |
src->width = BRW_WIDTH_1; |
break; |
} |
src->horz_stride = BRW_HORIZONTAL_STRIDE_1; |
break; |
case TOY_RECT_041: |
src->vert_stride = BRW_VERTICAL_STRIDE_0; |
src->width = BRW_WIDTH_4; |
src->horz_stride = BRW_HORIZONTAL_STRIDE_1; |
break; |
case TOY_RECT_010: |
src->vert_stride = BRW_VERTICAL_STRIDE_0; |
src->width = BRW_WIDTH_1; |
src->horz_stride = BRW_HORIZONTAL_STRIDE_0; |
break; |
case TOY_RECT_220: |
src->vert_stride = BRW_VERTICAL_STRIDE_2; |
src->width = BRW_WIDTH_2; |
src->horz_stride = BRW_HORIZONTAL_STRIDE_0; |
break; |
case TOY_RECT_440: |
src->vert_stride = BRW_VERTICAL_STRIDE_4; |
src->width = BRW_WIDTH_4; |
src->horz_stride = BRW_HORIZONTAL_STRIDE_0; |
break; |
case TOY_RECT_240: |
src->vert_stride = BRW_VERTICAL_STRIDE_2; |
src->width = BRW_WIDTH_4; |
src->horz_stride = BRW_HORIZONTAL_STRIDE_0; |
break; |
default: |
assert(!"unsupported src region"); |
src->vert_stride = BRW_VERTICAL_STRIDE_1; |
src->width = BRW_WIDTH_1; |
src->horz_stride = BRW_HORIZONTAL_STRIDE_1; |
break; |
} |
} |
} |
/** |
* Generate HW shader code. The instructions should have been legalized. |
*/ |
void * |
toy_compiler_assemble(struct toy_compiler *tc, int *size) |
{ |
const struct toy_inst *inst; |
uint32_t *code; |
int pc; |
code = MALLOC(tc->num_instructions * 4 * sizeof(uint32_t)); |
if (!code) |
return NULL; |
pc = 0; |
tc_head(tc); |
while ((inst = tc_next(tc)) != NULL) { |
uint32_t *dw = &code[pc * 4]; |
struct codegen cg; |
if (pc >= tc->num_instructions) { |
tc_fail(tc, "wrong instructoun count"); |
break; |
} |
codegen_prepare(&cg, inst, pc, tc->rect_linear_width); |
codegen_validate_region_restrictions(&cg); |
switch (inst->opcode) { |
case BRW_OPCODE_MAD: |
codegen_inst_3src(&cg, dw); |
break; |
default: |
codegen_inst(&cg, dw); |
break; |
} |
pc++; |
} |
/* never return an invalid kernel */ |
if (tc->fail) { |
FREE(code); |
return NULL; |
} |
if (size) |
*size = pc * 4 * sizeof(uint32_t); |
return code; |
} |
/contrib/sdk/sources/Mesa/src/gallium/drivers/ilo/shader/toy_compiler_disasm.c |
---|
0,0 → 1,1385 |
/* |
* Copyright © 2008 Keith Packard |
* |
* Permission to use, copy, modify, distribute, and sell this software and its |
* documentation for any purpose is hereby granted without fee, provided that |
* the above copyright notice appear in all copies and that both that copyright |
* notice and this permission notice appear in supporting documentation, and |
* that the name of the copyright holders not be used in advertising or |
* publicity pertaining to distribution of the software without specific, |
* written prior permission. The copyright holders make no representations |
* about the suitability of this software for any purpose. It is provided "as |
* is" without express or implied warranty. |
* |
* THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, |
* INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO |
* EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY SPECIAL, INDIRECT OR |
* CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, |
* DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER |
* TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE |
* OF THIS SOFTWARE. |
*/ |
#include <stdio.h> |
#include <stdlib.h> |
#include <string.h> |
#include <getopt.h> |
#include <unistd.h> |
#include <stdarg.h> |
typedef short GLshort; |
typedef int GLint; |
typedef unsigned char GLubyte; |
typedef unsigned int GLuint; |
typedef float GLfloat; |
#include <stdint.h> |
#include "brw_defines.h" |
#include "brw_structs.h" |
static int brw_disasm (FILE *file, struct brw_instruction *inst, int gen); |
#include "toy_compiler.h" |
void |
toy_compiler_disassemble(struct toy_compiler *tc, const void *kernel, int size) |
{ |
/* set this to true to dump the hex */ |
const bool dump_hex = false; |
const struct brw_instruction *instructions = kernel; |
int i; |
for (i = 0; i < size / sizeof(*instructions); i++) { |
if (dump_hex) { |
const uint32_t *dwords = (const uint32_t *) &instructions[i]; |
ilo_printf("0x%08x 0x%08x 0x%08x 0x%08x ", |
dwords[3], dwords[2], dwords[1], dwords[0]); |
} |
brw_disasm(stderr, (struct brw_instruction *) &instructions[i], |
ILO_GEN_GET_MAJOR(tc->dev->gen)); |
} |
} |
static const struct opcode_desc { |
char *name; |
int nsrc; |
int ndst; |
} opcode_descs[128] = { |
[BRW_OPCODE_MOV] = { .name = "mov", .nsrc = 1, .ndst = 1 }, |
[BRW_OPCODE_FRC] = { .name = "frc", .nsrc = 1, .ndst = 1 }, |
[BRW_OPCODE_RNDU] = { .name = "rndu", .nsrc = 1, .ndst = 1 }, |
[BRW_OPCODE_RNDD] = { .name = "rndd", .nsrc = 1, .ndst = 1 }, |
[BRW_OPCODE_RNDE] = { .name = "rnde", .nsrc = 1, .ndst = 1 }, |
[BRW_OPCODE_RNDZ] = { .name = "rndz", .nsrc = 1, .ndst = 1 }, |
[BRW_OPCODE_NOT] = { .name = "not", .nsrc = 1, .ndst = 1 }, |
[BRW_OPCODE_LZD] = { .name = "lzd", .nsrc = 1, .ndst = 1 }, |
[BRW_OPCODE_F32TO16] = { .name = "f32to16", .nsrc = 1, .ndst = 1 }, |
[BRW_OPCODE_F16TO32] = { .name = "f16to32", .nsrc = 1, .ndst = 1 }, |
[BRW_OPCODE_MUL] = { .name = "mul", .nsrc = 2, .ndst = 1 }, |
[BRW_OPCODE_MAC] = { .name = "mac", .nsrc = 2, .ndst = 1 }, |
[BRW_OPCODE_MACH] = { .name = "mach", .nsrc = 2, .ndst = 1 }, |
[BRW_OPCODE_LINE] = { .name = "line", .nsrc = 2, .ndst = 1 }, |
[BRW_OPCODE_PLN] = { .name = "pln", .nsrc = 2, .ndst = 1 }, |
[BRW_OPCODE_MAD] = { .name = "mad", .nsrc = 3, .ndst = 1 }, |
[BRW_OPCODE_LRP] = { .name = "lrp", .nsrc = 3, .ndst = 1 }, |
[BRW_OPCODE_SAD2] = { .name = "sad2", .nsrc = 2, .ndst = 1 }, |
[BRW_OPCODE_SADA2] = { .name = "sada2", .nsrc = 2, .ndst = 1 }, |
[BRW_OPCODE_DP4] = { .name = "dp4", .nsrc = 2, .ndst = 1 }, |
[BRW_OPCODE_DPH] = { .name = "dph", .nsrc = 2, .ndst = 1 }, |
[BRW_OPCODE_DP3] = { .name = "dp3", .nsrc = 2, .ndst = 1 }, |
[BRW_OPCODE_DP2] = { .name = "dp2", .nsrc = 2, .ndst = 1 }, |
[BRW_OPCODE_MATH] = { .name = "math", .nsrc = 2, .ndst = 1 }, |
[BRW_OPCODE_AVG] = { .name = "avg", .nsrc = 2, .ndst = 1 }, |
[BRW_OPCODE_ADD] = { .name = "add", .nsrc = 2, .ndst = 1 }, |
[BRW_OPCODE_SEL] = { .name = "sel", .nsrc = 2, .ndst = 1 }, |
[BRW_OPCODE_AND] = { .name = "and", .nsrc = 2, .ndst = 1 }, |
[BRW_OPCODE_OR] = { .name = "or", .nsrc = 2, .ndst = 1 }, |
[BRW_OPCODE_XOR] = { .name = "xor", .nsrc = 2, .ndst = 1 }, |
[BRW_OPCODE_SHR] = { .name = "shr", .nsrc = 2, .ndst = 1 }, |
[BRW_OPCODE_SHL] = { .name = "shl", .nsrc = 2, .ndst = 1 }, |
[BRW_OPCODE_ASR] = { .name = "asr", .nsrc = 2, .ndst = 1 }, |
[BRW_OPCODE_CMP] = { .name = "cmp", .nsrc = 2, .ndst = 1 }, |
[BRW_OPCODE_CMPN] = { .name = "cmpn", .nsrc = 2, .ndst = 1 }, |
[BRW_OPCODE_SEND] = { .name = "send", .nsrc = 1, .ndst = 1 }, |
[BRW_OPCODE_SENDC] = { .name = "sendc", .nsrc = 1, .ndst = 1 }, |
[BRW_OPCODE_NOP] = { .name = "nop", .nsrc = 0, .ndst = 0 }, |
[BRW_OPCODE_JMPI] = { .name = "jmpi", .nsrc = 0, .ndst = 0 }, |
[BRW_OPCODE_IF] = { .name = "if", .nsrc = 2, .ndst = 0 }, |
[BRW_OPCODE_IFF] = { .name = "iff", .nsrc = 2, .ndst = 1 }, |
[BRW_OPCODE_WHILE] = { .name = "while", .nsrc = 2, .ndst = 0 }, |
[BRW_OPCODE_ELSE] = { .name = "else", .nsrc = 2, .ndst = 0 }, |
[BRW_OPCODE_BREAK] = { .name = "break", .nsrc = 2, .ndst = 0 }, |
[BRW_OPCODE_CONTINUE] = { .name = "cont", .nsrc = 1, .ndst = 0 }, |
[BRW_OPCODE_HALT] = { .name = "halt", .nsrc = 1, .ndst = 0 }, |
[BRW_OPCODE_MSAVE] = { .name = "msave", .nsrc = 1, .ndst = 1 }, |
[BRW_OPCODE_PUSH] = { .name = "push", .nsrc = 1, .ndst = 1 }, |
[BRW_OPCODE_MRESTORE] = { .name = "mrest", .nsrc = 1, .ndst = 1 }, |
[BRW_OPCODE_POP] = { .name = "pop", .nsrc = 2, .ndst = 0 }, |
[BRW_OPCODE_WAIT] = { .name = "wait", .nsrc = 1, .ndst = 0 }, |
[BRW_OPCODE_DO] = { .name = "do", .nsrc = 0, .ndst = 0 }, |
[BRW_OPCODE_ENDIF] = { .name = "endif", .nsrc = 2, .ndst = 0 }, |
}; |
static const struct opcode_desc *opcode = opcode_descs; |
/* conditional modifier suffixes, indexed by BRW_CONDITIONAL_* */
static const char * const conditional_modifier[16] = {
   [BRW_CONDITIONAL_NONE] = "",
   [BRW_CONDITIONAL_Z] = ".e",
   [BRW_CONDITIONAL_NZ] = ".ne",
   [BRW_CONDITIONAL_G] = ".g",
   [BRW_CONDITIONAL_GE] = ".ge",
   [BRW_CONDITIONAL_L] = ".l",
   [BRW_CONDITIONAL_LE] = ".le",
   [BRW_CONDITIONAL_R] = ".r",
   [BRW_CONDITIONAL_O] = ".o",
   [BRW_CONDITIONAL_U] = ".u",
};

/* source negation prefix */
static const char * const negate[2] = {
   [0] = "",
   [1] = "-",
};

/* source absolute-value prefix */
static const char * const _abs[2] = {
   [0] = "",
   [1] = "(abs)",
};

/* vertical stride in elements, indexed by the hardware encoding */
static const char * const vert_stride[16] = {
   [0] = "0",
   [1] = "1",
   [2] = "2",
   [3] = "4",
   [4] = "8",
   [5] = "16",
   [6] = "32",
   [15] = "VxH",
};

/* region width in elements, indexed by the hardware encoding */
static const char * const width[8] = {
   [0] = "1",
   [1] = "2",
   [2] = "4",
   [3] = "8",
   [4] = "16",
};

/* horizontal stride in elements, indexed by the hardware encoding */
static const char * const horiz_stride[4] = {
   [0] = "0",
   [1] = "1",
   [2] = "2",
   [3] = "4"
};

/* swizzle channel-select names */
static const char * const chan_sel[4] = {
   [0] = "x",
   [1] = "y",
   [2] = "z",
   [3] = "w",
};

/* debug control suffix */
static const char * const debug_ctrl[2] = {
   [0] = "",
   [1] = ".breakpoint"
};

/* saturation modifier suffix */
static const char * const saturate[2] = {
   [0] = "",
   [1] = ".sat"
};

/* accumulator-write-enable annotation */
static const char * const accwr[2] = {
   [0] = "",
   [1] = "AccWrEnable"
};

/* write-enable (channel masking) control */
static const char * const wectrl[2] = {
   [0] = "WE_normal",
   [1] = "WE_all"
};

/* execution size in channels, indexed by the hardware encoding */
static const char * const exec_size[8] = {
   [0] = "1",
   [1] = "2",
   [2] = "4",
   [3] = "8",
   [4] = "16",
   [5] = "32"
};

/* predicate inversion marker printed before the flag register */
static const char * const pred_inv[2] = {
   [0] = "+",
   [1] = "-"
};

/* align16 predicate control suffixes, indexed by the hardware encoding */
static const char * const pred_ctrl_align16[16] = {
   [1] = "",
   [2] = ".x",
   [3] = ".y",
   [4] = ".z",
   [5] = ".w",
   [6] = ".any4h",
   [7] = ".all4h",
};

/* align1 predicate control suffixes, indexed by the hardware encoding */
static const char * const pred_ctrl_align1[16] = {
   [1] = "",
   [2] = ".anyv",
   [3] = ".allv",
   [4] = ".any2h",
   [5] = ".all2h",
   [6] = ".any4h",
   [7] = ".all4h",
   [8] = ".any8h",
   [9] = ".all8h",
   [10] = ".any16h",
   [11] = ".all16h",
};

/* thread control annotation */
static const char * const thread_ctrl[4] = {
   [0] = "",
   [2] = "switch"
};

/* instruction compression control annotation */
static const char * const compr_ctrl[4] = {
   [0] = "",
   [1] = "sechalf",
   [2] = "compr",
   [3] = "compr4",
};

/* destination dependency control annotation */
static const char * const dep_ctrl[4] = {
   [0] = "",
   [1] = "NoDDClr",
   [2] = "NoDDChk",
   [3] = "NoDDClr,NoDDChk",
};

/* mask control annotation */
static const char * const mask_ctrl[4] = {
   [0] = "",
   [1] = "nomask",
};

/* register access mode annotation */
static const char * const access_mode[2] = {
   [0] = "align1",
   [1] = "align16",
};
/* register data type suffixes, indexed by the hardware type encoding */
static const char * const reg_encoding[8] = {
   [0] = "UD",
   [1] = "D",
   [2] = "UW",
   [3] = "W",
   [4] = "UB",
   [5] = "B",
   [7] = "F"
};

/* byte size of each register data type, indexed by the hardware type
 * encoding.  NOTE(review): non-static — presumably referenced from other
 * translation units; confirm before changing linkage. */
const int reg_type_size[8] = {
   [0] = 4,
   [1] = 4,
   [2] = 2,
   [3] = 2,
   [4] = 1,
   [5] = 1,
   [7] = 4
};

/* register file letters: ARF, GRF, MRF, immediate */
static const char * const reg_file[4] = {
   [0] = "A",
   [1] = "g",
   [2] = "m",
   [3] = "imm",
};

/* align16 destination writemask suffixes; 0xf (all channels) prints
 * nothing */
static const char * const writemask[16] = {
   [0x0] = ".",
   [0x1] = ".x",
   [0x2] = ".y",
   [0x3] = ".xy",
   [0x4] = ".z",
   [0x5] = ".xz",
   [0x6] = ".yz",
   [0x7] = ".xyz",
   [0x8] = ".w",
   [0x9] = ".xw",
   [0xa] = ".yw",
   [0xb] = ".xyw",
   [0xc] = ".zw",
   [0xd] = ".xzw",
   [0xe] = ".yzw",
   [0xf] = "",
};

/* end-of-thread annotation on messages */
static const char * const end_of_thread[2] = {
   [0] = "",
   [1] = "EOT"
};

/* shared function (message target) names */
static const char * const target_function[16] = {
   [BRW_SFID_NULL] = "null",
   [BRW_SFID_MATH] = "math",
   [BRW_SFID_SAMPLER] = "sampler",
   [BRW_SFID_MESSAGE_GATEWAY] = "gateway",
   [BRW_SFID_DATAPORT_READ] = "read",
   [BRW_SFID_DATAPORT_WRITE] = "write",
   [BRW_SFID_URB] = "urb",
   [BRW_SFID_THREAD_SPAWNER] = "thread_spawner"
};

/* shared function (message target) names, gen6+ encoding */
static const char * const target_function_gen6[16] = {
   [BRW_SFID_NULL] = "null",
   [BRW_SFID_MATH] = "math",
   [BRW_SFID_SAMPLER] = "sampler",
   [BRW_SFID_MESSAGE_GATEWAY] = "gateway",
   [BRW_SFID_URB] = "urb",
   [BRW_SFID_THREAD_SPAWNER] = "thread_spawner",
   [GEN6_SFID_DATAPORT_SAMPLER_CACHE] = "sampler",
   [GEN6_SFID_DATAPORT_RENDER_CACHE] = "render",
   [GEN6_SFID_DATAPORT_CONSTANT_CACHE] = "const",
   [GEN7_SFID_DATAPORT_DATA_CACHE] = "data"
};

/* gen6 dataport message type descriptions */
static const char * const dp_rc_msg_type_gen6[16] = {
   [BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ] = "OWORD block read",
   [GEN6_DATAPORT_READ_MESSAGE_RENDER_UNORM_READ] = "RT UNORM read",
   [GEN6_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ] = "OWORD dual block read",
   [GEN6_DATAPORT_READ_MESSAGE_MEDIA_BLOCK_READ] = "media block read",
   [GEN6_DATAPORT_READ_MESSAGE_OWORD_UNALIGN_BLOCK_READ] = "OWORD unaligned block read",
   [GEN6_DATAPORT_READ_MESSAGE_DWORD_SCATTERED_READ] = "DWORD scattered read",
   [GEN6_DATAPORT_WRITE_MESSAGE_DWORD_ATOMIC_WRITE] = "DWORD atomic write",
   [GEN6_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE] = "OWORD block write",
   [GEN6_DATAPORT_WRITE_MESSAGE_OWORD_DUAL_BLOCK_WRITE] = "OWORD dual block write",
   [GEN6_DATAPORT_WRITE_MESSAGE_MEDIA_BLOCK_WRITE] = "media block write",
   [GEN6_DATAPORT_WRITE_MESSAGE_DWORD_SCATTERED_WRITE] = "DWORD scattered write",
   [GEN6_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE] = "RT write",
   [GEN6_DATAPORT_WRITE_MESSAGE_STREAMED_VB_WRITE] = "streamed VB write",
   [GEN6_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_UNORM_WRITE] = "RT UNORMc write",
};

/* extended math function names, indexed by BRW_MATH_FUNCTION_* */
static const char * const math_function[16] = {
   [BRW_MATH_FUNCTION_INV] = "inv",
   [BRW_MATH_FUNCTION_LOG] = "log",
   [BRW_MATH_FUNCTION_EXP] = "exp",
   [BRW_MATH_FUNCTION_SQRT] = "sqrt",
   [BRW_MATH_FUNCTION_RSQ] = "rsq",
   [BRW_MATH_FUNCTION_SIN] = "sin",
   [BRW_MATH_FUNCTION_COS] = "cos",
   [BRW_MATH_FUNCTION_SINCOS] = "sincos",
   [BRW_MATH_FUNCTION_FDIV] = "fdiv",
   [BRW_MATH_FUNCTION_POW] = "pow",
   [BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER] = "intdivmod",
   [BRW_MATH_FUNCTION_INT_DIV_QUOTIENT] = "intdiv",
   [BRW_MATH_FUNCTION_INT_DIV_REMAINDER] = "intmod",
};

/* math message saturate annotation */
static const char * const math_saturate[2] = {
   [0] = "",
   [1] = "sat"
};

/* math message signed annotation */
static const char * const math_signed[2] = {
   [0] = "",
   [1] = "signed"
};

/* math message scalar annotation */
static const char * const math_scalar[2] = {
   [0] = "",
   [1] = "scalar"
};

/* math message precision annotation */
static const char * const math_precision[2] = {
   [0] = "",
   [1] = "partial_precision"
};

/* URB message opcode names */
static const char * const urb_opcode[2] = {
   [0] = "urb_write",
   [1] = "ff_sync",
};

/* URB swizzle control annotations, indexed by BRW_URB_SWIZZLE_* */
static const char * const urb_swizzle[4] = {
   [BRW_URB_SWIZZLE_NONE] = "",
   [BRW_URB_SWIZZLE_INTERLEAVE] = "interleave",
   [BRW_URB_SWIZZLE_TRANSPOSE] = "transpose",
};

/* URB allocate annotation */
static const char * const urb_allocate[2] = {
   [0] = "",
   [1] = "allocate"
};

/* URB used annotation */
static const char * const urb_used[2] = {
   [0] = "",
   [1] = "used"
};

/* URB complete annotation */
static const char * const urb_complete[2] = {
   [0] = "",
   [1] = "complete"
};

/* sampler message return format suffixes */
static const char * const sampler_target_format[4] = {
   [0] = "F",
   [2] = "UD",
   [3] = "D"
};
/* current output column; used by pad () to line fields up */
static int column;

/* Emit @s to @file and advance the column counter.  Always returns 0. */
static int string (FILE *file, const char *s)
{
   fputs (s, file);
   column += strlen (s);
   return 0;
}
/* printf-style output routed through string () so the column counter
 * stays in sync; always returns 0 */
static int format (FILE *f, const char *fmt, ...)
{
   va_list args;
   char buf[1024];

   va_start (args, fmt);
   vsnprintf (buf, sizeof (buf) - 1, fmt, args);
   va_end (args);

   string (f, buf);
   return 0;
}
static int newline (FILE *f) |
{ |
putc ('\n', f); |
column = 0; |
return 0; |
} |
static int pad (FILE *f, int c) |
{ |
do |
string (f, " "); |
while (column < c); |
return 0; |
} |
static int control (FILE *file, const char *name, const char * const ctrl[], |
GLuint id, int *space) |
{ |
if (!ctrl[id]) { |
fprintf (file, "*** invalid %s value %d ", |
name, id); |
return 1; |
} |
if (ctrl[id][0]) |
{ |
if (space && *space) |
string (file, " "); |
string (file, ctrl[id]); |
if (space) |
*space = 1; |
} |
return 0; |
} |
static int print_opcode (FILE *file, int id) |
{ |
if (!opcode[id].name) { |
format (file, "*** invalid opcode value %d ", id); |
return 1; |
} |
string (file, opcode[id].name); |
return 0; |
} |
/**
 * Print a register name.
 *
 * Returns -1 for registers that take no region/type decoration (null and
 * ip) so callers can stop printing the operand, otherwise the control()
 * error flag (0 on success).
 */
static int reg (FILE *file, GLuint _reg_file, GLuint _reg_nr)
{
   int err = 0;

   /* Clear the Compr4 instruction compression bit. */
   if (_reg_file == BRW_MESSAGE_REGISTER_FILE)
      _reg_nr &= ~(1 << 7);

   if (_reg_file == BRW_ARCHITECTURE_REGISTER_FILE) {
      /* the high nibble selects the ARF, the low nibble its index */
      switch (_reg_nr & 0xf0) {
      case BRW_ARF_NULL:
         string (file, "null");
         return -1;
      case BRW_ARF_ADDRESS:
         format (file, "a%d", _reg_nr & 0x0f);
         break;
      case BRW_ARF_ACCUMULATOR:
         format (file, "acc%d", _reg_nr & 0x0f);
         break;
      case BRW_ARF_FLAG:
         format (file, "f%d", _reg_nr & 0x0f);
         break;
      case BRW_ARF_MASK:
         format (file, "mask%d", _reg_nr & 0x0f);
         break;
      case BRW_ARF_MASK_STACK:
         format (file, "msd%d", _reg_nr & 0x0f);
         break;
      case BRW_ARF_STATE:
         format (file, "sr%d", _reg_nr & 0x0f);
         break;
      case BRW_ARF_CONTROL:
         format (file, "cr%d", _reg_nr & 0x0f);
         break;
      case BRW_ARF_NOTIFICATION_COUNT:
         format (file, "n%d", _reg_nr & 0x0f);
         break;
      case BRW_ARF_IP:
         string (file, "ip");
         return -1;   /* dead "break" that followed this return removed */
      default:
         format (file, "ARF%d", _reg_nr);
         break;
      }
   } else {
      err |= control (file, "src reg file", reg_file, _reg_file, NULL);
      format (file, "%d", _reg_nr);
   }
   return err;
}
/* Print the destination operand of @inst.  Always returns 0; the local
 * error flag only gates the early-out after reg() prints null/ip. */
static int dest (FILE *file, struct brw_instruction *inst)
{
   int err = 0;

   if (inst->header.access_mode == BRW_ALIGN_1)
   {
      if (inst->bits1.da1.dest_address_mode == BRW_ADDRESS_DIRECT)
      {
         /* direct align1: gN.M<stride>type */
         err |= reg (file, inst->bits1.da1.dest_reg_file, inst->bits1.da1.dest_reg_nr);
         if (err == -1)
            return 0;
         if (inst->bits1.da1.dest_subreg_nr)
            /* subreg_nr counts bytes; print it in units of the dest type */
            format (file, ".%d", inst->bits1.da1.dest_subreg_nr /
                    reg_type_size[inst->bits1.da1.dest_reg_type]);
         string (file, "<");
         err |= control (file, "horiz stride", horiz_stride, inst->bits1.da1.dest_horiz_stride, NULL);
         string (file, ">");
         err |= control (file, "dest reg encoding", reg_encoding, inst->bits1.da1.dest_reg_type, NULL);
      }
      else
      {
         /* register-indirect align1: g[a0.M offset]<stride>type */
         string (file, "g[a0");
         if (inst->bits1.ia1.dest_subreg_nr)
            format (file, ".%d", inst->bits1.ia1.dest_subreg_nr /
                    reg_type_size[inst->bits1.ia1.dest_reg_type]);
         if (inst->bits1.ia1.dest_indirect_offset)
            format (file, " %d", inst->bits1.ia1.dest_indirect_offset);
         string (file, "]<");
         err |= control (file, "horiz stride", horiz_stride, inst->bits1.ia1.dest_horiz_stride, NULL);
         string (file, ">");
         err |= control (file, "dest reg encoding", reg_encoding, inst->bits1.ia1.dest_reg_type, NULL);
      }
   }
   else
   {
      if (inst->bits1.da16.dest_address_mode == BRW_ADDRESS_DIRECT)
      {
         /* direct align16: gN.M<1>writemask type */
         err |= reg (file, inst->bits1.da16.dest_reg_file, inst->bits1.da16.dest_reg_nr);
         if (err == -1)
            return 0;
         if (inst->bits1.da16.dest_subreg_nr)
            format (file, ".%d", inst->bits1.da16.dest_subreg_nr /
                    reg_type_size[inst->bits1.da16.dest_reg_type]);
         string (file, "<1>");
         err |= control (file, "writemask", writemask, inst->bits1.da16.dest_writemask, NULL);
         err |= control (file, "dest reg encoding", reg_encoding, inst->bits1.da16.dest_reg_type, NULL);
      }
      else
      {
         /* NOTE(review): err is set here but the function still returns
          * 0 below; callers never see this failure */
         err = 1;
         string (file, "Indirect align16 address mode not supported");
      }
   }
   return 0;
}
static int dest_3src (FILE *file, struct brw_instruction *inst) |
{ |
int err = 0; |
uint32_t reg_file; |
if (inst->bits1.da3src.dest_reg_file) |
reg_file = BRW_MESSAGE_REGISTER_FILE; |
else |
reg_file = BRW_GENERAL_REGISTER_FILE; |
err |= reg (file, reg_file, inst->bits1.da3src.dest_reg_nr); |
if (err == -1) |
return 0; |
if (inst->bits1.da3src.dest_subreg_nr) |
format (file, ".%d", inst->bits1.da3src.dest_subreg_nr); |
string (file, "<1>"); |
err |= control (file, "writemask", writemask, inst->bits1.da3src.dest_writemask, NULL); |
err |= control (file, "dest reg encoding", reg_encoding, BRW_REGISTER_TYPE_F, NULL); |
return 0; |
} |
/* Print an align1 region as <vert_stride,width,horiz_stride>. */
static int src_align1_region (FILE *file,
                              GLuint _vert_stride, GLuint _width, GLuint _horiz_stride)
{
   int err = 0;
   string (file, "<");
   err |= control (file, "vert stride", vert_stride, _vert_stride, NULL);
   string (file, ",");
   err |= control (file, "width", width, _width, NULL);
   string (file, ",");
   err |= control (file, "horiz_stride", horiz_stride, _horiz_stride, NULL);
   string (file, ">");
   return err;
}
/* Print a direct-addressed align1 source operand:
 * [-](abs)gN.M<v,w,h>type.  Returns 0 early when reg() printed a
 * null/ip register. */
static int src_da1 (FILE *file, GLuint type, GLuint _reg_file,
                    GLuint _vert_stride, GLuint _width, GLuint _horiz_stride,
                    GLuint reg_num, GLuint sub_reg_num, GLuint __abs, GLuint _negate)
{
   int err = 0;
   err |= control (file, "negate", negate, _negate, NULL);
   err |= control (file, "abs", _abs, __abs, NULL);
   err |= reg (file, _reg_file, reg_num);
   if (err == -1)
      return 0;
   if (sub_reg_num)
      /* sub_reg_num counts bytes; print it in units of the type */
      format (file, ".%d", sub_reg_num / reg_type_size[type]); /* use formal style like spec */
   /* NOTE(review): src_align1_region()'s error flag is discarded here */
   src_align1_region (file, _vert_stride, _width, _horiz_stride);
   err |= control (file, "src reg encoding", reg_encoding, type, NULL);
   return err;
}
/* Print a register-indirect align1 source operand:
 * [-](abs)g[a0.N offset]<v,w,h>type.
 * NOTE(review): _reg_file and _addr_mode are accepted but unused. */
static int src_ia1 (FILE *file,
                    GLuint type,
                    GLuint _reg_file,
                    GLint _addr_imm,
                    GLuint _addr_subreg_nr,
                    GLuint _negate,
                    GLuint __abs,
                    GLuint _addr_mode,
                    GLuint _horiz_stride,
                    GLuint _width,
                    GLuint _vert_stride)
{
   int err = 0;
   err |= control (file, "negate", negate, _negate, NULL);
   err |= control (file, "abs", _abs, __abs, NULL);
   string (file, "g[a0");
   if (_addr_subreg_nr)
      format (file, ".%d", _addr_subreg_nr);
   if (_addr_imm)
      format (file, " %d", _addr_imm);
   string (file, "]");
   src_align1_region (file, _vert_stride, _width, _horiz_stride);
   err |= control (file, "src reg encoding", reg_encoding, type, NULL);
   return err;
}
/* Print a direct-addressed align16 source operand, including its
 * swizzle.  Returns 0 early when reg() printed a null/ip register. */
static int src_da16 (FILE *file,
                     GLuint _reg_type,
                     GLuint _reg_file,
                     GLuint _vert_stride,
                     GLuint _reg_nr,
                     GLuint _subreg_nr,
                     GLuint __abs,
                     GLuint _negate,
                     GLuint swz_x,
                     GLuint swz_y,
                     GLuint swz_z,
                     GLuint swz_w)
{
   int err = 0;
   err |= control (file, "negate", negate, _negate, NULL);
   err |= control (file, "abs", _abs, __abs, NULL);
   err |= reg (file, _reg_file, _reg_nr);
   if (err == -1)
      return 0;
   if (_subreg_nr)
      /* bit4 for subreg number byte addressing. Make this same meaning as
         in da1 case, so output looks consistent. */
      format (file, ".%d", 16 / reg_type_size[_reg_type]);
   /* align16 regions are always <v,4,1> */
   string (file, "<");
   err |= control (file, "vert stride", vert_stride, _vert_stride, NULL);
   string (file, ",4,1>");
   /*
    * Three kinds of swizzle display:
    *  identity - nothing printed
    *  1->all - print the single channel
    *  1->1 - print the mapping
    */
   if (swz_x == BRW_CHANNEL_X &&
       swz_y == BRW_CHANNEL_Y &&
       swz_z == BRW_CHANNEL_Z &&
       swz_w == BRW_CHANNEL_W)
   {
      ;
   }
   else if (swz_x == swz_y && swz_x == swz_z && swz_x == swz_w)
   {
      string (file, ".");
      err |= control (file, "channel select", chan_sel, swz_x, NULL);
   }
   else
   {
      string (file, ".");
      err |= control (file, "channel select", chan_sel, swz_x, NULL);
      err |= control (file, "channel select", chan_sel, swz_y, NULL);
      err |= control (file, "channel select", chan_sel, swz_z, NULL);
      err |= control (file, "channel select", chan_sel, swz_w, NULL);
   }
   err |= control (file, "src da16 reg type", reg_encoding, _reg_type, NULL);
   return err;
}
static int src0_3src (FILE *file, struct brw_instruction *inst) |
{ |
int err = 0; |
GLuint swz_x = (inst->bits2.da3src.src0_swizzle >> 0) & 0x3; |
GLuint swz_y = (inst->bits2.da3src.src0_swizzle >> 2) & 0x3; |
GLuint swz_z = (inst->bits2.da3src.src0_swizzle >> 4) & 0x3; |
GLuint swz_w = (inst->bits2.da3src.src0_swizzle >> 6) & 0x3; |
err |= control (file, "negate", negate, inst->bits1.da3src.src0_negate, NULL); |
err |= control (file, "abs", _abs, inst->bits1.da3src.src0_abs, NULL); |
err |= reg (file, BRW_GENERAL_REGISTER_FILE, inst->bits2.da3src.src0_reg_nr); |
if (err == -1) |
return 0; |
if (inst->bits2.da3src.src0_subreg_nr) |
format (file, ".%d", inst->bits2.da3src.src0_subreg_nr); |
string (file, "<4,1,1>"); |
err |= control (file, "src da16 reg type", reg_encoding, |
BRW_REGISTER_TYPE_F, NULL); |
/* |
* Three kinds of swizzle display: |
* identity - nothing printed |
* 1->all - print the single channel |
* 1->1 - print the mapping |
*/ |
if (swz_x == BRW_CHANNEL_X && |
swz_y == BRW_CHANNEL_Y && |
swz_z == BRW_CHANNEL_Z && |
swz_w == BRW_CHANNEL_W) |
{ |
; |
} |
else if (swz_x == swz_y && swz_x == swz_z && swz_x == swz_w) |
{ |
string (file, "."); |
err |= control (file, "channel select", chan_sel, swz_x, NULL); |
} |
else |
{ |
string (file, "."); |
err |= control (file, "channel select", chan_sel, swz_x, NULL); |
err |= control (file, "channel select", chan_sel, swz_y, NULL); |
err |= control (file, "channel select", chan_sel, swz_z, NULL); |
err |= control (file, "channel select", chan_sel, swz_w, NULL); |
} |
return err; |
} |
static int src1_3src (FILE *file, struct brw_instruction *inst) |
{ |
int err = 0; |
GLuint swz_x = (inst->bits2.da3src.src1_swizzle >> 0) & 0x3; |
GLuint swz_y = (inst->bits2.da3src.src1_swizzle >> 2) & 0x3; |
GLuint swz_z = (inst->bits2.da3src.src1_swizzle >> 4) & 0x3; |
GLuint swz_w = (inst->bits2.da3src.src1_swizzle >> 6) & 0x3; |
GLuint src1_subreg_nr = (inst->bits2.da3src.src1_subreg_nr_low | |
(inst->bits3.da3src.src1_subreg_nr_high << 2)); |
err |= control (file, "negate", negate, inst->bits1.da3src.src1_negate, |
NULL); |
err |= control (file, "abs", _abs, inst->bits1.da3src.src1_abs, NULL); |
err |= reg (file, BRW_GENERAL_REGISTER_FILE, |
inst->bits3.da3src.src1_reg_nr); |
if (err == -1) |
return 0; |
if (src1_subreg_nr) |
format (file, ".%d", src1_subreg_nr); |
string (file, "<4,1,1>"); |
err |= control (file, "src da16 reg type", reg_encoding, |
BRW_REGISTER_TYPE_F, NULL); |
/* |
* Three kinds of swizzle display: |
* identity - nothing printed |
* 1->all - print the single channel |
* 1->1 - print the mapping |
*/ |
if (swz_x == BRW_CHANNEL_X && |
swz_y == BRW_CHANNEL_Y && |
swz_z == BRW_CHANNEL_Z && |
swz_w == BRW_CHANNEL_W) |
{ |
; |
} |
else if (swz_x == swz_y && swz_x == swz_z && swz_x == swz_w) |
{ |
string (file, "."); |
err |= control (file, "channel select", chan_sel, swz_x, NULL); |
} |
else |
{ |
string (file, "."); |
err |= control (file, "channel select", chan_sel, swz_x, NULL); |
err |= control (file, "channel select", chan_sel, swz_y, NULL); |
err |= control (file, "channel select", chan_sel, swz_z, NULL); |
err |= control (file, "channel select", chan_sel, swz_w, NULL); |
} |
return err; |
} |
static int src2_3src (FILE *file, struct brw_instruction *inst) |
{ |
int err = 0; |
GLuint swz_x = (inst->bits3.da3src.src2_swizzle >> 0) & 0x3; |
GLuint swz_y = (inst->bits3.da3src.src2_swizzle >> 2) & 0x3; |
GLuint swz_z = (inst->bits3.da3src.src2_swizzle >> 4) & 0x3; |
GLuint swz_w = (inst->bits3.da3src.src2_swizzle >> 6) & 0x3; |
err |= control (file, "negate", negate, inst->bits1.da3src.src2_negate, |
NULL); |
err |= control (file, "abs", _abs, inst->bits1.da3src.src2_abs, NULL); |
err |= reg (file, BRW_GENERAL_REGISTER_FILE, |
inst->bits3.da3src.src2_reg_nr); |
if (err == -1) |
return 0; |
if (inst->bits3.da3src.src2_subreg_nr) |
format (file, ".%d", inst->bits3.da3src.src2_subreg_nr); |
string (file, "<4,1,1>"); |
err |= control (file, "src da16 reg type", reg_encoding, |
BRW_REGISTER_TYPE_F, NULL); |
/* |
* Three kinds of swizzle display: |
* identity - nothing printed |
* 1->all - print the single channel |
* 1->1 - print the mapping |
*/ |
if (swz_x == BRW_CHANNEL_X && |
swz_y == BRW_CHANNEL_Y && |
swz_z == BRW_CHANNEL_Z && |
swz_w == BRW_CHANNEL_W) |
{ |
; |
} |
else if (swz_x == swz_y && swz_x == swz_z && swz_x == swz_w) |
{ |
string (file, "."); |
err |= control (file, "channel select", chan_sel, swz_x, NULL); |
} |
else |
{ |
string (file, "."); |
err |= control (file, "channel select", chan_sel, swz_x, NULL); |
err |= control (file, "channel select", chan_sel, swz_y, NULL); |
err |= control (file, "channel select", chan_sel, swz_z, NULL); |
err |= control (file, "channel select", chan_sel, swz_w, NULL); |
} |
return err; |
} |
/**
 * Print an immediate operand according to its register type.
 *
 * Unknown types print nothing.  Always returns 0 (immediate decoding
 * cannot fail).
 */
static int imm (FILE *file, GLuint type, struct brw_instruction *inst) {
   switch (type) {
   case BRW_REGISTER_TYPE_UD:
      format (file, "0x%08xUD", inst->bits3.ud);
      break;
   case BRW_REGISTER_TYPE_D:
      format (file, "%dD", inst->bits3.d);
      break;
   case BRW_REGISTER_TYPE_UW:
      format (file, "0x%04xUW", (uint16_t) inst->bits3.ud);
      break;
   case BRW_REGISTER_TYPE_W:
      format (file, "%dW", (int16_t) inst->bits3.d);
      break;
   case BRW_REGISTER_TYPE_UB:
      /* Mask to the low byte.  The previous (int8_t) cast sign-extended
       * through integer promotion, so values >= 0x80 printed as eight hex
       * digits (e.g. "0xffffff80UB") instead of two.
       */
      format (file, "0x%02xUB", inst->bits3.ud & 0xff);
      break;
   case BRW_REGISTER_TYPE_VF:
      /* FINISHME: the four packed 8-bit restricted floats are not decoded */
      format (file, "Vector Float");
      break;
   case BRW_REGISTER_TYPE_V:
      format (file, "0x%08xV", inst->bits3.ud);
      break;
   case BRW_REGISTER_TYPE_F:
      format (file, "%-gF", inst->bits3.f);
      break;
   }
   return 0;
}
/**
 * Disassemble source operand 0.
 *
 * Dispatches on the operand encoding: immediate, align1 direct, align1
 * indirect, or align16 direct.  Align16 indirect addressing is not
 * supported by this decoder and prints a diagnostic instead.
 * Returns the error flag from the helper that did the printing.
 */
static int src0 (FILE *file, struct brw_instruction *inst)
{
   if (inst->bits1.da1.src0_reg_file == BRW_IMMEDIATE_VALUE)
      return imm (file, inst->bits1.da1.src0_reg_type,
                  inst);
   else if (inst->header.access_mode == BRW_ALIGN_1)
   {
      if (inst->bits2.da1.src0_address_mode == BRW_ADDRESS_DIRECT)
      {
         /* align1, direct: full region description in bits2 */
         return src_da1 (file,
                         inst->bits1.da1.src0_reg_type,
                         inst->bits1.da1.src0_reg_file,
                         inst->bits2.da1.src0_vert_stride,
                         inst->bits2.da1.src0_width,
                         inst->bits2.da1.src0_horiz_stride,
                         inst->bits2.da1.src0_reg_nr,
                         inst->bits2.da1.src0_subreg_nr,
                         inst->bits2.da1.src0_abs,
                         inst->bits2.da1.src0_negate);
      }
      else
      {
         /* align1, register-indirect via the address register */
         return src_ia1 (file,
                         inst->bits1.ia1.src0_reg_type,
                         inst->bits1.ia1.src0_reg_file,
                         inst->bits2.ia1.src0_indirect_offset,
                         inst->bits2.ia1.src0_subreg_nr,
                         inst->bits2.ia1.src0_negate,
                         inst->bits2.ia1.src0_abs,
                         inst->bits2.ia1.src0_address_mode,
                         inst->bits2.ia1.src0_horiz_stride,
                         inst->bits2.ia1.src0_width,
                         inst->bits2.ia1.src0_vert_stride);
      }
   }
   else
   {
      if (inst->bits2.da16.src0_address_mode == BRW_ADDRESS_DIRECT)
      {
         /* align16, direct: per-channel swizzles instead of a full region */
         return src_da16 (file,
                          inst->bits1.da16.src0_reg_type,
                          inst->bits1.da16.src0_reg_file,
                          inst->bits2.da16.src0_vert_stride,
                          inst->bits2.da16.src0_reg_nr,
                          inst->bits2.da16.src0_subreg_nr,
                          inst->bits2.da16.src0_abs,
                          inst->bits2.da16.src0_negate,
                          inst->bits2.da16.src0_swz_x,
                          inst->bits2.da16.src0_swz_y,
                          inst->bits2.da16.src0_swz_z,
                          inst->bits2.da16.src0_swz_w);
      }
      else
      {
         string (file, "Indirect align16 address mode not supported");
         return 1;
      }
   }
}
/**
 * Disassemble source operand 1.
 *
 * Mirrors src0() but reads the src1 fields, which live in bits3 (region
 * description) rather than bits2.  Align16 indirect addressing is not
 * supported and prints a diagnostic instead.
 * Returns the error flag from the helper that did the printing.
 */
static int src1 (FILE *file, struct brw_instruction *inst)
{
   if (inst->bits1.da1.src1_reg_file == BRW_IMMEDIATE_VALUE)
      return imm (file, inst->bits1.da1.src1_reg_type,
                  inst);
   else if (inst->header.access_mode == BRW_ALIGN_1)
   {
      if (inst->bits3.da1.src1_address_mode == BRW_ADDRESS_DIRECT)
      {
         /* align1, direct */
         return src_da1 (file,
                         inst->bits1.da1.src1_reg_type,
                         inst->bits1.da1.src1_reg_file,
                         inst->bits3.da1.src1_vert_stride,
                         inst->bits3.da1.src1_width,
                         inst->bits3.da1.src1_horiz_stride,
                         inst->bits3.da1.src1_reg_nr,
                         inst->bits3.da1.src1_subreg_nr,
                         inst->bits3.da1.src1_abs,
                         inst->bits3.da1.src1_negate);
      }
      else
      {
         /* align1, register-indirect via the address register */
         return src_ia1 (file,
                         inst->bits1.ia1.src1_reg_type,
                         inst->bits1.ia1.src1_reg_file,
                         inst->bits3.ia1.src1_indirect_offset,
                         inst->bits3.ia1.src1_subreg_nr,
                         inst->bits3.ia1.src1_negate,
                         inst->bits3.ia1.src1_abs,
                         inst->bits3.ia1.src1_address_mode,
                         inst->bits3.ia1.src1_horiz_stride,
                         inst->bits3.ia1.src1_width,
                         inst->bits3.ia1.src1_vert_stride);
      }
   }
   else
   {
      if (inst->bits3.da16.src1_address_mode == BRW_ADDRESS_DIRECT)
      {
         /* align16, direct */
         return src_da16 (file,
                          inst->bits1.da16.src1_reg_type,
                          inst->bits1.da16.src1_reg_file,
                          inst->bits3.da16.src1_vert_stride,
                          inst->bits3.da16.src1_reg_nr,
                          inst->bits3.da16.src1_subreg_nr,
                          inst->bits3.da16.src1_abs,
                          inst->bits3.da16.src1_negate,
                          inst->bits3.da16.src1_swz_x,
                          inst->bits3.da16.src1_swz_y,
                          inst->bits3.da16.src1_swz_z,
                          inst->bits3.da16.src1_swz_w);
      }
      else
      {
         string (file, "Indirect align16 address mode not supported");
         return 1;
      }
   }
}
/* Map from the 3-bit execution-size encoding in the instruction header to
 * the number of channels executed (1 << encoding).  NOTE(review): left
 * non-static — presumably referenced from other files; confirm before
 * narrowing linkage.
 */
int esize[6] = {
   [0] = 1,
   [1] = 2,
   [2] = 4,
   [3] = 8,
   [4] = 16,
   [5] = 32,
};
static int qtr_ctrl(FILE *file, struct brw_instruction *inst) |
{ |
int qtr_ctl = inst->header.compression_control; |
int exec_size = esize[inst->header.execution_size]; |
if (exec_size == 8) { |
switch (qtr_ctl) { |
case 0: |
string (file, " 1Q"); |
break; |
case 1: |
string (file, " 2Q"); |
break; |
case 2: |
string (file, " 3Q"); |
break; |
case 3: |
string (file, " 4Q"); |
break; |
} |
} else if (exec_size == 16){ |
if (qtr_ctl < 2) |
string (file, " 1H"); |
else |
string (file, " 2H"); |
} |
return 0; |
} |
/**
 * Disassemble one Gen EU instruction to "file".
 *
 * "gen" selects the hardware generation (4..7) and steers which bitfield
 * layouts are decoded.  Output layout: optional predicate prefix, opcode
 * with modifiers, column-aligned dst/src operands, an extra SEND message
 * descriptor line, and a trailing "{...}" options group.
 * Returns the accumulated error flag (non-zero if any field failed to
 * decode).
 */
int brw_disasm (FILE *file, struct brw_instruction *inst, int gen)
{
   int err = 0;
   int space = 0;

   /* predicate prefix, e.g. "(+f0.1.any4h) " */
   if (inst->header.predicate_control) {
      string (file, "(");
      err |= control (file, "predicate inverse", pred_inv, inst->header.predicate_inverse, NULL);
      /* only gen7+ encodes a selectable flag register */
      format (file, "f%d", gen >= 7 ? inst->bits2.da1.flag_reg_nr : 0);
      if (inst->bits2.da1.flag_subreg_nr)
         format (file, ".%d", inst->bits2.da1.flag_subreg_nr);
      if (inst->header.access_mode == BRW_ALIGN_1)
         err |= control (file, "predicate control align1", pred_ctrl_align1,
                         inst->header.predicate_control, NULL);
      else
         err |= control (file, "predicate control align16", pred_ctrl_align16,
                         inst->header.predicate_control, NULL);
      string (file, ") ");
   }

   err |= print_opcode (file, inst->header.opcode);
   err |= control (file, "saturate", saturate, inst->header.saturate, NULL);
   err |= control (file, "debug control", debug_ctrl, inst->header.debug_control, NULL);

   /* destreg__conditionalmod is overloaded: math function for MATH,
    * SFID for SEND/SENDC (printed later), conditional modifier otherwise */
   if (inst->header.opcode == BRW_OPCODE_MATH) {
      string (file, " ");
      err |= control (file, "function", math_function,
                      inst->header.destreg__conditionalmod, NULL);
   } else if (inst->header.opcode != BRW_OPCODE_SEND &&
              inst->header.opcode != BRW_OPCODE_SENDC) {
      err |= control (file, "conditional modifier", conditional_modifier,
                      inst->header.destreg__conditionalmod, NULL);
      /* If we're using the conditional modifier, print which flags reg is
       * used for it.  Note that on gen6+, the embedded-condition SEL and
       * control flow doesn't update flags.
       */
      if (inst->header.destreg__conditionalmod &&
          (gen < 6 || (inst->header.opcode != BRW_OPCODE_SEL &&
                       inst->header.opcode != BRW_OPCODE_IF &&
                       inst->header.opcode != BRW_OPCODE_WHILE))) {
         format (file, ".f%d", gen >= 7 ? inst->bits2.da1.flag_reg_nr : 0);
         if (inst->bits2.da1.flag_subreg_nr)
            format (file, ".%d", inst->bits2.da1.flag_subreg_nr);
      }
   }

   /* execution size, e.g. "(8)" */
   if (inst->header.opcode != BRW_OPCODE_NOP) {
      string (file, "(");
      err |= control (file, "execution size", exec_size, inst->header.execution_size, NULL);
      string (file, ")");
   }

   if (inst->header.opcode == BRW_OPCODE_SEND && gen < 6)
      format (file, " %d", inst->header.destreg__conditionalmod);

   /* operands, column-aligned at 16/32/48(/64) */
   if (opcode[inst->header.opcode].nsrc == 3) {
      pad (file, 16);
      err |= dest_3src (file, inst);

      pad (file, 32);
      err |= src0_3src (file, inst);

      pad (file, 48);
      err |= src1_3src (file, inst);

      pad (file, 64);
      err |= src2_3src (file, inst);
   } else {
      if (opcode[inst->header.opcode].ndst > 0) {
         pad (file, 16);
         err |= dest (file, inst);
      } else if (gen == 7 && (inst->header.opcode == BRW_OPCODE_ELSE ||
                              inst->header.opcode == BRW_OPCODE_ENDIF ||
                              inst->header.opcode == BRW_OPCODE_WHILE)) {
         /* gen7 flow control carries only a JIP offset */
         format (file, " %d", inst->bits3.break_cont.jip);
      } else if (gen == 6 && (inst->header.opcode == BRW_OPCODE_IF ||
                              inst->header.opcode == BRW_OPCODE_ELSE ||
                              inst->header.opcode == BRW_OPCODE_ENDIF ||
                              inst->header.opcode == BRW_OPCODE_WHILE)) {
         format (file, " %d", inst->bits1.branch_gen6.jump_count);
      } else if ((gen >= 6 && (inst->header.opcode == BRW_OPCODE_BREAK ||
                               inst->header.opcode == BRW_OPCODE_CONTINUE ||
                               inst->header.opcode == BRW_OPCODE_HALT)) ||
                 (gen == 7 && inst->header.opcode == BRW_OPCODE_IF)) {
         /* both UIP and JIP offsets */
         format (file, " %d %d", inst->bits3.break_cont.uip, inst->bits3.break_cont.jip);
      } else if (inst->header.opcode == BRW_OPCODE_JMPI) {
         format (file, " %d", inst->bits3.d);
      }

      if (opcode[inst->header.opcode].nsrc > 0) {
         pad (file, 32);
         err |= src0 (file, inst);
      }
      if (opcode[inst->header.opcode].nsrc > 1) {
         pad (file, 48);
         err |= src1 (file, inst);
      }
   }

   /* SEND/SENDC: decode the message descriptor on a second line */
   if (inst->header.opcode == BRW_OPCODE_SEND ||
       inst->header.opcode == BRW_OPCODE_SENDC) {
      enum brw_message_target target;

      /* where the shared-function id lives moved across generations */
      if (gen >= 6)
         target = inst->header.destreg__conditionalmod;
      else if (gen == 5)
         target = inst->bits2.send_gen5.sfid;
      else
         target = inst->bits3.generic.msg_target;

      newline (file);
      pad (file, 16);
      space = 0;

      if (gen >= 6) {
         err |= control (file, "target function", target_function_gen6,
                         target, &space);
      } else {
         err |= control (file, "target function", target_function,
                         target, &space);
      }

      /* per-shared-function message payload decoding */
      switch (target) {
      case BRW_SFID_MATH:
         err |= control (file, "math function", math_function,
                         inst->bits3.math.function, &space);
         err |= control (file, "math saturate", math_saturate,
                         inst->bits3.math.saturate, &space);
         err |= control (file, "math signed", math_signed,
                         inst->bits3.math.int_type, &space);
         err |= control (file, "math scalar", math_scalar,
                         inst->bits3.math.data_type, &space);
         err |= control (file, "math precision", math_precision,
                         inst->bits3.math.precision, &space);
         break;
      case BRW_SFID_SAMPLER:
         if (gen >= 7) {
            format (file, " (%d, %d, %d, %d)",
                    inst->bits3.sampler_gen7.binding_table_index,
                    inst->bits3.sampler_gen7.sampler,
                    inst->bits3.sampler_gen7.msg_type,
                    inst->bits3.sampler_gen7.simd_mode);
         } else if (gen >= 5) {
            format (file, " (%d, %d, %d, %d)",
                    inst->bits3.sampler_gen5.binding_table_index,
                    inst->bits3.sampler_gen5.sampler,
                    inst->bits3.sampler_gen5.msg_type,
                    inst->bits3.sampler_gen5.simd_mode);
         } else if (0 /* FINISHME: is_g4x */) {
            format (file, " (%d, %d)",
                    inst->bits3.sampler_g4x.binding_table_index,
                    inst->bits3.sampler_g4x.sampler);
         } else {
            format (file, " (%d, %d, ",
                    inst->bits3.sampler.binding_table_index,
                    inst->bits3.sampler.sampler);
            err |= control (file, "sampler target format",
                            sampler_target_format,
                            inst->bits3.sampler.return_format, NULL);
            string (file, ")");
         }
         break;
      case BRW_SFID_DATAPORT_READ:
         if (gen >= 6) {
            format (file, " (%d, %d, %d, %d)",
                    inst->bits3.gen6_dp.binding_table_index,
                    inst->bits3.gen6_dp.msg_control,
                    inst->bits3.gen6_dp.msg_type,
                    inst->bits3.gen6_dp.send_commit_msg);
         } else if (gen >= 5 /* FINISHME: || is_g4x */) {
            format (file, " (%d, %d, %d)",
                    inst->bits3.dp_read_gen5.binding_table_index,
                    inst->bits3.dp_read_gen5.msg_control,
                    inst->bits3.dp_read_gen5.msg_type);
         } else {
            format (file, " (%d, %d, %d)",
                    inst->bits3.dp_read.binding_table_index,
                    inst->bits3.dp_read.msg_control,
                    inst->bits3.dp_read.msg_type);
         }
         break;
      case BRW_SFID_DATAPORT_WRITE:
         if (gen >= 7) {
            format (file, " (");
            err |= control (file, "DP rc message type",
                            dp_rc_msg_type_gen6,
                            inst->bits3.gen7_dp.msg_type, &space);
            format (file, ", %d, %d, %d)",
                    inst->bits3.gen7_dp.binding_table_index,
                    inst->bits3.gen7_dp.msg_control,
                    inst->bits3.gen7_dp.msg_type);
         } else if (gen == 6) {
            format (file, " (");
            err |= control (file, "DP rc message type",
                            dp_rc_msg_type_gen6,
                            inst->bits3.gen6_dp.msg_type, &space);
            format (file, ", %d, %d, %d, %d)",
                    inst->bits3.gen6_dp.binding_table_index,
                    inst->bits3.gen6_dp.msg_control,
                    inst->bits3.gen6_dp.msg_type,
                    inst->bits3.gen6_dp.send_commit_msg);
         } else {
            format (file, " (%d, %d, %d, %d)",
                    inst->bits3.dp_write.binding_table_index,
                    (inst->bits3.dp_write.last_render_target << 3) |
                    inst->bits3.dp_write.msg_control,
                    inst->bits3.dp_write.msg_type,
                    inst->bits3.dp_write.send_commit_msg);
         }
         break;
      case BRW_SFID_URB:
         if (gen >= 5) {
            format (file, " %d", inst->bits3.urb_gen5.offset);
         } else {
            format (file, " %d", inst->bits3.urb.offset);
         }

         space = 1;
         if (gen >= 5) {
            err |= control (file, "urb opcode", urb_opcode,
                            inst->bits3.urb_gen5.opcode, &space);
         }
         err |= control (file, "urb swizzle", urb_swizzle,
                         inst->bits3.urb.swizzle_control, &space);
         err |= control (file, "urb allocate", urb_allocate,
                         inst->bits3.urb.allocate, &space);
         err |= control (file, "urb used", urb_used,
                         inst->bits3.urb.used, &space);
         err |= control (file, "urb complete", urb_complete,
                         inst->bits3.urb.complete, &space);
         break;
      case BRW_SFID_THREAD_SPAWNER:
         break;
      case GEN7_SFID_DATAPORT_DATA_CACHE:
         format (file, " (%d, %d, %d)",
                 inst->bits3.gen7_dp.binding_table_index,
                 inst->bits3.gen7_dp.msg_control,
                 inst->bits3.gen7_dp.msg_type);
         break;
      default:
         format (file, "unsupported target %d", target);
         break;
      }
      if (space)
         string (file, " ");
      /* message and response lengths */
      if (gen >= 5) {
         format (file, "mlen %d",
                 inst->bits3.generic_gen5.msg_length);
         format (file, " rlen %d",
                 inst->bits3.generic_gen5.response_length);
      } else {
         format (file, "mlen %d",
                 inst->bits3.generic.msg_length);
         format (file, " rlen %d",
                 inst->bits3.generic.response_length);
      }
   }

   /* trailing instruction-options group, e.g. "{ align1 WE_normal 1Q }" */
   pad (file, 64);
   if (inst->header.opcode != BRW_OPCODE_NOP) {
      string (file, "{");
      space = 1;
      err |= control(file, "access mode", access_mode, inst->header.access_mode, &space);
      if (gen >= 6)
         err |= control (file, "write enable control", wectrl, inst->header.mask_control, &space);
      else
         err |= control (file, "mask control", mask_ctrl, inst->header.mask_control, &space);
      err |= control (file, "dependency control", dep_ctrl, inst->header.dependency_control, &space);

      if (gen >= 6)
         err |= qtr_ctrl (file, inst);
      else {
         /* pre-gen6: a compressed MRF write with bit 7 of the register
          * number set is the special "compr4" mode */
         if (inst->header.compression_control == BRW_COMPRESSION_COMPRESSED &&
             opcode[inst->header.opcode].ndst > 0 &&
             inst->bits1.da1.dest_reg_file == BRW_MESSAGE_REGISTER_FILE &&
             inst->bits1.da1.dest_reg_nr & (1 << 7)) {
            format (file, " compr4");
         } else {
            err |= control (file, "compression control", compr_ctrl,
                            inst->header.compression_control, &space);
         }
      }

      err |= control (file, "thread control", thread_ctrl, inst->header.thread_control, &space);
      if (gen >= 6)
         err |= control (file, "acc write control", accwr, inst->header.acc_wr_control, &space);
      if (inst->header.opcode == BRW_OPCODE_SEND ||
          inst->header.opcode == BRW_OPCODE_SENDC)
         err |= control (file, "end of thread", end_of_thread,
                         inst->bits3.generic.end_of_thread, &space);
      if (space)
         string (file, " ");
      string (file, "}");
   }
   string (file, ";");
   newline (file);
   return err;
}
/contrib/sdk/sources/Mesa/src/gallium/drivers/ilo/shader/toy_compiler_reg.h |
---|
0,0 → 1,800 |
/* |
* Mesa 3-D graphics library |
* |
* Copyright (C) 2012-2013 LunarG, Inc. |
* |
* Permission is hereby granted, free of charge, to any person obtaining a |
* copy of this software and associated documentation files (the "Software"), |
* to deal in the Software without restriction, including without limitation |
* the rights to use, copy, modify, merge, publish, distribute, sublicense, |
* and/or sell copies of the Software, and to permit persons to whom the |
* Software is furnished to do so, subject to the following conditions: |
* |
* The above copyright notice and this permission notice shall be included |
* in all copies or substantial portions of the Software. |
* |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING |
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER |
* DEALINGS IN THE SOFTWARE. |
* |
* Authors: |
* Chia-I Wu <olv@lunarg.com> |
*/ |
#ifndef TOY_REG_H |
#define TOY_REG_H |
#include "pipe/p_compiler.h" |
#include "util/u_debug.h" /* for assert() */ |
#include "util/u_math.h" /* for union fi */ |
/* a toy reg is 256-bit wide */ |
#define TOY_REG_WIDTH 32 |
/**
 * Register files.
 */
enum toy_file {
   /* virtual register file -- exists only in the compiler, before
    * register allocation */
   TOY_FILE_VRF,
   TOY_FILE_ARF,   /* architecture register file */
   TOY_FILE_GRF,   /* general register file */
   TOY_FILE_MRF,   /* message register file (no MRF on GEN7+) */
   TOY_FILE_IMM,   /* immediate operand */

   TOY_FILE_COUNT,
};
/**
 * Register types.  F/D/UD are 4 bytes wide, W/UW 2 bytes
 * (see toy_type_size()).
 */
enum toy_type {
   TOY_TYPE_F,    /* 32-bit float */
   TOY_TYPE_D,    /* 32-bit signed integer */
   TOY_TYPE_UD,   /* 32-bit unsigned integer */
   TOY_TYPE_W,    /* 16-bit signed integer */
   TOY_TYPE_UW,   /* 16-bit unsigned integer */
   TOY_TYPE_V,    /* only valid for immediates */

   TOY_TYPE_COUNT,
};
/**
 * Register rectangles.  The three numbers stand for vertical stride, width,
 * and horizontal stride respectively.
 */
enum toy_rect {
   TOY_RECT_LINEAR,   /* contiguous elements */
   TOY_RECT_041,      /* <0,4,1> */
   TOY_RECT_010,      /* <0,1,0>: broadcast a single element */
   TOY_RECT_220,      /* <2,2,0> */
   TOY_RECT_440,      /* <4,4,0> */
   TOY_RECT_240,      /* <2,4,0> */

   TOY_RECT_COUNT,
};
/**
 * Source swizzles.  They are compatible with TGSI_SWIZZLE_x and hardware
 * values, so they can be used in either context without translation.
 */
enum toy_swizzle {
   TOY_SWIZZLE_X = 0,
   TOY_SWIZZLE_Y = 1,
   TOY_SWIZZLE_Z = 2,
   TOY_SWIZZLE_W = 3,
};
/**
 * Destination writemasks.  They are compatible with TGSI_WRITEMASK_x and
 * hardware values.  Each bit position corresponds to the matching
 * TOY_SWIZZLE_x channel, so masks compose with swizzles via shifts.
 */
enum toy_writemask {
   TOY_WRITEMASK_X    = (1 << TOY_SWIZZLE_X),
   TOY_WRITEMASK_Y    = (1 << TOY_SWIZZLE_Y),
   TOY_WRITEMASK_Z    = (1 << TOY_SWIZZLE_Z),
   TOY_WRITEMASK_W    = (1 << TOY_SWIZZLE_W),
   /* all two-, three-, and four-channel combinations */
   TOY_WRITEMASK_XY   = (TOY_WRITEMASK_X | TOY_WRITEMASK_Y),
   TOY_WRITEMASK_XZ   = (TOY_WRITEMASK_X | TOY_WRITEMASK_Z),
   TOY_WRITEMASK_XW   = (TOY_WRITEMASK_X | TOY_WRITEMASK_W),
   TOY_WRITEMASK_YZ   = (TOY_WRITEMASK_Y | TOY_WRITEMASK_Z),
   TOY_WRITEMASK_YW   = (TOY_WRITEMASK_Y | TOY_WRITEMASK_W),
   TOY_WRITEMASK_ZW   = (TOY_WRITEMASK_Z | TOY_WRITEMASK_W),
   TOY_WRITEMASK_XYZ  = (TOY_WRITEMASK_X | TOY_WRITEMASK_Y | TOY_WRITEMASK_Z),
   TOY_WRITEMASK_XYW  = (TOY_WRITEMASK_X | TOY_WRITEMASK_Y | TOY_WRITEMASK_W),
   TOY_WRITEMASK_XZW  = (TOY_WRITEMASK_X | TOY_WRITEMASK_Z | TOY_WRITEMASK_W),
   TOY_WRITEMASK_YZW  = (TOY_WRITEMASK_Y | TOY_WRITEMASK_Z | TOY_WRITEMASK_W),
   TOY_WRITEMASK_XYZW = (TOY_WRITEMASK_X | TOY_WRITEMASK_Y |
                         TOY_WRITEMASK_Z | TOY_WRITEMASK_W),
};
/**
 * Destination operand.  Packed so the whole operand is 64 bits: 32 bits of
 * flags plus val32.  For register files, val32 is the byte offset into the
 * file (reg * TOY_REG_WIDTH + subreg); for TOY_FILE_IMM it is the raw
 * immediate value (see tdst_offset() and tdst_imm_w()).
 */
struct toy_dst {
   unsigned file:3;             /* TOY_FILE_x */
   unsigned type:3;             /* TOY_TYPE_x */
   unsigned rect:3;             /* TOY_RECT_x */
   unsigned indirect:1;         /* true or false */
   unsigned indirect_subreg:6;  /* which subreg of a0? */
   unsigned writemask:4;        /* TOY_WRITEMASK_x */
   unsigned pad:12;

   uint32_t val32;
};
/**
 * Source operand.  Same packing scheme as struct toy_dst, with per-channel
 * swizzles and absolute/negate modifiers in place of the writemask.
 * val32 is the byte offset into the file, or the raw value for
 * TOY_FILE_IMM (see tsrc_offset() and tsrc_imm()).
 */
struct toy_src {
   unsigned file:3;             /* TOY_FILE_x */
   unsigned type:3;             /* TOY_TYPE_x */
   unsigned rect:3;             /* TOY_RECT_x */
   unsigned indirect:1;         /* true or false */
   unsigned indirect_subreg:6;  /* which subreg of a0? */
   unsigned swizzle_x:2;        /* TOY_SWIZZLE_x */
   unsigned swizzle_y:2;        /* TOY_SWIZZLE_x */
   unsigned swizzle_z:2;        /* TOY_SWIZZLE_x */
   unsigned swizzle_w:2;        /* TOY_SWIZZLE_x */
   unsigned absolute:1;         /* true or false */
   unsigned negate:1;           /* true or false */
   unsigned pad:6;

   uint32_t val32;
};
/**
 * Return true if the file is virtual, i.e. exists only in the compiler
 * (TOY_FILE_VRF) and not in the hardware.
 */
static inline bool
toy_file_is_virtual(enum toy_file file)
{
   return (file == TOY_FILE_VRF);
}
/**
 * Return true if the file is a hardware one (anything other than
 * TOY_FILE_VRF).
 */
static inline bool
toy_file_is_hw(enum toy_file file)
{
   return !toy_file_is_virtual(file);
}
/** |
* Return the size of the file. |
*/ |
static inline uint32_t |
toy_file_size(enum toy_file file) |
{ |
switch (file) { |
case TOY_FILE_GRF: |
return 256 * TOY_REG_WIDTH; |
case TOY_FILE_MRF: |
/* there is no MRF on GEN7+ */ |
return 256 * TOY_REG_WIDTH; |
default: |
assert(!"invalid toy file"); |
return 0; |
} |
} |
/**
 * Return the size, in bytes, of the type.  TOY_TYPE_V has no per-element
 * size (immediate-only packed type); it asserts and returns 0.
 */
static inline int
toy_type_size(enum toy_type type)
{
   switch (type) {
   case TOY_TYPE_F:
   case TOY_TYPE_D:
   case TOY_TYPE_UD:
      return 4;
   case TOY_TYPE_W:
   case TOY_TYPE_UW:
      return 2;
   case TOY_TYPE_V:
   default:
      assert(!"invalid toy type");
      return 0;
   }
}
/**
 * Return true if the destination operand is the null register
 * (ARF file at offset 0).
 */
static inline bool
tdst_is_null(struct toy_dst dst)
{
   /* BRW_ARF_NULL happens to be 0 */
   return (dst.file == TOY_FILE_ARF && dst.val32 == 0);
}
/**
 * Validate the destination operand.  Asserts (debug builds) when the
 * operand is malformed and returns the operand unchanged.
 */
static inline struct toy_dst
tdst_validate(struct toy_dst dst)
{
   switch (dst.file) {
   case TOY_FILE_VRF:
   case TOY_FILE_ARF:
   case TOY_FILE_MRF:
      /* these files are never addressed indirectly */
      assert(!dst.indirect);
      if (dst.file == TOY_FILE_MRF)
         assert(dst.val32 < toy_file_size(dst.file));
      break;
   case TOY_FILE_GRF:
      if (!dst.indirect)
         assert(dst.val32 < toy_file_size(dst.file));
      break;
   case TOY_FILE_IMM:
      /* yes, dst can be IMM of type W (for IF/ELSE/ENDIF/WHILE) */
      assert(!dst.indirect);
      assert(dst.type == TOY_TYPE_W);
      break;
   default:
      assert(!"invalid dst file");
      break;
   }

   switch (dst.type) {
   case TOY_TYPE_V:
      /* V is an immediate-only source type */
      assert(!"invalid dst type");
      break;
   default:
      break;
   }

   assert(dst.rect == TOY_RECT_LINEAR);

   /* register offsets must be aligned to the element size */
   if (dst.file != TOY_FILE_IMM)
      assert(dst.val32 % toy_type_size(dst.type) == 0);

   assert(dst.writemask <= TOY_WRITEMASK_XYZW);

   return dst;
}
/**
 * Change the type of the destination operand and re-validate
 * (e.g. the offset must stay aligned to the new element size).
 */
static inline struct toy_dst
tdst_type(struct toy_dst dst, enum toy_type type)
{
   dst.type = type;
   return tdst_validate(dst);
}
/**
 * Change the type of the destination operand to TOY_TYPE_D
 * (32-bit signed integer).
 */
static inline struct toy_dst
tdst_d(struct toy_dst dst)
{
   return tdst_type(dst, TOY_TYPE_D);
}
/**
 * Change the type of the destination operand to TOY_TYPE_UD
 * (32-bit unsigned integer).
 */
static inline struct toy_dst
tdst_ud(struct toy_dst dst)
{
   return tdst_type(dst, TOY_TYPE_UD);
}
/**
 * Change the type of the destination operand to TOY_TYPE_W
 * (16-bit signed integer).
 */
static inline struct toy_dst
tdst_w(struct toy_dst dst)
{
   return tdst_type(dst, TOY_TYPE_W);
}
/**
 * Change the type of the destination operand to TOY_TYPE_UW
 * (16-bit unsigned integer).
 */
static inline struct toy_dst
tdst_uw(struct toy_dst dst)
{
   return tdst_type(dst, TOY_TYPE_UW);
}
/**
 * Change the rectangle of the destination operand and re-validate.
 * Note that only TOY_RECT_LINEAR passes dst validation.
 */
static inline struct toy_dst
tdst_rect(struct toy_dst dst, enum toy_rect rect)
{
   dst.rect = rect;
   return tdst_validate(dst);
}
/**
 * Apply writemask to the destination operand.  Note that the current
 * writemask is honored: the result is the intersection (bitwise AND) of
 * the old and new masks, so channels can only be disabled, never enabled.
 */
static inline struct toy_dst
tdst_writemask(struct toy_dst dst, enum toy_writemask writemask)
{
   dst.writemask &= writemask;
   return tdst_validate(dst);
}
/**
 * Offset the destination operand by "reg" whole registers plus "subreg"
 * elements of the operand's type (val32 is a byte offset).
 */
static inline struct toy_dst
tdst_offset(struct toy_dst dst, int reg, int subreg)
{
   dst.val32 += reg * TOY_REG_WIDTH + subreg * toy_type_size(dst.type);
   return tdst_validate(dst);
}
/** |
* Construct a destination operand. |
*/ |
static inline struct toy_dst |
tdst_full(enum toy_file file, enum toy_type type, enum toy_rect rect, |
bool indirect, unsigned indirect_subreg, |
enum toy_writemask writemask, uint32_t val32) |
{ |
struct toy_dst dst; |
dst.file = file; |
dst.type = type; |
dst.rect = rect; |
dst.indirect = indirect; |
dst.indirect_subreg = indirect_subreg; |
dst.writemask = writemask; |
dst.pad = 0; |
dst.val32 = val32; |
return tdst_validate(dst); |
} |
/**
 * Construct a null destination operand (ARF file, offset 0) with all
 * channels enabled.
 */
static inline struct toy_dst
tdst_null(void)
{
   static const struct toy_dst null_dst = {
      .file = TOY_FILE_ARF,
      .type = TOY_TYPE_F,
      .rect = TOY_RECT_LINEAR,
      .indirect = false,
      .indirect_subreg = 0,
      .writemask = TOY_WRITEMASK_XYZW,
      .pad = 0,
      .val32 = 0,
   };

   return null_dst;
}
/**
 * Construct a destination operand from a source operand.  The writemask
 * enables exactly the channels the source swizzles select (duplicate
 * swizzle channels collapse into a single mask bit).
 */
static inline struct toy_dst
tdst_from(struct toy_src src)
{
   const enum toy_writemask writemask =
      (1 << src.swizzle_x) |
      (1 << src.swizzle_y) |
      (1 << src.swizzle_z) |
      (1 << src.swizzle_w);

   /* absolute/negate modifiers have no dst counterpart and are dropped */
   return tdst_full(src.file, src.type, src.rect,
                    src.indirect, src.indirect_subreg, writemask, src.val32);
}
/**
 * Construct a destination operand, assuming the type is TOY_TYPE_F, the
 * rectangle is TOY_RECT_LINEAR, and the writemask is TOY_WRITEMASK_XYZW.
 * "subreg_in_bytes" is a byte offset within the 32-byte register.
 */
static inline struct toy_dst
tdst(enum toy_file file, unsigned reg, unsigned subreg_in_bytes)
{
   const enum toy_type type = TOY_TYPE_F;
   const enum toy_rect rect = TOY_RECT_LINEAR;
   const uint32_t val32 = reg * TOY_REG_WIDTH + subreg_in_bytes;

   return tdst_full(file, type, rect,
                    false, 0, TOY_WRITEMASK_XYZW, val32);
}
/**
 * Construct an immediate destination operand of type TOY_TYPE_W.
 * The only valid immediate dst type (used for IF/ELSE/ENDIF/WHILE;
 * see tdst_validate()).
 */
static inline struct toy_dst
tdst_imm_w(int16_t w)
{
   /* store the sign-extended value in val32 via union fi */
   const union fi fi = { .i = w };

   return tdst_full(TOY_FILE_IMM, TOY_TYPE_W, TOY_RECT_LINEAR,
                    false, 0, TOY_WRITEMASK_XYZW, fi.ui);
}
/**
 * Return true if the source operand is the null register
 * (ARF file at offset 0).
 */
static inline bool
tsrc_is_null(struct toy_src src)
{
   /* BRW_ARF_NULL happens to be 0 */
   return (src.file == TOY_FILE_ARF && src.val32 == 0);
}
/**
 * Return true if the source operand is swizzled, i.e. any channel selects
 * something other than itself (not the identity swizzle xyzw).
 */
static inline bool
tsrc_is_swizzled(struct toy_src src)
{
   return (src.swizzle_x != TOY_SWIZZLE_X ||
           src.swizzle_y != TOY_SWIZZLE_Y ||
           src.swizzle_z != TOY_SWIZZLE_Z ||
           src.swizzle_w != TOY_SWIZZLE_W);
}
/**
 * Return true if the source operand is swizzled to the same channel
 * (a broadcast such as .xxxx or .wwww).
 */
static inline bool
tsrc_is_swizzle1(struct toy_src src)
{
   return (src.swizzle_x == src.swizzle_y &&
           src.swizzle_x == src.swizzle_z &&
           src.swizzle_x == src.swizzle_w);
}
/**
 * Validate the source operand.  Asserts (debug builds) when the operand
 * is malformed and returns the operand unchanged.
 */
static inline struct toy_src
tsrc_validate(struct toy_src src)
{
   switch (src.file) {
   case TOY_FILE_VRF:
   case TOY_FILE_ARF:
   case TOY_FILE_MRF:
      /* these files are never addressed indirectly */
      assert(!src.indirect);
      if (src.file == TOY_FILE_MRF)
         assert(src.val32 < toy_file_size(src.file));
      break;
   case TOY_FILE_GRF:
      if (!src.indirect)
         assert(src.val32 < toy_file_size(src.file));
      break;
   case TOY_FILE_IMM:
      assert(!src.indirect);
      break;
   default:
      assert(!"invalid src file");
      break;
   }

   switch (src.type) {
   case TOY_TYPE_V:
      /* packed vector type is only meaningful as an immediate */
      assert(src.file == TOY_FILE_IMM);
      break;
   default:
      break;
   }

   /* register offsets must be aligned to the element size */
   if (src.file != TOY_FILE_IMM)
      assert(src.val32 % toy_type_size(src.type) == 0);

   assert(src.swizzle_x < 4 && src.swizzle_y < 4 &&
          src.swizzle_z < 4 && src.swizzle_w < 4);

   return src;
}
/**
 * Change the type of the source operand and re-validate
 * (e.g. the offset must stay aligned to the new element size).
 */
static inline struct toy_src
tsrc_type(struct toy_src src, enum toy_type type)
{
   src.type = type;
   return tsrc_validate(src);
}
/**
 * Change the type of the source operand to TOY_TYPE_D
 * (32-bit signed integer).
 */
static inline struct toy_src
tsrc_d(struct toy_src src)
{
   return tsrc_type(src, TOY_TYPE_D);
}
/**
 * Change the type of the source operand to TOY_TYPE_UD
 * (32-bit unsigned integer).
 */
static inline struct toy_src
tsrc_ud(struct toy_src src)
{
   return tsrc_type(src, TOY_TYPE_UD);
}
/**
 * Change the type of the source operand to TOY_TYPE_W
 * (16-bit signed integer).
 */
static inline struct toy_src
tsrc_w(struct toy_src src)
{
   return tsrc_type(src, TOY_TYPE_W);
}
/**
 * Change the type of the source operand to TOY_TYPE_UW
 * (16-bit unsigned integer).
 */
static inline struct toy_src
tsrc_uw(struct toy_src src)
{
   return tsrc_type(src, TOY_TYPE_UW);
}
/**
 * Change the rectangle (region description) of the source operand and
 * re-validate.
 */
static inline struct toy_src
tsrc_rect(struct toy_src src, enum toy_rect rect)
{
   src.rect = rect;
   return tsrc_validate(src);
}
/**
 * Swizzle the source operand.  Note that the current swizzles are honored:
 * the new swizzle selects among the operand's existing channel mappings
 * (composition, not replacement).
 */
static inline struct toy_src
tsrc_swizzle(struct toy_src src,
             enum toy_swizzle swizzle_x, enum toy_swizzle swizzle_y,
             enum toy_swizzle swizzle_z, enum toy_swizzle swizzle_w)
{
   /* snapshot the current mapping before overwriting it */
   const enum toy_swizzle current[4] = {
      src.swizzle_x, src.swizzle_y,
      src.swizzle_z, src.swizzle_w,
   };

   src.swizzle_x = current[swizzle_x];
   src.swizzle_y = current[swizzle_y];
   src.swizzle_z = current[swizzle_z];
   src.swizzle_w = current[swizzle_w];

   return tsrc_validate(src);
}
/**
 * Swizzle the source operand to the same channel (broadcast).  Note that
 * the current swizzles are honored (see tsrc_swizzle()).
 */
static inline struct toy_src
tsrc_swizzle1(struct toy_src src, enum toy_swizzle swizzle)
{
   return tsrc_swizzle(src, swizzle, swizzle, swizzle, swizzle);
}
/**
 * Set absolute and unset negate of the source operand, yielding |src|
 * regardless of any previous modifiers.
 */
static inline struct toy_src
tsrc_absolute(struct toy_src src)
{
   src.absolute = true;
   src.negate = false;
   return tsrc_validate(src);
}
/**
 * Negate the source operand.  Toggles (rather than sets) the negate flag,
 * so negating twice restores the original operand.
 */
static inline struct toy_src
tsrc_negate(struct toy_src src)
{
   src.negate = !src.negate;
   return tsrc_validate(src);
}
/**
 * Offset the source operand by "reg" whole registers plus "subreg"
 * elements of the operand's type (val32 is a byte offset).
 */
static inline struct toy_src
tsrc_offset(struct toy_src src, int reg, int subreg)
{
   src.val32 += reg * TOY_REG_WIDTH + subreg * toy_type_size(src.type);
   return tsrc_validate(src);
}
/** |
* Construct a source operand. |
*/ |
static inline struct toy_src |
tsrc_full(enum toy_file file, enum toy_type type, |
enum toy_rect rect, bool indirect, unsigned indirect_subreg, |
enum toy_swizzle swizzle_x, enum toy_swizzle swizzle_y, |
enum toy_swizzle swizzle_z, enum toy_swizzle swizzle_w, |
bool absolute, bool negate, |
uint32_t val32) |
{ |
struct toy_src src; |
src.file = file; |
src.type = type; |
src.rect = rect; |
src.indirect = indirect; |
src.indirect_subreg = indirect_subreg; |
src.swizzle_x = swizzle_x; |
src.swizzle_y = swizzle_y; |
src.swizzle_z = swizzle_z; |
src.swizzle_w = swizzle_w; |
src.absolute = absolute; |
src.negate = negate; |
src.pad = 0; |
src.val32 = val32; |
return tsrc_validate(src); |
} |
/**
 * Construct a null source operand (ARF file, offset 0) with the identity
 * swizzle and no modifiers.
 */
static inline struct toy_src
tsrc_null(void)
{
   static const struct toy_src null_src = {
      .file = TOY_FILE_ARF,
      .type = TOY_TYPE_F,
      .rect = TOY_RECT_LINEAR,
      .indirect = false,
      .indirect_subreg = 0,
      .swizzle_x = TOY_SWIZZLE_X,
      .swizzle_y = TOY_SWIZZLE_Y,
      .swizzle_z = TOY_SWIZZLE_Z,
      .swizzle_w = TOY_SWIZZLE_W,
      .absolute = false,
      .negate = false,
      .pad = 0,
      .val32 = 0,
   };

   return null_src;
}
/**
 * Construct a source operand from a destination operand.
 *
 * Channels that are disabled in dst.writemask hold no meaningful data;
 * their swizzles are redirected to the first enabled channel (or X when
 * the writemask is empty) so that every channel of the result reads a
 * channel that was actually written.
 */
static inline struct toy_src
tsrc_from(struct toy_dst dst)
{
   enum toy_swizzle swizzle[4];

   if (dst.writemask == TOY_WRITEMASK_XYZW) {
      /* common case: all channels written, identity swizzle */
      swizzle[0] = TOY_SWIZZLE_X;
      swizzle[1] = TOY_SWIZZLE_Y;
      swizzle[2] = TOY_SWIZZLE_Z;
      swizzle[3] = TOY_SWIZZLE_W;
   }
   else {
      /* the first enabled channel, used as the fallback for disabled ones */
      const enum toy_swizzle first =
         (dst.writemask & TOY_WRITEMASK_X) ? TOY_SWIZZLE_X :
         (dst.writemask & TOY_WRITEMASK_Y) ? TOY_SWIZZLE_Y :
         (dst.writemask & TOY_WRITEMASK_Z) ? TOY_SWIZZLE_Z :
         (dst.writemask & TOY_WRITEMASK_W) ? TOY_SWIZZLE_W :
         TOY_SWIZZLE_X;

      swizzle[0] = (dst.writemask & TOY_WRITEMASK_X) ? TOY_SWIZZLE_X : first;
      swizzle[1] = (dst.writemask & TOY_WRITEMASK_Y) ? TOY_SWIZZLE_Y : first;
      swizzle[2] = (dst.writemask & TOY_WRITEMASK_Z) ? TOY_SWIZZLE_Z : first;
      swizzle[3] = (dst.writemask & TOY_WRITEMASK_W) ? TOY_SWIZZLE_W : first;
   }

   /* the source inherits file/type/rect/indirection/location; no modifiers */
   return tsrc_full(dst.file, dst.type, dst.rect,
                    dst.indirect, dst.indirect_subreg,
                    swizzle[0], swizzle[1], swizzle[2], swizzle[3],
                    false, false, dst.val32);
}
/** |
* Construct a source operand, assuming the type is TOY_TYPE_F, the |
* rectangle is TOY_RECT_LINEAR, and no swizzles/absolute/negate. |
*/ |
static inline struct toy_src |
tsrc(enum toy_file file, unsigned reg, unsigned subreg_in_bytes) |
{ |
const enum toy_type type = TOY_TYPE_F; |
const enum toy_rect rect = TOY_RECT_LINEAR; |
const uint32_t val32 = reg * TOY_REG_WIDTH + subreg_in_bytes; |
return tsrc_full(file, type, rect, false, 0, |
TOY_SWIZZLE_X, TOY_SWIZZLE_Y, |
TOY_SWIZZLE_Z, TOY_SWIZZLE_W, |
false, false, val32); |
} |
/** |
* Construct an immediate source operand. |
*/ |
static inline struct toy_src |
tsrc_imm(enum toy_type type, uint32_t val32) |
{ |
return tsrc_full(TOY_FILE_IMM, type, TOY_RECT_LINEAR, false, 0, |
TOY_SWIZZLE_X, TOY_SWIZZLE_Y, |
TOY_SWIZZLE_Z, TOY_SWIZZLE_W, |
false, false, val32); |
} |
/** |
* Construct an immediate source operand of type TOY_TYPE_F. |
*/ |
static inline struct toy_src |
tsrc_imm_f(float f) |
{ |
const union fi fi = { .f = f }; |
return tsrc_imm(TOY_TYPE_F, fi.ui); |
} |
/** |
* Construct an immediate source operand of type TOY_TYPE_D. |
*/ |
static inline struct toy_src |
tsrc_imm_d(int32_t d) |
{ |
const union fi fi = { .i = d }; |
return tsrc_imm(TOY_TYPE_D, fi.ui); |
} |
/** |
* Construct an immediate source operand of type TOY_TYPE_UD. |
*/ |
static inline struct toy_src |
tsrc_imm_ud(uint32_t ud) |
{ |
const union fi fi = { .ui = ud }; |
return tsrc_imm(TOY_TYPE_UD, fi.ui); |
} |
/** |
* Construct an immediate source operand of type TOY_TYPE_W. |
*/ |
static inline struct toy_src |
tsrc_imm_w(int16_t w) |
{ |
const union fi fi = { .i = w }; |
return tsrc_imm(TOY_TYPE_W, fi.ui); |
} |
/** |
* Construct an immediate source operand of type TOY_TYPE_UW. |
*/ |
static inline struct toy_src |
tsrc_imm_uw(uint16_t uw) |
{ |
const union fi fi = { .ui = uw }; |
return tsrc_imm(TOY_TYPE_UW, fi.ui); |
} |
/**
 * Construct an immediate source operand of type TOY_TYPE_V (a packed
 * immediate vector; \p v carries the packed bits as-is).
 */
static inline struct toy_src
tsrc_imm_v(uint32_t v)
{
   return tsrc_imm(TOY_TYPE_V, v);
}
#endif /* TOY_REG_H */ |
/contrib/sdk/sources/Mesa/src/gallium/drivers/ilo/shader/toy_helpers.h |
---|
0,0 → 1,289 |
/* |
* Mesa 3-D graphics library |
* |
* Copyright (C) 2012-2013 LunarG, Inc. |
* |
* Permission is hereby granted, free of charge, to any person obtaining a |
* copy of this software and associated documentation files (the "Software"), |
* to deal in the Software without restriction, including without limitation |
* the rights to use, copy, modify, merge, publish, distribute, sublicense, |
* and/or sell copies of the Software, and to permit persons to whom the |
* Software is furnished to do so, subject to the following conditions: |
* |
* The above copyright notice and this permission notice shall be included |
* in all copies or substantial portions of the Software. |
* |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING |
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER |
* DEALINGS IN THE SOFTWARE. |
* |
* Authors: |
* Chia-I Wu <olv@lunarg.com> |
*/ |
#ifndef TOY_HELPERS_H |
#define TOY_HELPERS_H |
#include "toy_compiler.h" |
/**
 * Transpose a dst operand.
 *
 * Instead of processing a single vertex with each of its attributes in one
 * register, such as
 *
 *   r0 = [x0, y0, z0, w0]
 *
 * we want to process four vertices at a time
 *
 *   r0 = [x0, y0, z0, w0]
 *   r1 = [x1, y1, z1, w1]
 *   r2 = [x2, y2, z2, w2]
 *   r3 = [x3, y3, z3, w3]
 *
 * but with the attribute data "transposed"
 *
 *   r0 = [x0, x1, x2, x3]
 *   r1 = [y0, y1, y2, y3]
 *   r2 = [z0, z1, z2, z3]
 *   r3 = [w0, w1, w2, w3]
 *
 * This is also known as the SoA form.
 *
 * \param dst    the AoS destination to transpose
 * \param trans  receives four destinations, one per channel; disabled
 *               channels become null destinations
 */
static inline void
tdst_transpose(struct toy_dst dst, struct toy_dst *trans)
{
   int i;

   switch (dst.file) {
   case TOY_FILE_VRF:
      assert(!dst.indirect);
      for (i = 0; i < 4; i++) {
         if (dst.writemask & (1 << i)) {
            /* channel i lives in its own register, all channels writable */
            trans[i] = tdst_offset(dst, i, 0);
            trans[i].writemask = TOY_WRITEMASK_XYZW;
         }
         else {
            /* unwritten channel: write nowhere */
            trans[i] = tdst_null();
         }
      }
      break;
   case TOY_FILE_ARF:
      /* only the null register is expected here; it transposes to itself */
      assert(tdst_is_null(dst));
      for (i = 0; i < 4; i++)
         trans[i] = dst;
      break;
   case TOY_FILE_GRF:
   case TOY_FILE_MRF:
   case TOY_FILE_IMM:
   default:
      assert(!"unexpected file in dst transposition");
      for (i = 0; i < 4; i++)
         trans[i] = tdst_null();
      break;
   }
}
/**
 * Transpose a src operand (see tdst_transpose() for the SoA layout).
 *
 * \param src    the AoS source to transpose
 * \param trans  receives four sources, one per channel
 */
static inline void
tsrc_transpose(struct toy_src src, struct toy_src *trans)
{
   const enum toy_swizzle swizzle[4] = {
      src.swizzle_x, src.swizzle_y,
      src.swizzle_z, src.swizzle_w,
   };
   int i;

   switch (src.file) {
   case TOY_FILE_VRF:
      assert(!src.indirect);
      for (i = 0; i < 4; i++) {
         /*
          * in SoA form each channel occupies a whole register, so the
          * swizzle selects which register to read; the per-channel
          * swizzles then become the identity
          */
         trans[i] = tsrc_offset(src, swizzle[i], 0);
         trans[i].swizzle_x = TOY_SWIZZLE_X;
         trans[i].swizzle_y = TOY_SWIZZLE_Y;
         trans[i].swizzle_z = TOY_SWIZZLE_Z;
         trans[i].swizzle_w = TOY_SWIZZLE_W;
      }
      break;
   case TOY_FILE_ARF:
      assert(tsrc_is_null(src));
      /* fall through */
   case TOY_FILE_IMM:
      /* null and immediates read the same in every channel */
      for (i = 0; i < 4; i++)
         trans[i] = src;
      break;
   case TOY_FILE_GRF:
   case TOY_FILE_MRF:
   default:
      assert(!"unexpected file in src transposition");
      for (i = 0; i < 4; i++)
         trans[i] = tsrc_null();
      break;
   }
}
static inline struct toy_src |
tsrc_imm_mdesc(const struct toy_compiler *tc, |
bool eot, |
unsigned message_length, |
unsigned response_length, |
bool header_present, |
uint32_t function_control) |
{ |
uint32_t desc; |
assert(message_length >= 1 && message_length <= 15); |
assert(response_length >= 0 && response_length <= 16); |
assert(function_control < 1 << 19); |
desc = eot << 31 | |
message_length << 25 | |
response_length << 20 | |
header_present << 19 | |
function_control; |
return tsrc_imm_ud(desc); |
} |
static inline struct toy_src |
tsrc_imm_mdesc_sampler(const struct toy_compiler *tc, |
unsigned message_length, |
unsigned response_length, |
bool header_present, |
unsigned simd_mode, |
unsigned message_type, |
unsigned sampler_index, |
unsigned binding_table_index) |
{ |
const bool eot = false; |
uint32_t ctrl; |
assert(simd_mode < 4); |
assert(sampler_index < 16); |
assert(binding_table_index < 256); |
if (tc->dev->gen >= ILO_GEN(7)) { |
ctrl = simd_mode << 17 | |
message_type << 12 | |
sampler_index << 8 | |
binding_table_index; |
} |
else { |
ctrl = simd_mode << 16 | |
message_type << 12 | |
sampler_index << 8 | |
binding_table_index; |
} |
return tsrc_imm_mdesc(tc, eot, message_length, |
response_length, header_present, ctrl); |
} |
/**
 * Construct an immediate message descriptor for the data port.
 *
 * The GEN6 and GEN7 layouts differ: GEN7 has no write-commit bit and
 * places the message type at bit 14 instead of 13.
 */
static inline struct toy_src
tsrc_imm_mdesc_data_port(const struct toy_compiler *tc,
                         bool eot,
                         unsigned message_length,
                         unsigned response_length,
                         bool header_present,
                         bool send_write_commit_message,
                         unsigned message_type,
                         unsigned message_specific_control,
                         unsigned binding_table_index)
{
   uint32_t ctrl;

   if (tc->dev->gen >= ILO_GEN(7)) {
      /* no write-commit on GEN7; control bits live in [13:8] */
      assert(!send_write_commit_message);
      assert((message_specific_control & 0x3f00) == message_specific_control);

      ctrl = message_type << 14 |
             (message_specific_control & 0x3f00) |
             binding_table_index;
   }
   else {
      /* write commit is only valid for streamed VB writes on GEN6 */
      assert(!send_write_commit_message ||
             message_type == GEN6_DATAPORT_WRITE_MESSAGE_STREAMED_VB_WRITE);
      assert((message_specific_control & 0x1f00) == message_specific_control);

      ctrl = send_write_commit_message << 17 |
             message_type << 13 |
             (message_specific_control & 0x1f00) |
             binding_table_index;
   }

   return tsrc_imm_mdesc(tc, eot, message_length,
                         response_length, header_present, ctrl);
}
/**
 * Construct an immediate message descriptor for a GEN7+ scratch-space
 * read/write through the data port.
 */
static inline struct toy_src
tsrc_imm_mdesc_data_port_scratch(const struct toy_compiler *tc,
                                 unsigned message_length,
                                 unsigned response_length,
                                 bool write_type,
                                 bool dword_mode,
                                 bool invalidate_after_read,
                                 int num_registers,
                                 int hword_offset)
{
   const bool eot = false;
   const bool header_present = true;
   uint32_t ctrl;

   /* this message form only exists on GEN7+ */
   assert(tc->dev->gen >= ILO_GEN(7));
   assert(num_registers == 1 || num_registers == 2 || num_registers == 4);

   /*
    * bit 18 marks this as a scratch block message; block size is encoded
    * as (num_registers - 1) in bits [13:12]
    * NOTE(review): field layout assumed from the GEN7 PRM -- confirm
    */
   ctrl = 1 << 18 |
          write_type << 17 |
          dword_mode << 16 |
          invalidate_after_read << 15 |
          (num_registers - 1) << 12 |
          hword_offset;

   return tsrc_imm_mdesc(tc, eot, message_length,
                         response_length, header_present, ctrl);
}
/**
 * Construct an immediate message descriptor for URB messages.
 *
 * The GEN7 layout drops the used/allocate bits of GEN6 and moves the
 * remaining fields; this compiler never uses a per-slot offset.
 */
static inline struct toy_src
tsrc_imm_mdesc_urb(const struct toy_compiler *tc,
                   bool eot,
                   unsigned message_length,
                   unsigned response_length,
                   bool complete,
                   bool used,
                   bool allocate,
                   unsigned swizzle_control,
                   unsigned global_offset,
                   unsigned urb_opcode)
{
   /* URB messages always carry a header */
   const bool header_present = true;
   uint32_t ctrl;

   if (tc->dev->gen >= ILO_GEN(7)) {
      const bool per_slot_offset = false;

      ctrl = per_slot_offset << 16 |
             complete << 15 |
             swizzle_control << 14 |
             global_offset << 3 |
             urb_opcode;
   }
   else {
      ctrl = complete << 15 |
             used << 14 |
             allocate << 13 |
             swizzle_control << 10 |
             global_offset << 4 |
             urb_opcode;
   }

   return tsrc_imm_mdesc(tc, eot, message_length,
                         response_length, header_present, ctrl);
}
#endif /* TOY_HELPERS_H */ |
/contrib/sdk/sources/Mesa/src/gallium/drivers/ilo/shader/toy_legalize.c |
---|
0,0 → 1,632 |
/* |
* Mesa 3-D graphics library |
* |
* Copyright (C) 2012-2013 LunarG, Inc. |
* |
* Permission is hereby granted, free of charge, to any person obtaining a |
* copy of this software and associated documentation files (the "Software"), |
* to deal in the Software without restriction, including without limitation |
* the rights to use, copy, modify, merge, publish, distribute, sublicense, |
* and/or sell copies of the Software, and to permit persons to whom the |
* Software is furnished to do so, subject to the following conditions: |
* |
* The above copyright notice and this permission notice shall be included |
* in all copies or substantial portions of the Software. |
* |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING |
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER |
* DEALINGS IN THE SOFTWARE. |
* |
* Authors: |
* Chia-I Wu <olv@lunarg.com> |
*/ |
#include "pipe/p_shader_tokens.h" |
#include "toy_compiler.h" |
#include "toy_tgsi.h" |
#include "toy_helpers.h" |
#include "toy_legalize.h" |
/**
 * Lower an instruction to BRW_OPCODE_SEND(C).
 *
 * \param sendc  use SENDC instead of SEND
 * \param sfid   the shared function (message target); stored in the
 *               cond_modifier field, which SEND repurposes
 */
void
toy_compiler_lower_to_send(struct toy_compiler *tc, struct toy_inst *inst,
                           bool sendc, unsigned sfid)
{
   /* only virtual opcodes (numbered >= 128) may be lowered to SEND */
   assert(inst->opcode >= 128);

   inst->opcode = (sendc) ? BRW_OPCODE_SENDC : BRW_OPCODE_SEND;

   /* thread control is reserved */
   assert(inst->thread_ctrl == 0);

   assert(inst->cond_modifier == BRW_CONDITIONAL_NONE);
   inst->cond_modifier = sfid;
}
/**
 * Map a virtual math opcode to the hardware math function encoding
 * (the FC field of BRW_OPCODE_MATH), or -1 for an unknown opcode.
 */
static int
math_op_to_func(unsigned opcode)
{
   switch (opcode) {
   case TOY_OPCODE_INV:  return BRW_MATH_FUNCTION_INV;
   case TOY_OPCODE_LOG:  return BRW_MATH_FUNCTION_LOG;
   case TOY_OPCODE_EXP:  return BRW_MATH_FUNCTION_EXP;
   case TOY_OPCODE_SQRT: return BRW_MATH_FUNCTION_SQRT;
   case TOY_OPCODE_RSQ:  return BRW_MATH_FUNCTION_RSQ;
   case TOY_OPCODE_SIN:  return BRW_MATH_FUNCTION_SIN;
   case TOY_OPCODE_COS:  return BRW_MATH_FUNCTION_COS;
   case TOY_OPCODE_FDIV: return BRW_MATH_FUNCTION_FDIV;
   case TOY_OPCODE_POW:  return BRW_MATH_FUNCTION_POW;
   case TOY_OPCODE_INT_DIV_QUOTIENT:  return BRW_MATH_FUNCTION_INT_DIV_QUOTIENT;
   case TOY_OPCODE_INT_DIV_REMAINDER: return BRW_MATH_FUNCTION_INT_DIV_REMAINDER;
   default:
      assert(!"unknown math opcode");
      return -1;
   }
}
/**
 * Lower virtual math opcodes to BRW_OPCODE_MATH.
 *
 * MATH executes in align1 mode, which supports neither swizzled sources
 * nor destination writemasks; offending operands are routed through
 * temporaries with extra MOVs.
 */
void
toy_compiler_lower_math(struct toy_compiler *tc, struct toy_inst *inst)
{
   struct toy_dst tmp;
   int i;

   /* see commit 250770b74d33bb8625c780a74a89477af033d13a */
   for (i = 0; i < Elements(inst->src); i++) {
      if (tsrc_is_null(inst->src[i]))
         break;

      /* no swizzling in align1 */
      /* XXX how about source modifiers? */
      if (toy_file_is_virtual(inst->src[i].file) &&
          !tsrc_is_swizzled(inst->src[i]) &&
          !inst->src[i].absolute &&
          !inst->src[i].negate)
         continue;

      /* copy the offending source to a plain temporary first */
      tmp = tdst_type(tc_alloc_tmp(tc), inst->src[i].type);
      tc_MOV(tc, tmp, inst->src[i]);
      inst->src[i] = tsrc_from(tmp);
   }

   /* FC[0:3] -- the math function lives in the cond_modifier field */
   assert(inst->cond_modifier == BRW_CONDITIONAL_NONE);
   inst->cond_modifier = math_op_to_func(inst->opcode);

   /* FC[4:5] */
   assert(inst->thread_ctrl == 0);
   inst->thread_ctrl = 0;

   inst->opcode = BRW_OPCODE_MATH;
   tc_move_inst(tc, inst);

   /* no writemask in align1 */
   if (inst->dst.writemask != TOY_WRITEMASK_XYZW) {
      struct toy_dst dst = inst->dst;
      struct toy_inst *inst2;

      /* write the full result to a temporary ... */
      tmp = tc_alloc_tmp(tc);
      tmp.type = inst->dst.type;
      inst->dst = tmp;

      /* ... then apply the writemask with a MOV, preserving predication */
      inst2 = tc_MOV(tc, dst, tsrc_from(tmp));
      inst2->pred_ctrl = inst->pred_ctrl;
   }
}
static uint32_t |
absolute_imm(uint32_t imm32, enum toy_type type) |
{ |
union fi val = { .ui = imm32 }; |
switch (type) { |
case TOY_TYPE_F: |
val.f = fabs(val.f); |
break; |
case TOY_TYPE_D: |
if (val.i < 0) |
val.i = -val.i; |
break; |
case TOY_TYPE_W: |
if ((int16_t) (val.ui & 0xffff) < 0) |
val.i = -((int16_t) (val.ui & 0xffff)); |
break; |
case TOY_TYPE_V: |
assert(!"cannot take absoulte of immediates of type V"); |
break; |
default: |
break; |
} |
return val.ui; |
} |
/**
 * Return the negation of a 32-bit immediate, interpreted according
 * to \p type.
 */
static uint32_t
negate_imm(uint32_t imm32, enum toy_type type)
{
   union fi val = { .ui = imm32 };

   switch (type) {
   case TOY_TYPE_F:
      val.f = -val.f;
      break;
   case TOY_TYPE_D:
   case TOY_TYPE_UD:
      /* two's complement negation works for both signed and unsigned */
      val.i = -val.i;
      break;
   case TOY_TYPE_W:
   case TOY_TYPE_UW:
      /* only the low 16 bits hold the immediate; sign-extend, then negate */
      val.i = -((int16_t) (val.ui & 0xffff));
      break;
   default:
      assert(!"negate immediate of unknown type");
      break;
   }

   return val.ui;
}
/**
 * Make immediate operands encodable: fold absolute/negate modifiers into
 * the value itself, and move any immediate that is not the last operand
 * into a temporary register (the hardware only accepts an immediate as
 * the final source).
 */
static void
validate_imm(struct toy_compiler *tc, struct toy_inst *inst)
{
   bool move_inst = false;
   int i;

   for (i = 0; i < Elements(inst->src); i++) {
      struct toy_dst tmp;

      if (tsrc_is_null(inst->src[i]))
         break;

      if (inst->src[i].file != TOY_FILE_IMM)
         continue;

      /* immediates cannot carry modifiers; apply them to the value */
      if (inst->src[i].absolute) {
         inst->src[i].val32 =
            absolute_imm(inst->src[i].val32, inst->src[i].type);
         inst->src[i].absolute = false;
      }
      if (inst->src[i].negate) {
         inst->src[i].val32 =
            negate_imm(inst->src[i].val32, inst->src[i].type);
         inst->src[i].negate = false;
      }

      /* this is the last operand */
      if (i + 1 == Elements(inst->src) || tsrc_is_null(inst->src[i + 1]))
         break;

      /* need to use a temp if this imm is not the last operand */
      /* TODO we should simply swap the operands if the op is commutative */
      tmp = tc_alloc_tmp(tc);
      tmp = tdst_type(tmp, inst->src[i].type);
      tc_MOV(tc, tmp, inst->src[i]);
      inst->src[i] = tsrc_from(tmp);

      move_inst = true;
   }

   /* the inserted MOVs must precede the instruction; move it after them */
   if (move_inst)
      tc_move_inst(tc, inst);
}
/**
 * Lower an integer MUL to a MUL/MACH/MOV sequence through the
 * accumulator; float MUL is left untouched.
 */
static void
lower_opcode_mul(struct toy_compiler *tc, struct toy_inst *inst)
{
   const enum toy_type inst_type = inst->dst.type;
   const struct toy_dst acc0 =
      tdst_type(tdst(TOY_FILE_ARF, BRW_ARF_ACCUMULATOR, 0), inst_type);
   struct toy_inst *inst2;

   /* only need to take care of integer multiplications */
   if (inst_type != TOY_TYPE_UD && inst_type != TOY_TYPE_D)
      return;

   /* acc0 = (src0 & 0x0000ffff) * src1 */
   tc_MUL(tc, acc0, inst->src[0], inst->src[1]);

   /* acc0 = (src0 & 0xffff0000) * src1 + acc0 */
   inst2 = tc_add2(tc, BRW_OPCODE_MACH, tdst_type(tdst_null(), inst_type),
         inst->src[0], inst->src[1]);
   inst2->acc_wr_ctrl = true;

   /* dst = acc0 & 0xffffffff */
   tc_MOV(tc, inst->dst, tsrc_from(acc0));

   /* the original MUL is fully replaced by the sequence above */
   tc_discard_inst(tc, inst);
}
/**
 * Lower MAC.  Float MAC only needs the accumulator pre-loaded with the
 * third operand; integer MAC is expanded to MUL + ADD (and the MUL is in
 * turn lowered by lower_opcode_mul()).
 */
static void
lower_opcode_mac(struct toy_compiler *tc, struct toy_inst *inst)
{
   const enum toy_type inst_type = inst->dst.type;

   if (inst_type != TOY_TYPE_UD && inst_type != TOY_TYPE_D) {
      /* MAC reads the accumulator implicitly; load src2 into it first */
      const struct toy_dst acc0 = tdst(TOY_FILE_ARF, BRW_ARF_ACCUMULATOR, 0);

      tc_MOV(tc, acc0, inst->src[2]);
      inst->src[2] = tsrc_null();
      tc_move_inst(tc, inst);
   }
   else {
      struct toy_dst tmp = tdst_type(tc_alloc_tmp(tc), inst_type);
      struct toy_inst *inst2;

      /* tmp = src0 * src1; dst = tmp + src2 */
      inst2 = tc_MUL(tc, tmp, inst->src[0], inst->src[1]);
      lower_opcode_mul(tc, inst2);

      tc_ADD(tc, inst->dst, tsrc_from(tmp), inst->src[2]);

      tc_discard_inst(tc, inst);
   }
}
/**
 * Legalize the instructions for register allocation.
 *
 * Lowers MAC/MUL, rejects any internal opcode that was not lowered
 * earlier, and finally legalizes immediate operands in a second pass
 * (the lowering pass may have inserted new instructions).
 */
void
toy_compiler_legalize_for_ra(struct toy_compiler *tc)
{
   struct toy_inst *inst;

   tc_head(tc);
   while ((inst = tc_next(tc)) != NULL) {
      switch (inst->opcode) {
      case BRW_OPCODE_MAC:
         lower_opcode_mac(tc, inst);
         break;
      case BRW_OPCODE_MAD:
         /* TODO operands must be floats */
         break;
      case BRW_OPCODE_MUL:
         lower_opcode_mul(tc, inst);
         break;
      default:
         /* every virtual opcode should have been lowered by now */
         if (inst->opcode > TOY_OPCODE_LAST_HW)
            tc_fail(tc, "internal opcodes not lowered");
      }
   }

   /* loop again as the previous pass may add new instructions */
   tc_head(tc);
   while ((inst = tc_next(tc)) != NULL) {
      validate_imm(tc, inst);
   }
}
/**
 * Fill in the jump target of a WHILE: the (negative) distance back to the
 * instruction following the matching DO marker.  Distances are encoded as
 * twice the instruction count.
 */
static void
patch_while_jip(struct toy_compiler *tc, struct toy_inst *inst)
{
   struct toy_inst *inst2;
   int nest_level, dist;

   nest_level = 0;
   dist = -1;

   /* search backward */
   LIST_FOR_EACH_ENTRY_FROM_REV(inst2, inst->list.prev,
         &tc->instructions, list) {
      if (inst2->marker) {
         /* markers are not real instructions; they do not count toward dist */
         if (inst2->opcode == BRW_OPCODE_DO) {
            if (nest_level) {
               /* the DO of an inner loop we already passed the WHILE of */
               nest_level--;
            }
            else {
               /* the following instruction */
               dist++;
               break;
            }
         }
         continue;
      }

      /* an inner WHILE means its matching DO must be skipped */
      if (inst2->opcode == BRW_OPCODE_WHILE)
         nest_level++;

      dist--;
   }

   /* GEN7 encodes JIP in src1; GEN6 encodes it in the dst field */
   if (tc->dev->gen >= ILO_GEN(7))
      inst->src[1] = tsrc_imm_w(dist * 2);
   else
      inst->dst = tdst_imm_w(dist * 2);
}
/**
 * Fill in the jump targets of an IF or ELSE: JIP points past the matching
 * ELSE (for IF) or to the ENDIF; UIP points to the ENDIF.  Distances are
 * encoded as twice the instruction count.
 */
static void
patch_if_else_jip(struct toy_compiler *tc, struct toy_inst *inst)
{
   struct toy_inst *inst2;
   int nest_level, dist;
   int jip, uip;

   nest_level = 0;
   dist = 1;
   jip = 0;
   uip = 0;

   /* search forward */
   LIST_FOR_EACH_ENTRY_FROM(inst2, inst->list.next, &tc->instructions, list) {
      /* markers are not real instructions; they do not count toward dist */
      if (inst2->marker)
         continue;

      if (inst2->opcode == BRW_OPCODE_ENDIF) {
         if (nest_level) {
            nest_level--;
         }
         else {
            /* the matching ENDIF */
            uip = dist * 2;
            /* no ELSE was seen: JIP also targets the ENDIF */
            if (!jip)
               jip = uip;
            break;
         }
      }
      else if (inst2->opcode == BRW_OPCODE_ELSE &&
               inst->opcode == BRW_OPCODE_IF) {
         if (!nest_level) {
            /* the following instruction */
            jip = (dist + 1) * 2;

            /* GEN6 IF has no UIP field; only JIP is needed */
            if (tc->dev->gen == ILO_GEN(6)) {
               uip = jip;
               break;
            }
         }
      }
      else if (inst2->opcode == BRW_OPCODE_IF) {
         /* a nested IF: its ENDIF must be skipped */
         nest_level++;
      }

      dist++;
   }

   if (tc->dev->gen >= ILO_GEN(7)) {
      /* what should the type be? */
      inst->dst.type = TOY_TYPE_D;
      inst->src[0].type = TOY_TYPE_D;
      /* GEN7 packs UIP in the high word and JIP in the low word of src1 */
      inst->src[1] = tsrc_imm_d(uip << 16 | jip);
   }
   else {
      inst->dst = tdst_imm_w(jip);
   }

   inst->thread_ctrl = BRW_THREAD_SWITCH;
}
/**
 * Fill in the jump target of an ENDIF: the distance to the next
 * instruction that may re-enable channels (ENDIF/ELSE/WHILE), or the
 * following instruction when none is found.  Distances are encoded as
 * twice the instruction count.
 */
static void
patch_endif_jip(struct toy_compiler *tc, struct toy_inst *inst)
{
   struct toy_inst *inst2;
   bool found = false;
   int dist = 1;

   /* search forward for instructions that may enable channels */
   LIST_FOR_EACH_ENTRY_FROM(inst2, inst->list.next, &tc->instructions, list) {
      /* markers are not real instructions; they do not count toward dist */
      if (inst2->marker)
         continue;

      switch (inst2->opcode) {
      case BRW_OPCODE_ENDIF:
      case BRW_OPCODE_ELSE:
      case BRW_OPCODE_WHILE:
         found = true;
         break;
      default:
         break;
      }

      if (found)
         break;

      dist++;
   }

   /* should we set dist to (dist - 1) or 1? */
   if (!found)
      dist = 1;

   /* GEN7 encodes JIP in src1; GEN6 encodes it in the dst field */
   if (tc->dev->gen >= ILO_GEN(7))
      inst->src[1] = tsrc_imm_w(dist * 2);
   else
      inst->dst = tdst_imm_w(dist * 2);

   inst->thread_ctrl = BRW_THREAD_SWITCH;
}
/**
 * Fill in the jump targets of a BREAK or CONTINUE: JIP points to the next
 * channel-enabling instruction (ELSE/ENDIF/WHILE); UIP points past the
 * WHILE that closes the enclosing loop.  Distances are encoded as twice
 * the instruction count.
 */
static void
patch_break_continue_jip(struct toy_compiler *tc, struct toy_inst *inst)
{
   struct toy_inst *inst2, *inst3;
   int nest_level, dist, jip, uip;

   nest_level = 0;
   dist = 1;
   /* default to the following instruction */
   jip = 1 * 2;
   uip = 1 * 2;

   /* search forward */
   LIST_FOR_EACH_ENTRY_FROM(inst2, inst->list.next, &tc->instructions, list) {
      if (inst2->marker) {
         /* entering a nested loop */
         if (inst2->opcode == BRW_OPCODE_DO)
            nest_level++;
         continue;
      }

      if (inst2->opcode == BRW_OPCODE_ELSE ||
          inst2->opcode == BRW_OPCODE_ENDIF ||
          inst2->opcode == BRW_OPCODE_WHILE) {
         jip = dist * 2;
         break;
      }

      dist++;
   }

   /* go on to determine uip */
   inst3 = inst2;
   LIST_FOR_EACH_ENTRY_FROM(inst2, &inst3->list, &tc->instructions, list) {
      if (inst2->marker) {
         if (inst2->opcode == BRW_OPCODE_DO)
            nest_level++;
         continue;
      }

      if (inst2->opcode == BRW_OPCODE_WHILE) {
         if (nest_level) {
            /* the WHILE of a nested loop */
            nest_level--;
         }
         else {
            /* the following instruction */
            if (tc->dev->gen == ILO_GEN(6) && inst->opcode == BRW_OPCODE_BREAK)
               dist++;
            uip = dist * 2;
            break;
         }
      }

      dist++;
   }

   /* should the type be D or W? */
   inst->dst.type = TOY_TYPE_D;
   inst->src[0].type = TOY_TYPE_D;
   /* UIP in the high word, JIP in the low word of src1 */
   inst->src[1] = tsrc_imm_d(uip << 16 | jip);
}
/**
 * Legalize the instructions for assembling.
 *
 * First pass: fix up src1 types, split SIMD16 MATH where unsupported,
 * expand conditional IF on GEN7+, and retarget MRF operands to GRF on
 * GEN7+ (which has no MRFs).  Second pass: patch the JIP/UIP jump offsets
 * of all control-flow instructions, which requires final instruction
 * positions and therefore must run after the first pass.
 */
void
toy_compiler_legalize_for_asm(struct toy_compiler *tc)
{
   struct toy_inst *inst;
   int pc = 0;

   tc_head(tc);
   while ((inst = tc_next(tc)) != NULL) {
      int i;

      pc++;

      /*
       * From the Sandy Bridge PRM, volume 4 part 2, page 112:
       *
       *   "Specifically, for instructions with a single source, it only
       *    uses the first source operand <src0>. In this case, the second
       *    source operand <src1> must be set to null and also with the same
       *    type as the first source operand <src0>. It is a special case
       *    when <src0> is an immediate, as an immediate <src0> uses DW3 of
       *    the instruction word, which is normally used by <src1>. In this
       *    case, <src1> must be programmed with register file ARF and the
       *    same data type as <src0>."
       *
       * Since we already fill unused operands with null, we only need to take
       * care of the type.
       */
      if (tsrc_is_null(inst->src[1]))
         inst->src[1].type = inst->src[0].type;

      switch (inst->opcode) {
      case BRW_OPCODE_MATH:
         /* math does not support align16 nor exec_size > 8 */
         inst->access_mode = BRW_ALIGN_1;

         if (inst->exec_size == BRW_EXECUTE_16) {
            /*
             * From the Ivy Bridge PRM, volume 4 part 3, page 192:
             *
             *   "INT DIV function does not support SIMD16."
             */
            if (tc->dev->gen < ILO_GEN(7) ||
                inst->cond_modifier == BRW_MATH_FUNCTION_INT_DIV_QUOTIENT ||
                inst->cond_modifier == BRW_MATH_FUNCTION_INT_DIV_REMAINDER) {
               struct toy_inst *inst2;

               /* split into two SIMD8 halves, one per quarter control */
               inst->exec_size = BRW_EXECUTE_8;
               inst->qtr_ctrl = GEN6_COMPRESSION_1Q;

               inst2 = tc_duplicate_inst(tc, inst);
               inst2->qtr_ctrl = GEN6_COMPRESSION_2Q;
               /* the second half operates on the next register */
               inst2->dst = tdst_offset(inst2->dst, 1, 0);
               inst2->src[0] = tsrc_offset(inst2->src[0], 1, 0);
               if (!tsrc_is_null(inst2->src[1]))
                  inst2->src[1] = tsrc_offset(inst2->src[1], 1, 0);

               pc++;
            }
         }
         break;
      case BRW_OPCODE_IF:
         if (tc->dev->gen >= ILO_GEN(7) &&
             inst->cond_modifier != BRW_CONDITIONAL_NONE) {
            struct toy_inst *inst2;

            inst2 = tc_duplicate_inst(tc, inst);

            /* replace the original IF by CMP */
            inst->opcode = BRW_OPCODE_CMP;

            /* predicate control instead of condition modifier */
            inst2->dst = tdst_null();
            inst2->src[0] = tsrc_null();
            inst2->src[1] = tsrc_null();
            inst2->cond_modifier = BRW_CONDITIONAL_NONE;
            inst2->pred_ctrl = BRW_PREDICATE_NORMAL;

            pc++;
         }
         break;
      default:
         break;
      }

      /* MRF to GRF */
      if (tc->dev->gen >= ILO_GEN(7)) {
         for (i = 0; i < Elements(inst->src); i++) {
            if (inst->src[i].file != TOY_FILE_MRF)
               continue;
            else if (tsrc_is_null(inst->src[i]))
               break;

            inst->src[i].file = TOY_FILE_GRF;
         }

         if (inst->dst.file == TOY_FILE_MRF)
            inst->dst.file = TOY_FILE_GRF;
      }
   }
   tc->num_instructions = pc;

   /* set JIP/UIP */
   tc_head(tc);
   while ((inst = tc_next(tc)) != NULL) {
      switch (inst->opcode) {
      case BRW_OPCODE_IF:
      case BRW_OPCODE_ELSE:
         patch_if_else_jip(tc, inst);
         break;
      case BRW_OPCODE_ENDIF:
         patch_endif_jip(tc, inst);
         break;
      case BRW_OPCODE_WHILE:
         patch_while_jip(tc, inst);
         break;
      case BRW_OPCODE_BREAK:
      case BRW_OPCODE_CONTINUE:
         patch_break_continue_jip(tc, inst);
         break;
      default:
         break;
      }
   }
}
/contrib/sdk/sources/Mesa/src/gallium/drivers/ilo/shader/toy_legalize.h |
---|
0,0 → 1,52 |
/* |
* Mesa 3-D graphics library |
* |
* Copyright (C) 2012-2013 LunarG, Inc. |
* |
* Permission is hereby granted, free of charge, to any person obtaining a |
* copy of this software and associated documentation files (the "Software"), |
* to deal in the Software without restriction, including without limitation |
* the rights to use, copy, modify, merge, publish, distribute, sublicense, |
* and/or sell copies of the Software, and to permit persons to whom the |
* Software is furnished to do so, subject to the following conditions: |
* |
* The above copyright notice and this permission notice shall be included |
* in all copies or substantial portions of the Software. |
* |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING |
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER |
* DEALINGS IN THE SOFTWARE. |
* |
* Authors: |
* Chia-I Wu <olv@lunarg.com> |
*/ |
#ifndef TOY_LEGALIZE_H |
#define TOY_LEGALIZE_H |
#include "toy_compiler.h" |
#include "toy_tgsi.h" |
/* lower a virtual send-like instruction to BRW_OPCODE_SEND(C) */
void
toy_compiler_lower_to_send(struct toy_compiler *tc, struct toy_inst *inst,
                           bool sendc, unsigned sfid);

/* lower virtual math opcodes to BRW_OPCODE_MATH */
void
toy_compiler_lower_math(struct toy_compiler *tc, struct toy_inst *inst);

/* perform register allocation, mapping virtual registers to GRFs */
void
toy_compiler_allocate_registers(struct toy_compiler *tc,
                                int start_grf, int end_grf,
                                int num_grf_per_vrf);

/* legalize the instructions for register allocation */
void
toy_compiler_legalize_for_ra(struct toy_compiler *tc);

/* legalize the instructions for assembling */
void
toy_compiler_legalize_for_asm(struct toy_compiler *tc);
#endif /* TOY_LEGALIZE_H */ |
/contrib/sdk/sources/Mesa/src/gallium/drivers/ilo/shader/toy_legalize_ra.c |
---|
0,0 → 1,628 |
/* |
* Mesa 3-D graphics library |
* |
* Copyright (C) 2012-2013 LunarG, Inc. |
* |
* Permission is hereby granted, free of charge, to any person obtaining a |
* copy of this software and associated documentation files (the "Software"), |
* to deal in the Software without restriction, including without limitation |
* the rights to use, copy, modify, merge, publish, distribute, sublicense, |
* and/or sell copies of the Software, and to permit persons to whom the |
* Software is furnished to do so, subject to the following conditions: |
* |
* The above copyright notice and this permission notice shall be included |
* in all copies or substantial portions of the Software. |
* |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING |
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER |
* DEALINGS IN THE SOFTWARE. |
* |
* Authors: |
* Chia-I Wu <olv@lunarg.com> |
*/ |
#include <stdlib.h> /* for qsort() */ |
#include "toy_compiler.h" |
#include "toy_legalize.h" |
/**
 * Live interval of a VRF register.
 */
struct linear_scan_live_interval {
   /* the virtual register this interval belongs to */
   int vrf;
   /* the interval is live from startpoint through endpoint */
   int startpoint;
   int endpoint;

   /*
    * should this be assigned a consecutive register of the previous
    * interval's?
    */
   bool consecutive;

   /* the hardware register assigned by the scan */
   int reg;

   /* link in the active list while the interval is live */
   struct list_head list;
};
/**
 * Linear scan.
 */
struct linear_scan {
   /* one interval per VRF (assumed sorted by startpoint; see linear_scan_run()) */
   struct linear_scan_live_interval *intervals;
   int max_vrf, num_vrfs;

   int num_regs;

   /* intervals currently live, kept sorted by endpoint */
   struct list_head active_list;

   /* stack of free hardware register numbers; popped from the end */
   int *free_regs;
   int num_free_regs;

   /* resulting map from VRF number to assigned hardware register */
   int *vrf_mapping;
};
/** |
* Return a chunk of registers to the free register pool. |
*/ |
static void |
linear_scan_free_regs(struct linear_scan *ls, int reg, int count) |
{ |
int i; |
for (i = 0; i < count; i++) |
ls->free_regs[ls->num_free_regs++] = reg + count - 1 - i; |
} |
/* qsort comparator: sorts register numbers in descending order */
static int
linear_scan_compare_regs(const void *elem1, const void *elem2)
{
   const int a = *(const int *) elem1;
   const int b = *(const int *) elem2;

   return b - a;
}
/**
 * Allocate a chunk of registers from the free register pool.
 *
 * \param count  number of registers with consecutive register numbers
 * \return the first register of the chunk, or -1 on failure
 */
static int
linear_scan_allocate_regs(struct linear_scan *ls, int count)
{
   bool sorted = false;
   int reg;

   /* simple cases */
   if (count > ls->num_free_regs)
      return -1;
   else if (count == 1)
      return ls->free_regs[--ls->num_free_regs];

   /* TODO a free register pool */
   /* TODO reserve some regs for spilling */
   while (true) {
      bool found = false;
      int start;

      /*
       * find a chunk of registers that have consecutive register
       * numbers
       */
      for (start = ls->num_free_regs - 1; start >= count - 1; start--) {
         int i;

         /* free_regs descends toward the end, so consecutive numbers
            appear at descending indices */
         for (i = 1; i < count; i++) {
            if (ls->free_regs[start - i] != ls->free_regs[start] + i)
               break;
         }

         if (i >= count) {
            found = true;
            break;
         }
      }

      if (found) {
         reg = ls->free_regs[start];

         /* close the gap left by the allocated chunk */
         if (start != ls->num_free_regs - 1) {
            start++;
            memmove(&ls->free_regs[start - count],
                  &ls->free_regs[start],
                  sizeof(*ls->free_regs) * (ls->num_free_regs - start));
         }
         ls->num_free_regs -= count;
         break;
      }
      else if (!sorted) {
         /* sort and retry */
         qsort(ls->free_regs, ls->num_free_regs, sizeof(*ls->free_regs),
               linear_scan_compare_regs);
         sorted = true;
      }
      else {
         /* failed */
         reg = -1;
         break;
      }
   }

   return reg;
}
/**
 * Add an interval to the active list.
 */
static void
linear_scan_add_active(struct linear_scan *ls,
                       struct linear_scan_live_interval *interval)
{
   struct linear_scan_live_interval *pos;

   /* keep the active list sorted by endpoints */
   LIST_FOR_EACH_ENTRY(pos, &ls->active_list, list) {
      if (pos->endpoint >= interval->endpoint)
         break;
   }

   /* insert before pos (appends at the tail when the loop ran to the end) */
   list_addtail(&interval->list, &pos->list);
}
/**
 * Remove an interval from the active list.
 *
 * \p ls is unused; it is kept for symmetry with the other helpers.
 */
static void
linear_scan_remove_active(struct linear_scan *ls,
                          struct linear_scan_live_interval *interval)
{
   list_del(&interval->list);
}
/**
 * Remove intervals that are no longer active from the active list, and
 * return their registers to the free pool.
 *
 * \param pc  the current program point; intervals ending before it expire
 */
static void
linear_scan_expire_active(struct linear_scan *ls, int pc)
{
   struct linear_scan_live_interval *interval, *next;

   LIST_FOR_EACH_ENTRY_SAFE(interval, next, &ls->active_list, list) {
      /*
       * since we sort intervals on the active list by their endpoints, we
       * know that this and the rest of the intervals are still active.
       */
      if (interval->endpoint >= pc)
         break;

      linear_scan_remove_active(ls, interval);

      /* recycle the reg */
      linear_scan_free_regs(ls, interval->reg, 1);
   }
}
/**
 * Spill an interval.
 *
 * Not implemented: currently asserts, as the compiler never spills.
 */
static void
linear_scan_spill(struct linear_scan *ls,
                  struct linear_scan_live_interval *interval,
                  bool is_active)
{
   assert(!"no spilling support");
}
/** |
* Spill a range of intervals. |
*/ |
static void |
linear_scan_spill_range(struct linear_scan *ls, int first, int count) |
{ |
int i; |
for (i = 0; i < count; i++) { |
struct linear_scan_live_interval *interval = &ls->intervals[first + i]; |
linear_scan_spill(ls, interval, false); |
} |
} |
/**
 * Perform linear scan to allocate registers for the intervals.
 *
 * Walks ls->intervals (already sorted by startpoint), grouping consecutive
 * intervals that must share adjacent registers, and assigns each group a
 * run of free registers.  When allocation fails it first expires dead
 * intervals, then falls back to spilling.  Returns false on failure.
 */
static bool
linear_scan_run(struct linear_scan *ls)
{
   int i;
   i = 0;
   while (i < ls->num_vrfs) {
      struct linear_scan_live_interval *first = &ls->intervals[i];
      int reg, count;
      /*
       * BRW_OPCODE_SEND may write to multiple consecutive registers and we need to
       * support that
       */
      for (count = 1; i + count < ls->num_vrfs; count++) {
         const struct linear_scan_live_interval *interval =
            &ls->intervals[i + count];
         /* group only intervals that start together and are marked consecutive */
         if (interval->startpoint != first->startpoint ||
             !interval->consecutive)
            break;
      }
      reg = linear_scan_allocate_regs(ls, count);
      /* expire intervals that are no longer active and try again */
      if (reg < 0) {
         linear_scan_expire_active(ls, first->startpoint);
         reg = linear_scan_allocate_regs(ls, count);
      }
      /* have to spill some intervals */
      if (reg < 0) {
         /* the active list is sorted by endpoint, so the tail ends last */
         struct linear_scan_live_interval *last_active =
            container_of(ls->active_list.prev,
                  (struct linear_scan_live_interval *) NULL, list);
         /* heuristically spill the interval that ends last */
         if (count > 1 || last_active->endpoint < first->endpoint) {
            /* spill the incoming interval(s) rather than an active one */
            linear_scan_spill_range(ls, i, count);
            i += count;
            continue;
         }
         /* make some room for the new interval */
         linear_scan_spill(ls, last_active, true);
         reg = linear_scan_allocate_regs(ls, count);
         if (reg < 0) {
            assert(!"failed to spill any register");
            return false;
         }
      }
      /* commit the allocation: consecutive intervals get consecutive regs */
      while (count--) {
         struct linear_scan_live_interval *interval = &ls->intervals[i++];
         interval->reg = reg++;
         linear_scan_add_active(ls, interval);
         ls->vrf_mapping[interval->vrf] = interval->reg;
         /*
          * this should and must be the case because of how we initialized the
          * intervals
          */
         assert(interval->vrf - first->vrf == interval->reg - first->reg);
      }
   }
   return true;
}
/** |
* Add a new interval. |
*/ |
static void |
linear_scan_add_live_interval(struct linear_scan *ls, int vrf, int pc) |
{ |
if (ls->intervals[vrf].vrf) |
return; |
ls->intervals[vrf].vrf = vrf; |
ls->intervals[vrf].startpoint = pc; |
ls->num_vrfs++; |
if (vrf > ls->max_vrf) |
ls->max_vrf = vrf; |
} |
/** |
* Perform (oversimplified?) live variable analysis. |
*/ |
static void |
linear_scan_init_live_intervals(struct linear_scan *ls, |
struct toy_compiler *tc) |
{ |
const struct toy_inst *inst; |
int pc, do_pc, while_pc; |
pc = 0; |
do_pc = -1; |
while_pc = -1; |
tc_head(tc); |
while ((inst = tc_next_no_skip(tc)) != NULL) { |
const int startpoint = (pc <= while_pc) ? do_pc : pc; |
const int endpoint = (pc <= while_pc) ? while_pc : pc; |
int vrf, i; |
/* |
* assume all registers used in this outermost loop are live through out |
* the whole loop |
*/ |
if (inst->marker) { |
if (pc > while_pc) { |
struct toy_inst *inst2; |
int loop_level = 1; |
assert(inst->opcode == BRW_OPCODE_DO); |
do_pc = pc; |
while_pc = pc + 1; |
/* find the matching BRW_OPCODE_WHILE */ |
LIST_FOR_EACH_ENTRY_FROM(inst2, tc->iter_next, |
&tc->instructions, list) { |
if (inst2->marker) { |
assert(inst->opcode == BRW_OPCODE_DO); |
loop_level++; |
continue; |
} |
if (inst2->opcode == BRW_OPCODE_WHILE) { |
loop_level--; |
if (!loop_level) |
break; |
} |
while_pc++; |
} |
} |
continue; |
} |
if (inst->dst.file == TOY_FILE_VRF) { |
int num_dst; |
/* TODO this is a hack */ |
if (inst->opcode == BRW_OPCODE_SEND || |
inst->opcode == BRW_OPCODE_SENDC) { |
const uint32_t mdesc = inst->src[1].val32; |
int response_length = (mdesc >> 20) & 0x1f; |
num_dst = response_length; |
if (num_dst > 1 && inst->exec_size == BRW_EXECUTE_16) |
num_dst /= 2; |
} |
else { |
num_dst = 1; |
} |
vrf = inst->dst.val32 / TOY_REG_WIDTH; |
for (i = 0; i < num_dst; i++) { |
/* first use */ |
if (!ls->intervals[vrf].vrf) |
linear_scan_add_live_interval(ls, vrf, startpoint); |
ls->intervals[vrf].endpoint = endpoint; |
ls->intervals[vrf].consecutive = (i > 0); |
vrf++; |
} |
} |
for (i = 0; i < Elements(inst->src); i++) { |
if (inst->src[i].file != TOY_FILE_VRF) |
continue; |
vrf = inst->src[i].val32 / TOY_REG_WIDTH; |
/* first use */ |
if (!ls->intervals[vrf].vrf) |
linear_scan_add_live_interval(ls, vrf, startpoint); |
ls->intervals[vrf].endpoint = endpoint; |
} |
pc++; |
} |
} |
/** |
* Clean up after performing linear scan. |
*/ |
static void |
linear_scan_cleanup(struct linear_scan *ls) |
{ |
FREE(ls->vrf_mapping); |
FREE(ls->intervals); |
FREE(ls->free_regs); |
} |
static int |
linear_scan_compare_live_intervals(const void *elem1, const void *elem2) |
{ |
const struct linear_scan_live_interval *interval1 = elem1; |
const struct linear_scan_live_interval *interval2 = elem2; |
/* make unused elements appear at the end */ |
if (!interval1->vrf) |
return 1; |
else if (!interval2->vrf) |
return -1; |
/* sort by startpoints first, and then by vrf */ |
if (interval1->startpoint != interval2->startpoint) |
return (interval1->startpoint - interval2->startpoint); |
else |
return (interval1->vrf - interval2->vrf); |
} |
/** |
* Prepare for linear scan. |
*/ |
static bool |
linear_scan_init(struct linear_scan *ls, int num_regs, |
struct toy_compiler *tc) |
{ |
int num_intervals, i; |
memset(ls, 0, sizeof(*ls)); |
/* this may be much larger than ls->num_vrfs... */ |
num_intervals = tc->next_vrf; |
ls->intervals = CALLOC(num_intervals, sizeof(ls->intervals[0])); |
if (!ls->intervals) |
return false; |
linear_scan_init_live_intervals(ls, tc); |
/* sort intervals by startpoints */ |
qsort(ls->intervals, num_intervals, sizeof(*ls->intervals), |
linear_scan_compare_live_intervals); |
ls->num_regs = num_regs; |
ls->num_free_regs = num_regs; |
ls->free_regs = MALLOC(ls->num_regs * sizeof(*ls->free_regs)); |
if (!ls->free_regs) { |
FREE(ls->intervals); |
return false; |
} |
/* add in reverse order as we will allocate from the tail */ |
for (i = 0; i < ls->num_regs; i++) |
ls->free_regs[i] = num_regs - i - 1; |
list_inithead(&ls->active_list); |
ls->vrf_mapping = CALLOC(ls->max_vrf + 1, sizeof(*ls->vrf_mapping)); |
if (!ls->vrf_mapping) { |
FREE(ls->intervals); |
FREE(ls->free_regs); |
return false; |
} |
return true; |
} |
/** |
* Allocate registers with linear scan. |
*/ |
static void |
linear_scan_allocation(struct toy_compiler *tc, |
int start_grf, int end_grf, |
int num_grf_per_vrf) |
{ |
const int num_grfs = end_grf - start_grf + 1; |
struct linear_scan ls; |
struct toy_inst *inst; |
if (!linear_scan_init(&ls, num_grfs / num_grf_per_vrf, tc)) |
return; |
if (!linear_scan_run(&ls)) { |
tc_fail(tc, "failed to allocate registers"); |
return; |
} |
tc_head(tc); |
while ((inst = tc_next(tc)) != NULL) { |
int i; |
if (inst->dst.file == TOY_FILE_VRF) { |
const uint32_t val32 = inst->dst.val32; |
int reg = val32 / TOY_REG_WIDTH; |
int subreg = val32 % TOY_REG_WIDTH; |
/* map to GRF */ |
reg = ls.vrf_mapping[reg] * num_grf_per_vrf + start_grf; |
inst->dst.file = TOY_FILE_GRF; |
inst->dst.val32 = reg * TOY_REG_WIDTH + subreg; |
} |
for (i = 0; i < Elements(inst->src); i++) { |
const uint32_t val32 = inst->src[i].val32; |
int reg, subreg; |
if (inst->src[i].file != TOY_FILE_VRF) |
continue; |
reg = val32 / TOY_REG_WIDTH; |
subreg = val32 % TOY_REG_WIDTH; |
/* map to GRF */ |
reg = ls.vrf_mapping[reg] * num_grf_per_vrf + start_grf; |
inst->src[i].file = TOY_FILE_GRF; |
inst->src[i].val32 = reg * TOY_REG_WIDTH + subreg; |
} |
} |
linear_scan_cleanup(&ls); |
} |
/** |
* Trivially allocate registers. |
*/ |
static void |
trivial_allocation(struct toy_compiler *tc, |
int start_grf, int end_grf, |
int num_grf_per_vrf) |
{ |
struct toy_inst *inst; |
int max_grf = -1; |
tc_head(tc); |
while ((inst = tc_next(tc)) != NULL) { |
int i; |
if (inst->dst.file == TOY_FILE_VRF) { |
const uint32_t val32 = inst->dst.val32; |
int reg = val32 / TOY_REG_WIDTH; |
int subreg = val32 % TOY_REG_WIDTH; |
reg = reg * num_grf_per_vrf + start_grf - 1; |
inst->dst.file = TOY_FILE_GRF; |
inst->dst.val32 = reg * TOY_REG_WIDTH + subreg; |
if (reg > max_grf) |
max_grf = reg; |
} |
for (i = 0; i < Elements(inst->src); i++) { |
const uint32_t val32 = inst->src[i].val32; |
int reg, subreg; |
if (inst->src[i].file != TOY_FILE_VRF) |
continue; |
reg = val32 / TOY_REG_WIDTH; |
subreg = val32 % TOY_REG_WIDTH; |
reg = reg * num_grf_per_vrf + start_grf - 1; |
inst->src[i].file = TOY_FILE_GRF; |
inst->src[i].val32 = reg * TOY_REG_WIDTH + subreg; |
if (reg > max_grf) |
max_grf = reg; |
} |
} |
if (max_grf + num_grf_per_vrf - 1 > end_grf) |
tc_fail(tc, "failed to allocate registers"); |
} |
/**
 * Allocate GRF registers to VRF registers.
 *
 * Every virtual register used by \p tc is rewritten to a hardware GRF in
 * [start_grf, end_grf], using num_grf_per_vrf GRFs per virtual register.
 */
void
toy_compiler_allocate_registers(struct toy_compiler *tc,
                                int start_grf, int end_grf,
                                int num_grf_per_vrf)
{
   /*
    * compile-time toggle: trivial_allocation() is presumably kept as a
    * debugging fallback -- flip the condition to use it
    */
   if (true)
      linear_scan_allocation(tc, start_grf, end_grf, num_grf_per_vrf);
   else
      trivial_allocation(tc, start_grf, end_grf, num_grf_per_vrf);
}
/contrib/sdk/sources/Mesa/src/gallium/drivers/ilo/shader/toy_optimize.c |
---|
0,0 → 1,71 |
/* |
* Mesa 3-D graphics library |
* |
* Copyright (C) 2012-2013 LunarG, Inc. |
* |
* Permission is hereby granted, free of charge, to any person obtaining a |
* copy of this software and associated documentation files (the "Software"), |
* to deal in the Software without restriction, including without limitation |
* the rights to use, copy, modify, merge, publish, distribute, sublicense, |
* and/or sell copies of the Software, and to permit persons to whom the |
* Software is furnished to do so, subject to the following conditions: |
* |
* The above copyright notice and this permission notice shall be included |
* in all copies or substantial portions of the Software. |
* |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING |
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER |
* DEALINGS IN THE SOFTWARE. |
* |
* Authors: |
* Chia-I Wu <olv@lunarg.com> |
*/ |
#include "toy_compiler.h" |
#include "toy_tgsi.h" |
#include "toy_optimize.h" |
/** |
* This just eliminates instructions with null dst so far. |
*/ |
static void |
eliminate_dead_code(struct toy_compiler *tc) |
{ |
struct toy_inst *inst; |
tc_head(tc); |
while ((inst = tc_next(tc)) != NULL) { |
switch (inst->opcode) { |
case BRW_OPCODE_IF: |
case BRW_OPCODE_ELSE: |
case BRW_OPCODE_ENDIF: |
case BRW_OPCODE_WHILE: |
case BRW_OPCODE_BREAK: |
case BRW_OPCODE_CONTINUE: |
case BRW_OPCODE_SEND: |
case BRW_OPCODE_SENDC: |
case BRW_OPCODE_NOP: |
/* never eliminated */ |
break; |
default: |
if (tdst_is_null(inst->dst) || !inst->dst.writemask) { |
/* math is always BRW_CONDITIONAL_NONE */ |
if ((inst->opcode == BRW_OPCODE_MATH || |
inst->cond_modifier == BRW_CONDITIONAL_NONE) && |
!inst->acc_wr_ctrl) |
tc_discard_inst(tc, inst); |
} |
break; |
} |
} |
} |
/**
 * Run the (currently single) optimization pass over the instruction list.
 */
void
toy_compiler_optimize(struct toy_compiler *tc)
{
   eliminate_dead_code(tc);
}
/contrib/sdk/sources/Mesa/src/gallium/drivers/ilo/shader/toy_optimize.h |
---|
0,0 → 1,36 |
/* |
* Mesa 3-D graphics library |
* |
* Copyright (C) 2012-2013 LunarG, Inc. |
* |
* Permission is hereby granted, free of charge, to any person obtaining a |
* copy of this software and associated documentation files (the "Software"), |
* to deal in the Software without restriction, including without limitation |
* the rights to use, copy, modify, merge, publish, distribute, sublicense, |
* and/or sell copies of the Software, and to permit persons to whom the |
* Software is furnished to do so, subject to the following conditions: |
* |
* The above copyright notice and this permission notice shall be included |
* in all copies or substantial portions of the Software. |
* |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING |
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER |
* DEALINGS IN THE SOFTWARE. |
* |
* Authors: |
* Chia-I Wu <olv@lunarg.com> |
*/ |
#ifndef TOY_OPTIMIZE_H |
#define TOY_OPTIMIZE_H |
#include "toy_compiler.h" |
void |
toy_compiler_optimize(struct toy_compiler *tc); |
#endif /* TOY_OPTIMIZE_H */ |
/contrib/sdk/sources/Mesa/src/gallium/drivers/ilo/shader/toy_tgsi.c |
---|
0,0 → 1,2677 |
/* |
* Mesa 3-D graphics library |
* |
* Copyright (C) 2012-2013 LunarG, Inc. |
* |
* Permission is hereby granted, free of charge, to any person obtaining a |
* copy of this software and associated documentation files (the "Software"), |
* to deal in the Software without restriction, including without limitation |
* the rights to use, copy, modify, merge, publish, distribute, sublicense, |
* and/or sell copies of the Software, and to permit persons to whom the |
* Software is furnished to do so, subject to the following conditions: |
* |
* The above copyright notice and this permission notice shall be included |
* in all copies or substantial portions of the Software. |
* |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING |
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER |
* DEALINGS IN THE SOFTWARE. |
* |
* Authors: |
* Chia-I Wu <olv@lunarg.com> |
*/ |
#include "tgsi/tgsi_parse.h" |
#include "tgsi/tgsi_info.h" |
#include "tgsi/tgsi_strings.h" |
#include "util/u_hash_table.h" |
#include "toy_helpers.h" |
#include "toy_tgsi.h" |
/*
 * map TGSI opcode to GEN opcode 1-to-1
 *
 * Each entry gives the GEN/TOY opcode plus the expected dst/src operand
 * counts (asserted in aos_simple()).  Unlisted TGSI opcodes are
 * zero-initialized and hence invalid here (opcode 0).  Several entries
 * share a GEN opcode and are disambiguated by fix-ups in aos_simple(),
 * e.g. MIN/MAX via SEL's condition modifier, ABS/INEG via source modifiers.
 */
static const struct {
   int opcode;
   int num_dst;
   int num_src;
} aos_simple_opcode_map[TGSI_OPCODE_LAST] = {
   [TGSI_OPCODE_ARL]          = { BRW_OPCODE_RNDD,                1, 1 },
   [TGSI_OPCODE_MOV]          = { BRW_OPCODE_MOV,                 1, 1 },
   [TGSI_OPCODE_RCP]          = { TOY_OPCODE_INV,                 1, 1 },
   [TGSI_OPCODE_RSQ]          = { TOY_OPCODE_RSQ,                 1, 1 },
   [TGSI_OPCODE_MUL]          = { BRW_OPCODE_MUL,                 1, 2 },
   [TGSI_OPCODE_ADD]          = { BRW_OPCODE_ADD,                 1, 2 },
   [TGSI_OPCODE_DP3]          = { BRW_OPCODE_DP3,                 1, 2 },
   [TGSI_OPCODE_DP4]          = { BRW_OPCODE_DP4,                 1, 2 },
   [TGSI_OPCODE_MIN]          = { BRW_OPCODE_SEL,                 1, 2 },
   [TGSI_OPCODE_MAX]          = { BRW_OPCODE_SEL,                 1, 2 },
   /* a later pass will move src[2] to accumulator */
   [TGSI_OPCODE_MAD]          = { BRW_OPCODE_MAC,                 1, 3 },
   [TGSI_OPCODE_SUB]          = { BRW_OPCODE_ADD,                 1, 2 },
   [TGSI_OPCODE_SQRT]         = { TOY_OPCODE_SQRT,                1, 1 },
   [TGSI_OPCODE_FRC]          = { BRW_OPCODE_FRC,                 1, 1 },
   [TGSI_OPCODE_FLR]          = { BRW_OPCODE_RNDD,                1, 1 },
   [TGSI_OPCODE_ROUND]        = { BRW_OPCODE_RNDE,                1, 1 },
   [TGSI_OPCODE_EX2]          = { TOY_OPCODE_EXP,                 1, 1 },
   [TGSI_OPCODE_LG2]          = { TOY_OPCODE_LOG,                 1, 1 },
   [TGSI_OPCODE_POW]          = { TOY_OPCODE_POW,                 1, 2 },
   [TGSI_OPCODE_ABS]          = { BRW_OPCODE_MOV,                 1, 1 },
   [TGSI_OPCODE_DPH]          = { BRW_OPCODE_DPH,                 1, 2 },
   [TGSI_OPCODE_COS]          = { TOY_OPCODE_COS,                 1, 1 },
   [TGSI_OPCODE_KILL]         = { TOY_OPCODE_KIL,                 0, 0 },
   [TGSI_OPCODE_SIN]          = { TOY_OPCODE_SIN,                 1, 1 },
   [TGSI_OPCODE_ARR]          = { BRW_OPCODE_RNDZ,                1, 1 },
   [TGSI_OPCODE_DP2]          = { BRW_OPCODE_DP2,                 1, 2 },
   [TGSI_OPCODE_IF]           = { BRW_OPCODE_IF,                  0, 1 },
   [TGSI_OPCODE_UIF]          = { BRW_OPCODE_IF,                  0, 1 },
   [TGSI_OPCODE_ELSE]         = { BRW_OPCODE_ELSE,                0, 0 },
   [TGSI_OPCODE_ENDIF]        = { BRW_OPCODE_ENDIF,               0, 0 },
   [TGSI_OPCODE_I2F]          = { BRW_OPCODE_MOV,                 1, 1 },
   [TGSI_OPCODE_NOT]          = { BRW_OPCODE_NOT,                 1, 1 },
   [TGSI_OPCODE_TRUNC]        = { BRW_OPCODE_RNDZ,                1, 1 },
   [TGSI_OPCODE_SHL]          = { BRW_OPCODE_SHL,                 1, 2 },
   [TGSI_OPCODE_AND]          = { BRW_OPCODE_AND,                 1, 2 },
   [TGSI_OPCODE_OR]           = { BRW_OPCODE_OR,                  1, 2 },
   [TGSI_OPCODE_MOD]          = { TOY_OPCODE_INT_DIV_REMAINDER,   1, 2 },
   [TGSI_OPCODE_XOR]          = { BRW_OPCODE_XOR,                 1, 2 },
   [TGSI_OPCODE_EMIT]         = { TOY_OPCODE_EMIT,                0, 0 },
   [TGSI_OPCODE_ENDPRIM]      = { TOY_OPCODE_ENDPRIM,             0, 0 },
   [TGSI_OPCODE_NOP]          = { BRW_OPCODE_NOP,                 0, 0 },
   [TGSI_OPCODE_KILL_IF]      = { TOY_OPCODE_KIL,                 0, 1 },
   [TGSI_OPCODE_END]          = { BRW_OPCODE_NOP,                 0, 0 },
   [TGSI_OPCODE_F2I]          = { BRW_OPCODE_MOV,                 1, 1 },
   [TGSI_OPCODE_IDIV]         = { TOY_OPCODE_INT_DIV_QUOTIENT,    1, 2 },
   [TGSI_OPCODE_IMAX]         = { BRW_OPCODE_SEL,                 1, 2 },
   [TGSI_OPCODE_IMIN]         = { BRW_OPCODE_SEL,                 1, 2 },
   [TGSI_OPCODE_INEG]         = { BRW_OPCODE_MOV,                 1, 1 },
   [TGSI_OPCODE_ISHR]         = { BRW_OPCODE_ASR,                 1, 2 },
   [TGSI_OPCODE_F2U]          = { BRW_OPCODE_MOV,                 1, 1 },
   [TGSI_OPCODE_U2F]          = { BRW_OPCODE_MOV,                 1, 1 },
   [TGSI_OPCODE_UADD]         = { BRW_OPCODE_ADD,                 1, 2 },
   [TGSI_OPCODE_UDIV]         = { TOY_OPCODE_INT_DIV_QUOTIENT,    1, 2 },
   /* a later pass will move src[2] to accumulator */
   [TGSI_OPCODE_UMAD]         = { BRW_OPCODE_MAC,                 1, 3 },
   [TGSI_OPCODE_UMAX]         = { BRW_OPCODE_SEL,                 1, 2 },
   [TGSI_OPCODE_UMIN]         = { BRW_OPCODE_SEL,                 1, 2 },
   [TGSI_OPCODE_UMOD]         = { TOY_OPCODE_INT_DIV_REMAINDER,   1, 2 },
   [TGSI_OPCODE_UMUL]         = { BRW_OPCODE_MUL,                 1, 2 },
   [TGSI_OPCODE_USHR]         = { BRW_OPCODE_SHR,                 1, 2 },
   [TGSI_OPCODE_UARL]         = { BRW_OPCODE_MOV,                 1, 1 },
   [TGSI_OPCODE_IABS]         = { BRW_OPCODE_MOV,                 1, 1 },
};
/**
 * Translate a TGSI instruction that maps 1-to-1 to a GEN instruction,
 * using aos_simple_opcode_map plus per-opcode fix-ups (condition
 * modifiers, source negate/absolute, scalar swizzles).
 */
static void
aos_simple(struct toy_compiler *tc,
           const struct tgsi_full_instruction *tgsi_inst,
           struct toy_dst *dst,
           struct toy_src *src)
{
   struct toy_inst *inst;
   int opcode;
   int cond_modifier = BRW_CONDITIONAL_NONE;
   int num_dst = tgsi_inst->Instruction.NumDstRegs;
   int num_src = tgsi_inst->Instruction.NumSrcRegs;
   int i;
   opcode = aos_simple_opcode_map[tgsi_inst->Instruction.Opcode].opcode;
   assert(num_dst == aos_simple_opcode_map[tgsi_inst->Instruction.Opcode].num_dst);
   assert(num_src == aos_simple_opcode_map[tgsi_inst->Instruction.Opcode].num_src);
   /* opcode 0 means the TGSI opcode has no entry in the map */
   if (!opcode) {
      assert(!"invalid aos_simple() call");
      return;
   }
   /* no need to emit nop */
   if (opcode == BRW_OPCODE_NOP)
      return;
   inst = tc_add(tc);
   if (!inst)
      return;
   inst->opcode = opcode;
   /* per-opcode fix-ups */
   switch (tgsi_inst->Instruction.Opcode) {
   case TGSI_OPCODE_MIN:
   case TGSI_OPCODE_IMIN:
   case TGSI_OPCODE_UMIN:
      /* MIN family: SEL the lesser operand */
      cond_modifier = BRW_CONDITIONAL_L;
      break;
   case TGSI_OPCODE_MAX:
   case TGSI_OPCODE_IMAX:
   case TGSI_OPCODE_UMAX:
      /* MAX family: SEL the greater-or-equal operand */
      cond_modifier = BRW_CONDITIONAL_GE;
      break;
   case TGSI_OPCODE_SUB:
      /* SUB is ADD with src1 negated */
      src[1] = tsrc_negate(src[1]);
      break;
   case TGSI_OPCODE_ABS:
   case TGSI_OPCODE_IABS:
      /* ABS is MOV with the absolute source modifier */
      src[0] = tsrc_absolute(src[0]);
      break;
   case TGSI_OPCODE_IF:
      /* IF executes when src0.x != 0.0f */
      cond_modifier = BRW_CONDITIONAL_NEQ;
      num_src = 2;
      assert(src[0].type == TOY_TYPE_F);
      src[0] = tsrc_swizzle1(src[0], TOY_SWIZZLE_X);
      src[1] = tsrc_imm_f(0.0f);
      break;
   case TGSI_OPCODE_UIF:
      /* UIF is the integer variant: src0.x != 0 */
      cond_modifier = BRW_CONDITIONAL_NEQ;
      num_src = 2;
      assert(src[0].type == TOY_TYPE_UD);
      src[0] = tsrc_swizzle1(src[0], TOY_SWIZZLE_X);
      src[1] = tsrc_imm_d(0);
      break;
   case TGSI_OPCODE_INEG:
      src[0] = tsrc_negate(src[0]);
      break;
   case TGSI_OPCODE_RCP:
   case TGSI_OPCODE_RSQ:
   case TGSI_OPCODE_EX2:
   case TGSI_OPCODE_LG2:
   case TGSI_OPCODE_COS:
   case TGSI_OPCODE_SIN:
      /* scalar math functions operate on the x component only */
      src[0] = tsrc_swizzle1(src[0], TOY_SWIZZLE_X);
      break;
   case TGSI_OPCODE_POW:
      src[0] = tsrc_swizzle1(src[0], TOY_SWIZZLE_X);
      src[1] = tsrc_swizzle1(src[1], TOY_SWIZZLE_X);
      break;
   }
   inst->cond_modifier = cond_modifier;
   if (num_dst) {
      assert(num_dst == 1);
      inst->dst = dst[0];
   }
   assert(num_src <= Elements(inst->src));
   for (i = 0; i < num_src; i++)
      inst->src[i] = src[i];
}
static void |
aos_set_on_cond(struct toy_compiler *tc, |
const struct tgsi_full_instruction *tgsi_inst, |
struct toy_dst *dst, |
struct toy_src *src) |
{ |
struct toy_inst *inst; |
int cond; |
struct toy_src zero, one; |
switch (tgsi_inst->Instruction.Opcode) { |
case TGSI_OPCODE_SLT: |
case TGSI_OPCODE_ISLT: |
case TGSI_OPCODE_USLT: |
cond = BRW_CONDITIONAL_L; |
break; |
case TGSI_OPCODE_SGE: |
case TGSI_OPCODE_ISGE: |
case TGSI_OPCODE_USGE: |
cond = BRW_CONDITIONAL_GE; |
break; |
case TGSI_OPCODE_SEQ: |
case TGSI_OPCODE_USEQ: |
cond = BRW_CONDITIONAL_EQ; |
break; |
case TGSI_OPCODE_SGT: |
cond = BRW_CONDITIONAL_G; |
break; |
case TGSI_OPCODE_SLE: |
cond = BRW_CONDITIONAL_LE; |
break; |
case TGSI_OPCODE_SNE: |
case TGSI_OPCODE_USNE: |
cond = BRW_CONDITIONAL_NEQ; |
break; |
default: |
assert(!"invalid aos_set_on_cond() call"); |
return; |
} |
/* note that for integer versions, all bits are set */ |
switch (dst[0].type) { |
case TOY_TYPE_F: |
default: |
zero = tsrc_imm_f(0.0f); |
one = tsrc_imm_f(1.0f); |
break; |
case TOY_TYPE_D: |
zero = tsrc_imm_d(0); |
one = tsrc_imm_d(-1); |
break; |
case TOY_TYPE_UD: |
zero = tsrc_imm_ud(0); |
one = tsrc_imm_ud(~0); |
break; |
} |
tc_MOV(tc, dst[0], zero); |
tc_CMP(tc, tdst_null(), src[0], src[1], cond); |
inst = tc_MOV(tc, dst[0], one); |
inst->pred_ctrl = BRW_PREDICATE_NORMAL; |
} |
static void |
aos_compare(struct toy_compiler *tc, |
const struct tgsi_full_instruction *tgsi_inst, |
struct toy_dst *dst, |
struct toy_src *src) |
{ |
struct toy_inst *inst; |
struct toy_src zero; |
switch (tgsi_inst->Instruction.Opcode) { |
case TGSI_OPCODE_CMP: |
zero = tsrc_imm_f(0.0f); |
break; |
case TGSI_OPCODE_UCMP: |
zero = tsrc_imm_ud(0); |
break; |
default: |
assert(!"invalid aos_compare() call"); |
return; |
} |
tc_CMP(tc, tdst_null(), src[0], zero, BRW_CONDITIONAL_L); |
inst = tc_SEL(tc, dst[0], src[1], src[2], BRW_CONDITIONAL_NONE); |
inst->pred_ctrl = BRW_PREDICATE_NORMAL; |
} |
static void |
aos_set_sign(struct toy_compiler *tc, |
const struct tgsi_full_instruction *tgsi_inst, |
struct toy_dst *dst, |
struct toy_src *src) |
{ |
struct toy_inst *inst; |
struct toy_src zero, one, neg_one; |
switch (tgsi_inst->Instruction.Opcode) { |
case TGSI_OPCODE_SSG: |
zero = tsrc_imm_f(0.0f); |
one = tsrc_imm_f(1.0f); |
neg_one = tsrc_imm_f(-1.0f); |
break; |
case TGSI_OPCODE_ISSG: |
zero = tsrc_imm_d(0); |
one = tsrc_imm_d(1); |
neg_one = tsrc_imm_d(-1); |
break; |
default: |
assert(!"invalid aos_set_sign() call"); |
return; |
} |
tc_MOV(tc, dst[0], zero); |
tc_CMP(tc, tdst_null(), src[0], zero, BRW_CONDITIONAL_G); |
inst = tc_MOV(tc, dst[0], one); |
inst->pred_ctrl = BRW_PREDICATE_NORMAL; |
tc_CMP(tc, tdst_null(), src[0], zero, BRW_CONDITIONAL_L); |
inst = tc_MOV(tc, dst[0], neg_one); |
inst->pred_ctrl = BRW_PREDICATE_NORMAL; |
} |
static void |
aos_tex(struct toy_compiler *tc, |
const struct tgsi_full_instruction *tgsi_inst, |
struct toy_dst *dst, |
struct toy_src *src) |
{ |
struct toy_inst *inst; |
enum toy_opcode opcode; |
int i; |
switch (tgsi_inst->Instruction.Opcode) { |
case TGSI_OPCODE_TEX: |
opcode = TOY_OPCODE_TGSI_TEX; |
break; |
case TGSI_OPCODE_TXD: |
opcode = TOY_OPCODE_TGSI_TXD; |
break; |
case TGSI_OPCODE_TXP: |
opcode = TOY_OPCODE_TGSI_TXP; |
break; |
case TGSI_OPCODE_TXB: |
opcode = TOY_OPCODE_TGSI_TXB; |
break; |
case TGSI_OPCODE_TXL: |
opcode = TOY_OPCODE_TGSI_TXL; |
break; |
case TGSI_OPCODE_TXF: |
opcode = TOY_OPCODE_TGSI_TXF; |
break; |
case TGSI_OPCODE_TXQ: |
opcode = TOY_OPCODE_TGSI_TXQ; |
break; |
case TGSI_OPCODE_TXQ_LZ: |
opcode = TOY_OPCODE_TGSI_TXQ_LZ; |
break; |
case TGSI_OPCODE_TEX2: |
opcode = TOY_OPCODE_TGSI_TEX2; |
break; |
case TGSI_OPCODE_TXB2: |
opcode = TOY_OPCODE_TGSI_TXB2; |
break; |
case TGSI_OPCODE_TXL2: |
opcode = TOY_OPCODE_TGSI_TXL2; |
break; |
default: |
assert(!"unsupported texturing opcode"); |
return; |
break; |
} |
assert(tgsi_inst->Instruction.Texture); |
inst = tc_add(tc); |
inst->opcode = opcode; |
inst->tex.target = tgsi_inst->Texture.Texture; |
assert(tgsi_inst->Instruction.NumSrcRegs <= Elements(inst->src)); |
assert(tgsi_inst->Instruction.NumDstRegs == 1); |
inst->dst = dst[0]; |
for (i = 0; i < tgsi_inst->Instruction.NumSrcRegs; i++) |
inst->src[i] = src[i]; |
for (i = 0; i < tgsi_inst->Texture.NumOffsets; i++) |
tc_fail(tc, "texelFetchOffset unsupported"); |
} |
static void |
aos_sample(struct toy_compiler *tc, |
const struct tgsi_full_instruction *tgsi_inst, |
struct toy_dst *dst, |
struct toy_src *src) |
{ |
struct toy_inst *inst; |
enum toy_opcode opcode; |
int i; |
assert(!"sampling untested"); |
switch (tgsi_inst->Instruction.Opcode) { |
case TGSI_OPCODE_SAMPLE: |
opcode = TOY_OPCODE_TGSI_SAMPLE; |
break; |
case TGSI_OPCODE_SAMPLE_I: |
opcode = TOY_OPCODE_TGSI_SAMPLE_I; |
break; |
case TGSI_OPCODE_SAMPLE_I_MS: |
opcode = TOY_OPCODE_TGSI_SAMPLE_I_MS; |
break; |
case TGSI_OPCODE_SAMPLE_B: |
opcode = TOY_OPCODE_TGSI_SAMPLE_B; |
break; |
case TGSI_OPCODE_SAMPLE_C: |
opcode = TOY_OPCODE_TGSI_SAMPLE_C; |
break; |
case TGSI_OPCODE_SAMPLE_C_LZ: |
opcode = TOY_OPCODE_TGSI_SAMPLE_C_LZ; |
break; |
case TGSI_OPCODE_SAMPLE_D: |
opcode = TOY_OPCODE_TGSI_SAMPLE_D; |
break; |
case TGSI_OPCODE_SAMPLE_L: |
opcode = TOY_OPCODE_TGSI_SAMPLE_L; |
break; |
case TGSI_OPCODE_GATHER4: |
opcode = TOY_OPCODE_TGSI_GATHER4; |
break; |
case TGSI_OPCODE_SVIEWINFO: |
opcode = TOY_OPCODE_TGSI_SVIEWINFO; |
break; |
case TGSI_OPCODE_SAMPLE_POS: |
opcode = TOY_OPCODE_TGSI_SAMPLE_POS; |
break; |
case TGSI_OPCODE_SAMPLE_INFO: |
opcode = TOY_OPCODE_TGSI_SAMPLE_INFO; |
break; |
default: |
assert(!"unsupported sampling opcode"); |
return; |
break; |
} |
inst = tc_add(tc); |
inst->opcode = opcode; |
assert(tgsi_inst->Instruction.NumSrcRegs <= Elements(inst->src)); |
assert(tgsi_inst->Instruction.NumDstRegs == 1); |
inst->dst = dst[0]; |
for (i = 0; i < tgsi_inst->Instruction.NumSrcRegs; i++) |
inst->src[i] = src[i]; |
} |
/**
 * Translate TGSI LIT (lighting coefficients):
 *   dst.x = 1, dst.w = 1,
 *   dst.y = src.x (clamped to >= 0),
 *   dst.z = src.x > 0 ? src.y ^ src.w : 0.
 */
static void
aos_LIT(struct toy_compiler *tc,
        const struct tgsi_full_instruction *tgsi_inst,
        struct toy_dst *dst,
        struct toy_src *src)
{
   struct toy_inst *inst;
   tc_MOV(tc, tdst_writemask(dst[0], TOY_WRITEMASK_XW), tsrc_imm_f(1.0f));
   /* skip the expensive part when y and z are not written */
   if (!(dst[0].writemask & TOY_WRITEMASK_YZ))
      return;
   /* default y and z to 0 for the src.x <= 0 case */
   tc_MOV(tc, tdst_writemask(dst[0], TOY_WRITEMASK_YZ), tsrc_imm_f(0.0f));
   tc_CMP(tc, tdst_null(),
         tsrc_swizzle1(src[0], TOY_SWIZZLE_X),
         tsrc_imm_f(0.0f),
         BRW_CONDITIONAL_G);
   inst = tc_MOV(tc,
         tdst_writemask(dst[0], TOY_WRITEMASK_Y),
         tsrc_swizzle1(src[0], TOY_SWIZZLE_X));
   inst->pred_ctrl = BRW_PREDICATE_NORMAL;
   /* clamp W to (-128, 128)? */
   inst = tc_POW(tc,
         tdst_writemask(dst[0], TOY_WRITEMASK_Z),
         tsrc_swizzle1(src[0], TOY_SWIZZLE_Y),
         tsrc_swizzle1(src[0], TOY_SWIZZLE_W));
   inst->pred_ctrl = BRW_PREDICATE_NORMAL;
}
/**
 * Translate TGSI EXP (approximate exponential):
 *   dst.x = 2^floor(src.x)  (built by hand from the exponent bits),
 *   dst.y = src.x - floor(src.x), dst.z = 2^src.x, dst.w = 1.
 */
static void
aos_EXP(struct toy_compiler *tc,
        const struct tgsi_full_instruction *tgsi_inst,
        struct toy_dst *dst,
        struct toy_src *src)
{
   struct toy_src src0 = tsrc_swizzle1(src[0], TOY_SWIZZLE_X);
   if (dst[0].writemask & TOY_WRITEMASK_X) {
      struct toy_dst tmp =
         tdst_d(tdst_writemask(tc_alloc_tmp(tc), TOY_WRITEMASK_X));
      tc_RNDD(tc, tmp, src0);
      /* construct the floating point number manually */
      /* biased exponent (floor(src.x) + 127) shifted into bits 30:23 */
      tc_ADD(tc, tmp, tsrc_from(tmp), tsrc_imm_d(127));
      tc_SHL(tc, tdst_d(tdst_writemask(dst[0], TOY_WRITEMASK_X)),
            tsrc_from(tmp), tsrc_imm_d(23));
   }
   tc_FRC(tc, tdst_writemask(dst[0], TOY_WRITEMASK_Y), src0);
   tc_EXP(tc, tdst_writemask(dst[0], TOY_WRITEMASK_Z), src0);
   tc_MOV(tc, tdst_writemask(dst[0], TOY_WRITEMASK_W), tsrc_imm_f(1.0f));
}
/**
 * Translate TGSI LOG (approximate logarithm):
 *   dst.x = unbiased exponent of |src.x|,
 *   dst.y = mantissa of |src.x| as a float in [1, 2),
 *   dst.z = log2(src.x), dst.w = 1.
 */
static void
aos_LOG(struct toy_compiler *tc,
        const struct tgsi_full_instruction *tgsi_inst,
        struct toy_dst *dst,
        struct toy_src *src)
{
   struct toy_src src0 = tsrc_swizzle1(src[0], TOY_SWIZZLE_X);
   if (dst[0].writemask & TOY_WRITEMASK_XY) {
      struct toy_dst tmp;
      tmp = tdst_d(tdst_writemask(tc_alloc_tmp(tc), TOY_WRITEMASK_X));
      /* exponent */
      tc_SHR(tc, tmp, tsrc_absolute(tsrc_d(src0)), tsrc_imm_d(23));
      tc_ADD(tc, tdst_writemask(dst[0], TOY_WRITEMASK_X),
            tsrc_from(tmp), tsrc_imm_d(-127));
      /* mantissa */
      /* keep the 23 mantissa bits and install a zero (biased 127) exponent */
      tc_AND(tc, tmp, tsrc_d(src0), tsrc_imm_d((1 << 23) - 1));
      tc_OR(tc, tdst_writemask(tdst_d(dst[0]), TOY_WRITEMASK_Y),
            tsrc_from(tmp), tsrc_imm_d(127 << 23));
   }
   tc_LOG(tc, tdst_writemask(dst[0], TOY_WRITEMASK_Z), src0);
   tc_MOV(tc, tdst_writemask(dst[0], TOY_WRITEMASK_W), tsrc_imm_f(1.0f));
}
/**
 * Translate TGSI DST (distance vector):
 *   dst = (1, src0.y * src1.y, src0.z, src1.w)
 * (component selection comes from the callers' swizzles on src).
 */
static void
aos_DST(struct toy_compiler *tc,
        const struct tgsi_full_instruction *tgsi_inst,
        struct toy_dst *dst,
        struct toy_src *src)
{
   tc_MOV(tc, tdst_writemask(dst[0], TOY_WRITEMASK_X), tsrc_imm_f(1.0f));
   tc_MUL(tc, tdst_writemask(dst[0], TOY_WRITEMASK_Y), src[0], src[1]);
   tc_MOV(tc, tdst_writemask(dst[0], TOY_WRITEMASK_Z), src[0]);
   tc_MOV(tc, tdst_writemask(dst[0], TOY_WRITEMASK_W), src[1]);
}
/**
 * Translate TGSI LRP (linear interpolation):
 *   dst = src0 * src1 + (1 - src0) * src2
 */
static void
aos_LRP(struct toy_compiler *tc,
        const struct tgsi_full_instruction *tgsi_inst,
        struct toy_dst *dst,
        struct toy_src *src)
{
   struct toy_dst tmp = tc_alloc_tmp(tc);
   /* tmp = (1 - src0) * src2 */
   tc_ADD(tc, tmp, tsrc_negate(src[0]), tsrc_imm_f(1.0f));
   tc_MUL(tc, tmp, tsrc_from(tmp), src[2]);
   /* dst = src0 * src1 + tmp (tmp goes through the accumulator) */
   tc_MAC(tc, dst[0], src[0], src[1], tsrc_from(tmp));
}
/**
 * Translate TGSI CND:
 *   dst = (src2 > 0.5) ? src0 : src1
 */
static void
aos_CND(struct toy_compiler *tc,
        const struct tgsi_full_instruction *tgsi_inst,
        struct toy_dst *dst,
        struct toy_src *src)
{
   struct toy_inst *inst;
   assert(!"CND untested");
   tc_CMP(tc, tdst_null(), src[2], tsrc_imm_f(0.5f), BRW_CONDITIONAL_G);
   inst = tc_SEL(tc, dst[0], src[0], src[1], BRW_CONDITIONAL_NONE);
   inst->pred_ctrl = BRW_PREDICATE_NORMAL;
}
/**
 * Translate TGSI DP2A (2-component dot product plus add):
 *   dst = src0.x * src1.x + src0.y * src1.y + src2.x
 */
static void
aos_DP2A(struct toy_compiler *tc,
         const struct tgsi_full_instruction *tgsi_inst,
         struct toy_dst *dst,
         struct toy_src *src)
{
   struct toy_dst tmp = tc_alloc_tmp(tc);
   assert(!"DP2A untested");
   tc_DP2(tc, tmp, src[0], src[1]);
   tc_ADD(tc, dst[0], tsrc_swizzle1(tsrc_from(tmp), TOY_SWIZZLE_X), src[2]);
}
/**
 * Translate TGSI CLAMP:
 *   dst = min(max(src0, src1), src2)
 */
static void
aos_CLAMP(struct toy_compiler *tc,
          const struct tgsi_full_instruction *tgsi_inst,
          struct toy_dst *dst,
          struct toy_src *src)
{
   assert(!"CLAMP untested");
   /* dst = max(src0, src1), then dst = min(src2, dst) */
   tc_SEL(tc, dst[0], src[0], src[1], BRW_CONDITIONAL_GE);
   tc_SEL(tc, dst[0], src[2], tsrc_from(dst[0]), BRW_CONDITIONAL_L);
}
/**
 * Translate TGSI XPD (cross product):
 *   dst.xyz = src0 x src1 = src0.yzx * src1.zxy - src0.zxy * src1.yzx
 *   dst.w   = 1
 */
static void
aos_XPD(struct toy_compiler *tc,
        const struct tgsi_full_instruction *tgsi_inst,
        struct toy_dst *dst,
        struct toy_src *src)
{
   struct toy_dst tmp = tc_alloc_tmp(tc);
   /* tmp = src0.zxy * src1.yzx */
   tc_MUL(tc, tdst_writemask(tmp, TOY_WRITEMASK_XYZ),
         tsrc_swizzle(src[0], TOY_SWIZZLE_Z, TOY_SWIZZLE_X,
            TOY_SWIZZLE_Y, TOY_SWIZZLE_W),
         tsrc_swizzle(src[1], TOY_SWIZZLE_Y, TOY_SWIZZLE_Z,
            TOY_SWIZZLE_X, TOY_SWIZZLE_W));
   /* dst = src0.yzx * src1.zxy - tmp (via MAC with negated accumulator src) */
   tc_MAC(tc, tdst_writemask(dst[0], TOY_WRITEMASK_XYZ),
         tsrc_swizzle(src[0], TOY_SWIZZLE_Y, TOY_SWIZZLE_Z,
            TOY_SWIZZLE_X, TOY_SWIZZLE_W),
         tsrc_swizzle(src[1], TOY_SWIZZLE_Z, TOY_SWIZZLE_X,
            TOY_SWIZZLE_Y, TOY_SWIZZLE_W),
         tsrc_negate(tsrc_from(tmp)));
   tc_MOV(tc, tdst_writemask(dst[0], TOY_WRITEMASK_W),
         tsrc_imm_f(1.0f));
}
/**
 * Translate TGSI PK2H (pack two 16-bit values):
 *   dst = (src.y << 16) | src.x, operating on the raw UD bits.
 *
 * NOTE(review): no half-float conversion is emitted here -- this assumes
 * the low 16 bits of src.x/src.y already hold the half values; confirm.
 */
static void
aos_PK2H(struct toy_compiler *tc,
         const struct tgsi_full_instruction *tgsi_inst,
         struct toy_dst *dst,
         struct toy_src *src)
{
   const struct toy_src h1 = tsrc_ud(tsrc_swizzle1(src[0], TOY_SWIZZLE_X));
   const struct toy_src h2 = tsrc_ud(tsrc_swizzle1(src[0], TOY_SWIZZLE_Y));
   struct toy_dst tmp = tdst_ud(tc_alloc_tmp(tc));
   assert(!"PK2H untested");
   tc_SHL(tc, tmp, h2, tsrc_imm_ud(16));
   tc_OR(tc, tdst_ud(dst[0]), h1, tsrc_from(tmp));
}
/**
 * Translate TGSI SFL (set on false): always writes 0.0.
 */
static void
aos_SFL(struct toy_compiler *tc,
        const struct tgsi_full_instruction *tgsi_inst,
        struct toy_dst *dst,
        struct toy_src *src)
{
   assert(!"SFL untested");
   tc_MOV(tc, dst[0], tsrc_imm_f(0.0f));
}
/**
 * Translate TGSI STR (set on true): always writes 1.0.
 */
static void
aos_STR(struct toy_compiler *tc,
        const struct tgsi_full_instruction *tgsi_inst,
        struct toy_dst *dst,
        struct toy_src *src)
{
   assert(!"STR untested");
   tc_MOV(tc, dst[0], tsrc_imm_f(1.0f));
}
/**
 * Translate TGSI UP2H (unpack two 16-bit values):
 *   dst.xz = src & 0xffff, dst.yw = src >> 16, as raw UD bits.
 *
 * NOTE(review): no half-to-float conversion is emitted -- presumably a
 * later stage interprets the 16-bit values; confirm.
 */
static void
aos_UP2H(struct toy_compiler *tc,
         const struct tgsi_full_instruction *tgsi_inst,
         struct toy_dst *dst,
         struct toy_src *src)
{
   assert(!"UP2H untested");
   tc_AND(tc, tdst_writemask(tdst_ud(dst[0]), TOY_WRITEMASK_XZ),
         tsrc_ud(src[0]), tsrc_imm_ud(0xffff));
   tc_SHR(tc, tdst_writemask(tdst_ud(dst[0]), TOY_WRITEMASK_YW),
         tsrc_ud(src[0]), tsrc_imm_ud(16));
}
/**
 * Translate TGSI SCS (sine/cosine):
 *   dst = (cos(src0), sin(src0), 0, 1)
 */
static void
aos_SCS(struct toy_compiler *tc,
        const struct tgsi_full_instruction *tgsi_inst,
        struct toy_dst *dst,
        struct toy_src *src)
{
   assert(!"SCS untested");
   tc_add1(tc, TOY_OPCODE_COS,
         tdst_writemask(dst[0], TOY_WRITEMASK_X), src[0]);
   tc_add1(tc, TOY_OPCODE_SIN,
         tdst_writemask(dst[0], TOY_WRITEMASK_Y), src[0]);
   tc_MOV(tc, tdst_writemask(dst[0], TOY_WRITEMASK_Z), tsrc_imm_f(0.0f));
   tc_MOV(tc, tdst_writemask(dst[0], TOY_WRITEMASK_W), tsrc_imm_f(1.0f));
}
static void
aos_NRM(struct toy_compiler *tc,
        const struct tgsi_full_instruction *tgsi_inst,
        struct toy_dst *dst,
        struct toy_src *src)
{
   /* NRM: 3-component normalize; W is forced to 1.0 */
   struct toy_dst tmp = tc_alloc_tmp(tc);
   assert(!"NRM untested");
   /* tmp = dot3(src, src) */
   tc_DP3(tc, tmp, src[0], src[0]);
   /*
    * NOTE(review): INV appears to be a plain reciprocal (see aos_DIV), so
    * this computes src * (1/dot3) rather than src * (1/sqrt(dot3)).  A true
    * normalize would need RSQ here -- confirm before removing the assert.
    */
   tc_INV(tc, tmp, tsrc_from(tmp));
   tc_MUL(tc, tdst_writemask(dst[0], TOY_WRITEMASK_XYZ),
          src[0], tsrc_from(tmp));
   tc_MOV(tc, tdst_writemask(dst[0], TOY_WRITEMASK_W), tsrc_imm_f(1.0f));
}
static void
aos_DIV(struct toy_compiler *tc,
        const struct tgsi_full_instruction *tgsi_inst,
        struct toy_dst *dst,
        struct toy_src *src)
{
   /* DIV: dst = src0 / src1, implemented as src0 * (1 / src1) */
   struct toy_dst tmp = tc_alloc_tmp(tc);
   assert(!"DIV untested");
   tc_INV(tc, tmp, src[1]);
   tc_MUL(tc, dst[0], src[0], tsrc_from(tmp));
}
static void
aos_BRK(struct toy_compiler *tc,
        const struct tgsi_full_instruction *tgsi_inst,
        struct toy_dst *dst,
        struct toy_src *src)
{
   /* BRK maps directly onto the hardware BREAK instruction */
   tc_add0(tc, BRW_OPCODE_BREAK);
}
static void |
aos_CEIL(struct toy_compiler *tc, |
const struct tgsi_full_instruction *tgsi_inst, |
struct toy_dst *dst, |
struct toy_src *src) |
{ |
struct toy_dst tmp = tc_alloc_tmp(tc); |
tc_RNDD(tc, tmp, tsrc_negate(src[0])); |
tc_MOV(tc, dst[0], tsrc_negate(tsrc_from(tmp))); |
} |
static void
aos_SAD(struct toy_compiler *tc,
        const struct tgsi_full_instruction *tgsi_inst,
        struct toy_dst *dst,
        struct toy_src *src)
{
   /* SAD: dst = |src0 - src1| + src2 */
   struct toy_dst tmp = tc_alloc_tmp(tc);
   assert(!"SAD untested");
   tc_ADD(tc, tmp, src[0], tsrc_negate(src[1]));
   tc_ADD(tc, dst[0], tsrc_absolute(tsrc_from(tmp)), src[2]);
}
static void
aos_CONT(struct toy_compiler *tc,
         const struct tgsi_full_instruction *tgsi_inst,
         struct toy_dst *dst,
         struct toy_src *src)
{
   /* CONT maps directly onto the hardware CONTINUE instruction */
   tc_add0(tc, BRW_OPCODE_CONTINUE);
}
static void |
aos_BGNLOOP(struct toy_compiler *tc, |
const struct tgsi_full_instruction *tgsi_inst, |
struct toy_dst *dst, |
struct toy_src *src) |
{ |
struct toy_inst *inst; |
inst = tc_add0(tc, BRW_OPCODE_DO); |
/* this is just a marker */ |
inst->marker = true; |
} |
static void
aos_ENDLOOP(struct toy_compiler *tc,
            const struct tgsi_full_instruction *tgsi_inst,
            struct toy_dst *dst,
            struct toy_src *src)
{
   /* the loop bottom maps onto the hardware WHILE instruction */
   tc_add0(tc, BRW_OPCODE_WHILE);
}
static void
aos_NRM4(struct toy_compiler *tc,
         const struct tgsi_full_instruction *tgsi_inst,
         struct toy_dst *dst,
         struct toy_src *src)
{
   /* NRM4: 4-component normalize */
   struct toy_dst tmp = tc_alloc_tmp(tc);
   assert(!"NRM4 untested");
   /* tmp = dot4(src, src) */
   tc_DP4(tc, tmp, src[0], src[0]);
   /*
    * NOTE(review): INV appears to be a plain reciprocal (see aos_DIV);
    * a true normalize needs 1/sqrt (RSQ).  Also only the X channel of the
    * source is broadcast into the multiply -- confirm both against the
    * TGSI NRM4 definition before removing the assert.
    */
   tc_INV(tc, tmp, tsrc_from(tmp));
   tc_MUL(tc, dst[0], tsrc_swizzle1(src[0], TOY_SWIZZLE_X), tsrc_from(tmp));
}
static void |
aos_unsupported(struct toy_compiler *tc, |
const struct tgsi_full_instruction *tgsi_inst, |
struct toy_dst *dst, |
struct toy_src *src) |
{ |
const char *name = tgsi_get_opcode_name(tgsi_inst->Instruction.Opcode); |
ilo_warn("unsupported TGSI opcode: TGSI_OPCODE_%s\n", name); |
tc_fail(tc, "unsupported TGSI instruction"); |
} |
/*
 * Per-opcode translate callbacks for AoS (vec4) mode, indexed by TGSI
 * opcode.  Bare numeric indices are gaps in the TGSI opcode enum and are
 * explicitly marked unsupported.
 */
static const toy_tgsi_translate aos_translate_table[TGSI_OPCODE_LAST] = {
   [TGSI_OPCODE_ARL]          = aos_simple,
   [TGSI_OPCODE_MOV]          = aos_simple,
   [TGSI_OPCODE_LIT]          = aos_LIT,
   [TGSI_OPCODE_RCP]          = aos_simple,
   [TGSI_OPCODE_RSQ]          = aos_simple,
   [TGSI_OPCODE_EXP]          = aos_EXP,
   [TGSI_OPCODE_LOG]          = aos_LOG,
   [TGSI_OPCODE_MUL]          = aos_simple,
   [TGSI_OPCODE_ADD]          = aos_simple,
   [TGSI_OPCODE_DP3]          = aos_simple,
   [TGSI_OPCODE_DP4]          = aos_simple,
   [TGSI_OPCODE_DST]          = aos_DST,
   [TGSI_OPCODE_MIN]          = aos_simple,
   [TGSI_OPCODE_MAX]          = aos_simple,
   [TGSI_OPCODE_SLT]          = aos_set_on_cond,
   [TGSI_OPCODE_SGE]          = aos_set_on_cond,
   [TGSI_OPCODE_MAD]          = aos_simple,
   [TGSI_OPCODE_SUB]          = aos_simple,
   [TGSI_OPCODE_LRP]          = aos_LRP,
   [TGSI_OPCODE_CND]          = aos_CND,
   [TGSI_OPCODE_SQRT]         = aos_simple,
   [TGSI_OPCODE_DP2A]         = aos_DP2A,
   [22]                       = aos_unsupported,
   [23]                       = aos_unsupported,
   [TGSI_OPCODE_FRC]          = aos_simple,
   [TGSI_OPCODE_CLAMP]        = aos_CLAMP,
   [TGSI_OPCODE_FLR]          = aos_simple,
   [TGSI_OPCODE_ROUND]        = aos_simple,
   [TGSI_OPCODE_EX2]          = aos_simple,
   [TGSI_OPCODE_LG2]          = aos_simple,
   [TGSI_OPCODE_POW]          = aos_simple,
   [TGSI_OPCODE_XPD]          = aos_XPD,
   [32]                       = aos_unsupported,
   [TGSI_OPCODE_ABS]          = aos_simple,
   [TGSI_OPCODE_RCC]          = aos_unsupported,
   [TGSI_OPCODE_DPH]          = aos_simple,
   [TGSI_OPCODE_COS]          = aos_simple,
   [TGSI_OPCODE_DDX]          = aos_unsupported,
   [TGSI_OPCODE_DDY]          = aos_unsupported,
   [TGSI_OPCODE_KILL]         = aos_simple,
   [TGSI_OPCODE_PK2H]         = aos_PK2H,
   [TGSI_OPCODE_PK2US]        = aos_unsupported,
   [TGSI_OPCODE_PK4B]         = aos_unsupported,
   [TGSI_OPCODE_PK4UB]        = aos_unsupported,
   [TGSI_OPCODE_RFL]          = aos_unsupported,
   [TGSI_OPCODE_SEQ]          = aos_set_on_cond,
   [TGSI_OPCODE_SFL]          = aos_SFL,
   [TGSI_OPCODE_SGT]          = aos_set_on_cond,
   [TGSI_OPCODE_SIN]          = aos_simple,
   [TGSI_OPCODE_SLE]          = aos_set_on_cond,
   [TGSI_OPCODE_SNE]          = aos_set_on_cond,
   [TGSI_OPCODE_STR]          = aos_STR,
   [TGSI_OPCODE_TEX]          = aos_tex,
   [TGSI_OPCODE_TXD]          = aos_tex,
   [TGSI_OPCODE_TXP]          = aos_tex,
   [TGSI_OPCODE_UP2H]         = aos_UP2H,
   [TGSI_OPCODE_UP2US]        = aos_unsupported,
   [TGSI_OPCODE_UP4B]         = aos_unsupported,
   [TGSI_OPCODE_UP4UB]        = aos_unsupported,
   [TGSI_OPCODE_X2D]          = aos_unsupported,
   [TGSI_OPCODE_ARA]          = aos_unsupported,
   [TGSI_OPCODE_ARR]          = aos_simple,
   [TGSI_OPCODE_BRA]          = aos_unsupported,
   [TGSI_OPCODE_CAL]          = aos_unsupported,
   [TGSI_OPCODE_RET]          = aos_unsupported,
   [TGSI_OPCODE_SSG]          = aos_set_sign,
   [TGSI_OPCODE_CMP]          = aos_compare,
   [TGSI_OPCODE_SCS]          = aos_SCS,
   [TGSI_OPCODE_TXB]          = aos_tex,
   [TGSI_OPCODE_NRM]          = aos_NRM,
   [TGSI_OPCODE_DIV]          = aos_DIV,
   [TGSI_OPCODE_DP2]          = aos_simple,
   [TGSI_OPCODE_TXL]          = aos_tex,
   [TGSI_OPCODE_BRK]          = aos_BRK,
   [TGSI_OPCODE_IF]           = aos_simple,
   [TGSI_OPCODE_UIF]          = aos_simple,
   [76]                       = aos_unsupported,
   [TGSI_OPCODE_ELSE]         = aos_simple,
   [TGSI_OPCODE_ENDIF]        = aos_simple,
   [79]                       = aos_unsupported,
   [80]                       = aos_unsupported,
   [TGSI_OPCODE_PUSHA]        = aos_unsupported,
   [TGSI_OPCODE_POPA]         = aos_unsupported,
   [TGSI_OPCODE_CEIL]         = aos_CEIL,
   [TGSI_OPCODE_I2F]          = aos_simple,
   [TGSI_OPCODE_NOT]          = aos_simple,
   [TGSI_OPCODE_TRUNC]        = aos_simple,
   [TGSI_OPCODE_SHL]          = aos_simple,
   [88]                       = aos_unsupported,
   [TGSI_OPCODE_AND]          = aos_simple,
   [TGSI_OPCODE_OR]           = aos_simple,
   [TGSI_OPCODE_MOD]          = aos_simple,
   [TGSI_OPCODE_XOR]          = aos_simple,
   [TGSI_OPCODE_SAD]          = aos_SAD,
   [TGSI_OPCODE_TXF]          = aos_tex,
   [TGSI_OPCODE_TXQ]          = aos_tex,
   [TGSI_OPCODE_CONT]         = aos_CONT,
   [TGSI_OPCODE_EMIT]         = aos_simple,
   [TGSI_OPCODE_ENDPRIM]      = aos_simple,
   [TGSI_OPCODE_BGNLOOP]      = aos_BGNLOOP,
   [TGSI_OPCODE_BGNSUB]       = aos_unsupported,
   [TGSI_OPCODE_ENDLOOP]      = aos_ENDLOOP,
   [TGSI_OPCODE_ENDSUB]       = aos_unsupported,
   [TGSI_OPCODE_TXQ_LZ]       = aos_tex,
   [104]                      = aos_unsupported,
   [105]                      = aos_unsupported,
   [106]                      = aos_unsupported,
   [TGSI_OPCODE_NOP]          = aos_simple,
   [108]                      = aos_unsupported,
   [109]                      = aos_unsupported,
   [110]                      = aos_unsupported,
   [111]                      = aos_unsupported,
   [TGSI_OPCODE_NRM4]         = aos_NRM4,
   [TGSI_OPCODE_CALLNZ]       = aos_unsupported,
   [TGSI_OPCODE_BREAKC]       = aos_unsupported,
   [TGSI_OPCODE_KILL_IF]      = aos_simple,
   [TGSI_OPCODE_END]          = aos_simple,
   [118]                      = aos_unsupported,
   [TGSI_OPCODE_F2I]          = aos_simple,
   [TGSI_OPCODE_IDIV]         = aos_simple,
   [TGSI_OPCODE_IMAX]         = aos_simple,
   [TGSI_OPCODE_IMIN]         = aos_simple,
   [TGSI_OPCODE_INEG]         = aos_simple,
   [TGSI_OPCODE_ISGE]         = aos_set_on_cond,
   [TGSI_OPCODE_ISHR]         = aos_simple,
   [TGSI_OPCODE_ISLT]         = aos_set_on_cond,
   [TGSI_OPCODE_F2U]          = aos_simple,
   [TGSI_OPCODE_U2F]          = aos_simple,
   [TGSI_OPCODE_UADD]         = aos_simple,
   [TGSI_OPCODE_UDIV]         = aos_simple,
   [TGSI_OPCODE_UMAD]         = aos_simple,
   [TGSI_OPCODE_UMAX]         = aos_simple,
   [TGSI_OPCODE_UMIN]         = aos_simple,
   [TGSI_OPCODE_UMOD]         = aos_simple,
   [TGSI_OPCODE_UMUL]         = aos_simple,
   [TGSI_OPCODE_USEQ]         = aos_set_on_cond,
   [TGSI_OPCODE_USGE]         = aos_set_on_cond,
   [TGSI_OPCODE_USHR]         = aos_simple,
   [TGSI_OPCODE_USLT]         = aos_set_on_cond,
   [TGSI_OPCODE_USNE]         = aos_set_on_cond,
   [TGSI_OPCODE_SWITCH]       = aos_unsupported,
   [TGSI_OPCODE_CASE]         = aos_unsupported,
   [TGSI_OPCODE_DEFAULT]      = aos_unsupported,
   [TGSI_OPCODE_ENDSWITCH]    = aos_unsupported,
   [TGSI_OPCODE_SAMPLE]       = aos_sample,
   [TGSI_OPCODE_SAMPLE_I]     = aos_sample,
   [TGSI_OPCODE_SAMPLE_I_MS]  = aos_sample,
   [TGSI_OPCODE_SAMPLE_B]     = aos_sample,
   [TGSI_OPCODE_SAMPLE_C]     = aos_sample,
   [TGSI_OPCODE_SAMPLE_C_LZ]  = aos_sample,
   [TGSI_OPCODE_SAMPLE_D]     = aos_sample,
   [TGSI_OPCODE_SAMPLE_L]     = aos_sample,
   [TGSI_OPCODE_GATHER4]      = aos_sample,
   [TGSI_OPCODE_SVIEWINFO]    = aos_sample,
   [TGSI_OPCODE_SAMPLE_POS]   = aos_sample,
   [TGSI_OPCODE_SAMPLE_INFO]  = aos_sample,
   [TGSI_OPCODE_UARL]         = aos_simple,
   [TGSI_OPCODE_UCMP]         = aos_compare,
   [TGSI_OPCODE_IABS]         = aos_simple,
   [TGSI_OPCODE_ISSG]         = aos_set_sign,
   [TGSI_OPCODE_LOAD]         = aos_unsupported,
   [TGSI_OPCODE_STORE]        = aos_unsupported,
   [TGSI_OPCODE_MFENCE]       = aos_unsupported,
   [TGSI_OPCODE_LFENCE]       = aos_unsupported,
   [TGSI_OPCODE_SFENCE]       = aos_unsupported,
   [TGSI_OPCODE_BARRIER]      = aos_unsupported,
   [TGSI_OPCODE_ATOMUADD]     = aos_unsupported,
   [TGSI_OPCODE_ATOMXCHG]     = aos_unsupported,
   [TGSI_OPCODE_ATOMCAS]      = aos_unsupported,
   [TGSI_OPCODE_ATOMAND]      = aos_unsupported,
   [TGSI_OPCODE_ATOMOR]       = aos_unsupported,
   [TGSI_OPCODE_ATOMXOR]      = aos_unsupported,
   [TGSI_OPCODE_ATOMUMIN]     = aos_unsupported,
   [TGSI_OPCODE_ATOMUMAX]     = aos_unsupported,
   [TGSI_OPCODE_ATOMIMIN]     = aos_unsupported,
   [TGSI_OPCODE_ATOMIMAX]     = aos_unsupported,
   [TGSI_OPCODE_TEX2]         = aos_tex,
   [TGSI_OPCODE_TXB2]         = aos_tex,
   [TGSI_OPCODE_TXL2]         = aos_tex,
};
static void |
soa_passthrough(struct toy_compiler *tc, |
const struct tgsi_full_instruction *tgsi_inst, |
struct toy_dst *dst_, |
struct toy_src *src_) |
{ |
const toy_tgsi_translate translate = |
aos_translate_table[tgsi_inst->Instruction.Opcode]; |
translate(tc, tgsi_inst, dst_, src_); |
} |
/*
 * Translate an instruction in SoA form by transposing the operands and
 * emitting the AoS translation once per channel.
 */
static void
soa_per_channel(struct toy_compiler *tc,
                const struct tgsi_full_instruction *tgsi_inst,
                struct toy_dst *dst_,
                struct toy_src *src_)
{
   struct toy_dst dst[TGSI_FULL_MAX_DST_REGISTERS][4];
   struct toy_src src[TGSI_FULL_MAX_SRC_REGISTERS][4];
   int i, ch;
   /* split each operand into its four per-channel registers */
   for (i = 0; i < tgsi_inst->Instruction.NumDstRegs; i++)
      tdst_transpose(dst_[i], dst[i]);
   for (i = 0; i < tgsi_inst->Instruction.NumSrcRegs; i++)
      tsrc_transpose(src_[i], src[i]);
   /* emit the same instruction four times for the four channels */
   for (ch = 0; ch < 4; ch++) {
      struct toy_dst aos_dst[TGSI_FULL_MAX_DST_REGISTERS];
      struct toy_src aos_src[TGSI_FULL_MAX_SRC_REGISTERS];
      for (i = 0; i < tgsi_inst->Instruction.NumDstRegs; i++)
         aos_dst[i] = dst[i][ch];
      for (i = 0; i < tgsi_inst->Instruction.NumSrcRegs; i++)
         aos_src[i] = src[i][ch];
      aos_translate_table[tgsi_inst->Instruction.Opcode](tc,
            tgsi_inst, aos_dst, aos_src);
   }
}
static void |
soa_scalar_replicate(struct toy_compiler *tc, |
const struct tgsi_full_instruction *tgsi_inst, |
struct toy_dst *dst_, |
struct toy_src *src_) |
{ |
struct toy_dst dst0[4], tmp; |
struct toy_src srcx[TGSI_FULL_MAX_SRC_REGISTERS]; |
int opcode, i; |
assert(tgsi_inst->Instruction.NumDstRegs == 1); |
tdst_transpose(dst_[0], dst0); |
for (i = 0; i < tgsi_inst->Instruction.NumSrcRegs; i++) { |
struct toy_src tmp[4]; |
tsrc_transpose(src_[i], tmp); |
/* only the X channels */ |
srcx[i] = tmp[0]; |
} |
tmp = tc_alloc_tmp(tc); |
opcode = aos_simple_opcode_map[tgsi_inst->Instruction.Opcode].opcode; |
assert(opcode); |
switch (tgsi_inst->Instruction.Opcode) { |
case TGSI_OPCODE_RCP: |
case TGSI_OPCODE_RSQ: |
case TGSI_OPCODE_SQRT: |
case TGSI_OPCODE_EX2: |
case TGSI_OPCODE_LG2: |
case TGSI_OPCODE_COS: |
case TGSI_OPCODE_SIN: |
tc_add1(tc, opcode, tmp, srcx[0]); |
break; |
case TGSI_OPCODE_POW: |
tc_add2(tc, opcode, tmp, srcx[0], srcx[1]); |
break; |
default: |
assert(!"invalid soa_scalar_replicate() call"); |
return; |
} |
/* replicate the result */ |
for (i = 0; i < 4; i++) |
tc_MOV(tc, dst0[i], tsrc_from(tmp)); |
} |
/*
 * Translate the dot-product family (DP2/DP2A/DP3/DPH/DP4) in SoA form:
 * accumulate per-channel multiply-adds into a scalar, then broadcast the
 * result to all destination channels.
 */
static void
soa_dot_product(struct toy_compiler *tc,
                const struct tgsi_full_instruction *tgsi_inst,
                struct toy_dst *dst_,
                struct toy_src *src_)
{
   struct toy_dst dst0[4], tmp;
   struct toy_src src[TGSI_FULL_MAX_SRC_REGISTERS][4];
   int i;
   tdst_transpose(dst_[0], dst0);
   for (i = 0; i < tgsi_inst->Instruction.NumSrcRegs; i++)
      tsrc_transpose(src_[i], src[i]);
   tmp = tc_alloc_tmp(tc);
   switch (tgsi_inst->Instruction.Opcode) {
   case TGSI_OPCODE_DP2:
      /* src0.x*src1.x + src0.y*src1.y */
      tc_MUL(tc, tmp, src[0][1], src[1][1]);
      tc_MAC(tc, tmp, src[0][0], src[1][0], tsrc_from(tmp));
      break;
   case TGSI_OPCODE_DP2A:
      /* 2-component dot product plus src2.x */
      tc_MAC(tc, tmp, src[0][1], src[1][1], src[2][0]);
      tc_MAC(tc, tmp, src[0][0], src[1][0], tsrc_from(tmp));
      break;
   case TGSI_OPCODE_DP3:
      tc_MUL(tc, tmp, src[0][2], src[1][2]);
      tc_MAC(tc, tmp, src[0][1], src[1][1], tsrc_from(tmp));
      tc_MAC(tc, tmp, src[0][0], src[1][0], tsrc_from(tmp));
      break;
   case TGSI_OPCODE_DPH:
      /* 3-component dot product plus src1.w (src0.w treated as 1.0) */
      tc_MAC(tc, tmp, src[0][2], src[1][2], src[1][3]);
      tc_MAC(tc, tmp, src[0][1], src[1][1], tsrc_from(tmp));
      tc_MAC(tc, tmp, src[0][0], src[1][0], tsrc_from(tmp));
      break;
   case TGSI_OPCODE_DP4:
      tc_MUL(tc, tmp, src[0][3], src[1][3]);
      tc_MAC(tc, tmp, src[0][2], src[1][2], tsrc_from(tmp));
      tc_MAC(tc, tmp, src[0][1], src[1][1], tsrc_from(tmp));
      tc_MAC(tc, tmp, src[0][0], src[1][0], tsrc_from(tmp));
      break;
   default:
      assert(!"invalid soa_dot_product() call");
      return;
   }
   /* broadcast the scalar result */
   for (i = 0; i < 4; i++)
      tc_MOV(tc, dst0[i], tsrc_from(tmp));
}
static void |
soa_partial_derivative(struct toy_compiler *tc, |
const struct tgsi_full_instruction *tgsi_inst, |
struct toy_dst *dst_, |
struct toy_src *src_) |
{ |
if (tgsi_inst->Instruction.Opcode == TGSI_OPCODE_DDX) |
tc_add1(tc, TOY_OPCODE_DDX, dst_[0], src_[0]); |
else |
tc_add1(tc, TOY_OPCODE_DDY, dst_[0], src_[0]); |
} |
static void |
soa_if(struct toy_compiler *tc, |
const struct tgsi_full_instruction *tgsi_inst, |
struct toy_dst *dst_, |
struct toy_src *src_) |
{ |
struct toy_src src0[4]; |
assert(tsrc_is_swizzle1(src_[0])); |
tsrc_transpose(src_[0], src0); |
if (tgsi_inst->Instruction.Opcode == TGSI_OPCODE_IF) |
tc_IF(tc, tdst_null(), src0[0], tsrc_imm_f(0.0f), BRW_CONDITIONAL_NEQ); |
else |
tc_IF(tc, tdst_null(), src0[0], tsrc_imm_d(0), BRW_CONDITIONAL_NEQ); |
} |
/*
 * Translate LIT in SoA form:
 *   dst = (1, src.x, src.y ** src.w, 1), with Y and Z clamped to 0 when
 * src.x < 0.
 */
static void
soa_LIT(struct toy_compiler *tc,
        const struct tgsi_full_instruction *tgsi_inst,
        struct toy_dst *dst_,
        struct toy_src *src_)
{
   struct toy_inst *inst;
   struct toy_dst dst0[4];
   struct toy_src src0[4];
   tdst_transpose(dst_[0], dst0);
   tsrc_transpose(src_[0], src0);
   tc_MOV(tc, dst0[0], tsrc_imm_f(1.0f));
   tc_MOV(tc, dst0[1], src0[0]);
   tc_POW(tc, dst0[2], src0[1], src0[3]);
   tc_MOV(tc, dst0[3], tsrc_imm_f(1.0f));
   /*
    * POW is calculated first because math with pred_ctrl is broken here.
    * But, why?
    */
   /* set the flag register when src.x < 0, then clamp Y and Z to zero */
   tc_CMP(tc, tdst_null(), src0[0], tsrc_imm_f(0.0f), BRW_CONDITIONAL_L);
   inst = tc_MOV(tc, dst0[1], tsrc_imm_f(0.0f));
   inst->pred_ctrl = BRW_PREDICATE_NORMAL;
   inst = tc_MOV(tc, dst0[2], tsrc_imm_f(0.0f));
   inst->pred_ctrl = BRW_PREDICATE_NORMAL;
}
/*
 * Translate EXP in SoA form:
 *   dst = (2**floor(src.x), frac(src.x), 2**src.x, 1).
 */
static void
soa_EXP(struct toy_compiler *tc,
        const struct tgsi_full_instruction *tgsi_inst,
        struct toy_dst *dst_,
        struct toy_src *src_)
{
   struct toy_dst dst0[4];
   struct toy_src src0[4];
   assert(!"SoA EXP untested");
   tdst_transpose(dst_[0], dst0);
   tsrc_transpose(src_[0], src0);
   if (!tdst_is_null(dst0[0])) {
      struct toy_dst tmp = tdst_d(tc_alloc_tmp(tc));
      tc_RNDD(tc, tmp, src0[0]);
      /*
       * construct the floating point number manually: bias the integer
       * exponent by 127 and shift it into the exponent field
       */
      tc_ADD(tc, tmp, tsrc_from(tmp), tsrc_imm_d(127));
      tc_SHL(tc, tdst_d(dst0[0]), tsrc_from(tmp), tsrc_imm_d(23));
   }
   tc_FRC(tc, dst0[1], src0[0]);
   tc_EXP(tc, dst0[2], src0[0]);
   tc_MOV(tc, dst0[3], tsrc_imm_f(1.0f));
}
/*
 * Translate LOG in SoA form:
 *   dst = (exponent(src.x), mantissa-as-float, log2(src.x), 1),
 * extracting exponent and mantissa from the IEEE-754 bit pattern.
 */
static void
soa_LOG(struct toy_compiler *tc,
        const struct tgsi_full_instruction *tgsi_inst,
        struct toy_dst *dst_,
        struct toy_src *src_)
{
   struct toy_dst dst0[4];
   struct toy_src src0[4];
   assert(!"SoA LOG untested");
   tdst_transpose(dst_[0], dst0);
   tsrc_transpose(src_[0], src0);
   if (dst_[0].writemask & TOY_WRITEMASK_XY) {
      struct toy_dst tmp = tdst_d(tc_alloc_tmp(tc));
      /* exponent: unbias the top 8 bits (sign cleared via absolute) */
      tc_SHR(tc, tmp, tsrc_absolute(tsrc_d(src0[0])), tsrc_imm_d(23));
      tc_ADD(tc, dst0[0], tsrc_from(tmp), tsrc_imm_d(-127));
      /* mantissa: keep the low 23 bits, force exponent to 127 (i.e. 1.m) */
      tc_AND(tc, tmp, tsrc_d(src0[0]), tsrc_imm_d((1 << 23) - 1));
      tc_OR(tc, dst0[1], tsrc_from(tmp), tsrc_imm_d(127 << 23));
   }
   tc_LOG(tc, dst0[2], src0[0]);
   tc_MOV(tc, dst0[3], tsrc_imm_f(1.0f));
}
static void |
soa_DST(struct toy_compiler *tc, |
const struct tgsi_full_instruction *tgsi_inst, |
struct toy_dst *dst_, |
struct toy_src *src_) |
{ |
struct toy_dst dst0[4]; |
struct toy_src src[2][4]; |
tdst_transpose(dst_[0], dst0); |
tsrc_transpose(src_[0], src[0]); |
tsrc_transpose(src_[1], src[1]); |
tc_MOV(tc, dst0[0], tsrc_imm_f(1.0f)); |
tc_MUL(tc, dst0[1], src[0][1], src[1][1]); |
tc_MOV(tc, dst0[2], src[0][2]); |
tc_MOV(tc, dst0[3], src[1][3]); |
} |
/*
 * Translate XPD (3-component cross product) in SoA form.  Each channel is
 * computed as a MUL into the destination followed by a MAC that subtracts
 * it, so the destination must not alias the sources (handled by the
 * register allocator's aliasing check).
 */
static void
soa_XPD(struct toy_compiler *tc,
        const struct tgsi_full_instruction *tgsi_inst,
        struct toy_dst *dst_,
        struct toy_src *src_)
{
   struct toy_dst dst0[4];
   struct toy_src src[2][4];
   tdst_transpose(dst_[0], dst0);
   tsrc_transpose(src_[0], src[0]);
   tsrc_transpose(src_[1], src[1]);
   /* dst.x = src0.y * src1.z - src1.y * src0.z */
   tc_MUL(tc, dst0[0], src[0][2], src[1][1]);
   tc_MAC(tc, dst0[0], src[0][1], src[1][2], tsrc_negate(tsrc_from(dst0[0])));
   /* dst.y = src0.z * src1.x - src1.z * src0.x */
   tc_MUL(tc, dst0[1], src[0][0], src[1][2]);
   tc_MAC(tc, dst0[1], src[0][2], src[1][0], tsrc_negate(tsrc_from(dst0[1])));
   /* dst.z = src0.x * src1.y - src1.x * src0.y */
   tc_MUL(tc, dst0[2], src[0][1], src[1][0]);
   tc_MAC(tc, dst0[2], src[0][0], src[1][1], tsrc_negate(tsrc_from(dst0[2])));
   tc_MOV(tc, dst0[3], tsrc_imm_f(1.0f));
}
static void |
soa_PK2H(struct toy_compiler *tc, |
const struct tgsi_full_instruction *tgsi_inst, |
struct toy_dst *dst_, |
struct toy_src *src_) |
{ |
struct toy_dst tmp = tdst_ud(tc_alloc_tmp(tc)); |
struct toy_dst dst0[4]; |
struct toy_src src0[4]; |
int i; |
assert(!"SoA PK2H untested"); |
tdst_transpose(dst_[0], dst0); |
tsrc_transpose(src_[0], src0); |
tc_SHL(tc, tmp, src0[1], tsrc_imm_ud(16)); |
tc_OR(tc, tmp, src0[0], tsrc_from(tmp)); |
for (i = 0; i < 4; i++) |
tc_MOV(tc, dst0[i], tsrc_from(tmp)); |
} |
/*
 * Translate UP2H in SoA form: X/Z take the low 16 bits, Y/W the high
 * 16 bits of the corresponding source channels, as raw bit patterns.
 * NOTE(review): no half-to-float conversion is emitted -- confirm against
 * the TGSI UP2H definition before removing the assert.
 */
static void
soa_UP2H(struct toy_compiler *tc,
         const struct tgsi_full_instruction *tgsi_inst,
         struct toy_dst *dst_,
         struct toy_src *src_)
{
   struct toy_dst dst0[4];
   struct toy_src src0[4];
   assert(!"SoA UP2H untested");
   tdst_transpose(dst_[0], dst0);
   tsrc_transpose(src_[0], src0);
   tc_AND(tc, tdst_ud(dst0[0]), tsrc_ud(src0[0]), tsrc_imm_ud(0xffff));
   tc_SHR(tc, tdst_ud(dst0[1]), tsrc_ud(src0[1]), tsrc_imm_ud(16));
   tc_AND(tc, tdst_ud(dst0[2]), tsrc_ud(src0[2]), tsrc_imm_ud(0xffff));
   tc_SHR(tc, tdst_ud(dst0[3]), tsrc_ud(src0[3]), tsrc_imm_ud(16));
}
static void |
soa_SCS(struct toy_compiler *tc, |
const struct tgsi_full_instruction *tgsi_inst, |
struct toy_dst *dst_, |
struct toy_src *src_) |
{ |
struct toy_dst dst0[4]; |
struct toy_src src0[4]; |
tdst_transpose(dst_[0], dst0); |
tsrc_transpose(src_[0], src0); |
tc_add1(tc, TOY_OPCODE_COS, dst0[0], src0[0]); |
tc_add1(tc, TOY_OPCODE_SIN, dst0[1], src0[0]); |
tc_MOV(tc, dst0[2], tsrc_imm_f(0.0f)); |
tc_MOV(tc, dst0[3], tsrc_imm_f(1.0f)); |
} |
/*
 * Translate NRM (3-component normalize) in SoA form; W is forced to 1.0.
 */
static void
soa_NRM(struct toy_compiler *tc,
        const struct tgsi_full_instruction *tgsi_inst,
        struct toy_dst *dst_,
        struct toy_src *src_)
{
   const struct toy_dst tmp = tc_alloc_tmp(tc);
   struct toy_dst dst0[4];
   struct toy_src src0[4];
   assert(!"SoA NRM untested");
   tdst_transpose(dst_[0], dst0);
   tsrc_transpose(src_[0], src0);
   /* tmp = dot3(src, src) via a MUL/MAC chain */
   tc_MUL(tc, tmp, src0[2], src0[2]);
   tc_MAC(tc, tmp, src0[1], src0[1], tsrc_from(tmp));
   tc_MAC(tc, tmp, src0[0], src0[0], tsrc_from(tmp));
   /*
    * NOTE(review): INV appears to be a plain reciprocal (see aos_DIV); a
    * true normalize needs 1/sqrt (RSQ) here -- confirm before removing
    * the assert.
    */
   tc_INV(tc, tmp, tsrc_from(tmp));
   tc_MUL(tc, dst0[0], src0[0], tsrc_from(tmp));
   tc_MUL(tc, dst0[1], src0[1], tsrc_from(tmp));
   tc_MUL(tc, dst0[2], src0[2], tsrc_from(tmp));
   tc_MOV(tc, dst0[3], tsrc_imm_f(1.0f));
}
/*
 * Translate NRM4 (4-component normalize) in SoA form.
 */
static void
soa_NRM4(struct toy_compiler *tc,
         const struct tgsi_full_instruction *tgsi_inst,
         struct toy_dst *dst_,
         struct toy_src *src_)
{
   const struct toy_dst tmp = tc_alloc_tmp(tc);
   struct toy_dst dst0[4];
   struct toy_src src0[4];
   int i;
   assert(!"SoA NRM4 untested");
   tdst_transpose(dst_[0], dst0);
   tsrc_transpose(src_[0], src0);
   /* tmp = dot4(src, src) via a MUL/MAC chain */
   tc_MUL(tc, tmp, src0[3], src0[3]);
   tc_MAC(tc, tmp, src0[2], src0[2], tsrc_from(tmp));
   tc_MAC(tc, tmp, src0[1], src0[1], tsrc_from(tmp));
   tc_MAC(tc, tmp, src0[0], src0[0], tsrc_from(tmp));
   /*
    * NOTE(review): INV appears to be a plain reciprocal (see aos_DIV);
    * a true normalize needs RSQ.  Also only channel X (src0[0]) is
    * multiplied into every output channel, mirroring aos_NRM4's X
    * swizzle -- confirm both against the TGSI NRM4 definition before
    * removing the assert.
    */
   tc_INV(tc, tmp, tsrc_from(tmp));
   for (i = 0; i < 4; i++)
      tc_MUL(tc, dst0[i], src0[0], tsrc_from(tmp));
}
static void |
soa_unsupported(struct toy_compiler *tc, |
const struct tgsi_full_instruction *tgsi_inst, |
struct toy_dst *dst_, |
struct toy_src *src_) |
{ |
const struct tgsi_opcode_info *info = |
tgsi_get_opcode_info(tgsi_inst->Instruction.Opcode); |
ilo_warn("unsupported TGSI opcode in SoA form: TGSI_OPCODE_%s\n", |
info->mnemonic); |
tc_fail(tc, "unsupported TGSI instruction in SoA form"); |
} |
/*
 * Per-opcode translate callbacks for SoA mode, indexed by TGSI opcode.
 * soa_passthrough entries reuse the AoS translation unchanged; bare
 * numeric indices are gaps in the TGSI opcode enum.
 */
static const toy_tgsi_translate soa_translate_table[TGSI_OPCODE_LAST] = {
   [TGSI_OPCODE_ARL]          = soa_per_channel,
   [TGSI_OPCODE_MOV]          = soa_per_channel,
   [TGSI_OPCODE_LIT]          = soa_LIT,
   [TGSI_OPCODE_RCP]          = soa_scalar_replicate,
   [TGSI_OPCODE_RSQ]          = soa_scalar_replicate,
   [TGSI_OPCODE_EXP]          = soa_EXP,
   [TGSI_OPCODE_LOG]          = soa_LOG,
   [TGSI_OPCODE_MUL]          = soa_per_channel,
   [TGSI_OPCODE_ADD]          = soa_per_channel,
   [TGSI_OPCODE_DP3]          = soa_dot_product,
   [TGSI_OPCODE_DP4]          = soa_dot_product,
   [TGSI_OPCODE_DST]          = soa_DST,
   [TGSI_OPCODE_MIN]          = soa_per_channel,
   [TGSI_OPCODE_MAX]          = soa_per_channel,
   [TGSI_OPCODE_SLT]          = soa_per_channel,
   [TGSI_OPCODE_SGE]          = soa_per_channel,
   [TGSI_OPCODE_MAD]          = soa_per_channel,
   [TGSI_OPCODE_SUB]          = soa_per_channel,
   [TGSI_OPCODE_LRP]          = soa_per_channel,
   [TGSI_OPCODE_CND]          = soa_per_channel,
   [TGSI_OPCODE_SQRT]         = soa_scalar_replicate,
   [TGSI_OPCODE_DP2A]         = soa_dot_product,
   [22]                       = soa_unsupported,
   [23]                       = soa_unsupported,
   [TGSI_OPCODE_FRC]          = soa_per_channel,
   [TGSI_OPCODE_CLAMP]        = soa_per_channel,
   [TGSI_OPCODE_FLR]          = soa_per_channel,
   [TGSI_OPCODE_ROUND]        = soa_per_channel,
   [TGSI_OPCODE_EX2]          = soa_scalar_replicate,
   [TGSI_OPCODE_LG2]          = soa_scalar_replicate,
   [TGSI_OPCODE_POW]          = soa_scalar_replicate,
   [TGSI_OPCODE_XPD]          = soa_XPD,
   [32]                       = soa_unsupported,
   [TGSI_OPCODE_ABS]          = soa_per_channel,
   [TGSI_OPCODE_RCC]          = soa_unsupported,
   [TGSI_OPCODE_DPH]          = soa_dot_product,
   [TGSI_OPCODE_COS]          = soa_scalar_replicate,
   [TGSI_OPCODE_DDX]          = soa_partial_derivative,
   [TGSI_OPCODE_DDY]          = soa_partial_derivative,
   [TGSI_OPCODE_KILL]         = soa_passthrough,
   [TGSI_OPCODE_PK2H]         = soa_PK2H,
   [TGSI_OPCODE_PK2US]        = soa_unsupported,
   [TGSI_OPCODE_PK4B]         = soa_unsupported,
   [TGSI_OPCODE_PK4UB]        = soa_unsupported,
   [TGSI_OPCODE_RFL]          = soa_unsupported,
   [TGSI_OPCODE_SEQ]          = soa_per_channel,
   [TGSI_OPCODE_SFL]          = soa_per_channel,
   [TGSI_OPCODE_SGT]          = soa_per_channel,
   [TGSI_OPCODE_SIN]          = soa_scalar_replicate,
   [TGSI_OPCODE_SLE]          = soa_per_channel,
   [TGSI_OPCODE_SNE]          = soa_per_channel,
   [TGSI_OPCODE_STR]          = soa_per_channel,
   [TGSI_OPCODE_TEX]          = soa_passthrough,
   [TGSI_OPCODE_TXD]          = soa_passthrough,
   [TGSI_OPCODE_TXP]          = soa_passthrough,
   [TGSI_OPCODE_UP2H]         = soa_UP2H,
   [TGSI_OPCODE_UP2US]        = soa_unsupported,
   [TGSI_OPCODE_UP4B]         = soa_unsupported,
   [TGSI_OPCODE_UP4UB]        = soa_unsupported,
   [TGSI_OPCODE_X2D]          = soa_unsupported,
   [TGSI_OPCODE_ARA]          = soa_unsupported,
   [TGSI_OPCODE_ARR]          = soa_per_channel,
   [TGSI_OPCODE_BRA]          = soa_unsupported,
   [TGSI_OPCODE_CAL]          = soa_unsupported,
   [TGSI_OPCODE_RET]          = soa_unsupported,
   [TGSI_OPCODE_SSG]          = soa_per_channel,
   [TGSI_OPCODE_CMP]          = soa_per_channel,
   [TGSI_OPCODE_SCS]          = soa_SCS,
   [TGSI_OPCODE_TXB]          = soa_passthrough,
   [TGSI_OPCODE_NRM]          = soa_NRM,
   [TGSI_OPCODE_DIV]          = soa_per_channel,
   [TGSI_OPCODE_DP2]          = soa_dot_product,
   [TGSI_OPCODE_TXL]          = soa_passthrough,
   [TGSI_OPCODE_BRK]          = soa_passthrough,
   [TGSI_OPCODE_IF]           = soa_if,
   [TGSI_OPCODE_UIF]          = soa_if,
   [76]                       = soa_unsupported,
   [TGSI_OPCODE_ELSE]         = soa_passthrough,
   [TGSI_OPCODE_ENDIF]        = soa_passthrough,
   [79]                       = soa_unsupported,
   [80]                       = soa_unsupported,
   [TGSI_OPCODE_PUSHA]        = soa_unsupported,
   [TGSI_OPCODE_POPA]         = soa_unsupported,
   [TGSI_OPCODE_CEIL]         = soa_per_channel,
   [TGSI_OPCODE_I2F]          = soa_per_channel,
   [TGSI_OPCODE_NOT]          = soa_per_channel,
   [TGSI_OPCODE_TRUNC]        = soa_per_channel,
   [TGSI_OPCODE_SHL]          = soa_per_channel,
   [88]                       = soa_unsupported,
   [TGSI_OPCODE_AND]          = soa_per_channel,
   [TGSI_OPCODE_OR]           = soa_per_channel,
   [TGSI_OPCODE_MOD]          = soa_per_channel,
   [TGSI_OPCODE_XOR]          = soa_per_channel,
   [TGSI_OPCODE_SAD]          = soa_per_channel,
   [TGSI_OPCODE_TXF]          = soa_passthrough,
   [TGSI_OPCODE_TXQ]          = soa_passthrough,
   [TGSI_OPCODE_CONT]         = soa_passthrough,
   [TGSI_OPCODE_EMIT]         = soa_unsupported,
   [TGSI_OPCODE_ENDPRIM]      = soa_unsupported,
   [TGSI_OPCODE_BGNLOOP]      = soa_passthrough,
   [TGSI_OPCODE_BGNSUB]       = soa_unsupported,
   [TGSI_OPCODE_ENDLOOP]      = soa_passthrough,
   [TGSI_OPCODE_ENDSUB]       = soa_unsupported,
   [TGSI_OPCODE_TXQ_LZ]       = soa_passthrough,
   [104]                      = soa_unsupported,
   [105]                      = soa_unsupported,
   [106]                      = soa_unsupported,
   [TGSI_OPCODE_NOP]          = soa_passthrough,
   [108]                      = soa_unsupported,
   [109]                      = soa_unsupported,
   [110]                      = soa_unsupported,
   [111]                      = soa_unsupported,
   [TGSI_OPCODE_NRM4]         = soa_NRM4,
   [TGSI_OPCODE_CALLNZ]       = soa_unsupported,
   [TGSI_OPCODE_BREAKC]       = soa_unsupported,
   [TGSI_OPCODE_KILL_IF]      = soa_passthrough,
   [TGSI_OPCODE_END]          = soa_passthrough,
   [118]                      = soa_unsupported,
   [TGSI_OPCODE_F2I]          = soa_per_channel,
   [TGSI_OPCODE_IDIV]         = soa_per_channel,
   [TGSI_OPCODE_IMAX]         = soa_per_channel,
   [TGSI_OPCODE_IMIN]         = soa_per_channel,
   [TGSI_OPCODE_INEG]         = soa_per_channel,
   [TGSI_OPCODE_ISGE]         = soa_per_channel,
   [TGSI_OPCODE_ISHR]         = soa_per_channel,
   [TGSI_OPCODE_ISLT]         = soa_per_channel,
   [TGSI_OPCODE_F2U]          = soa_per_channel,
   [TGSI_OPCODE_U2F]          = soa_per_channel,
   [TGSI_OPCODE_UADD]         = soa_per_channel,
   [TGSI_OPCODE_UDIV]         = soa_per_channel,
   [TGSI_OPCODE_UMAD]         = soa_per_channel,
   [TGSI_OPCODE_UMAX]         = soa_per_channel,
   [TGSI_OPCODE_UMIN]         = soa_per_channel,
   [TGSI_OPCODE_UMOD]         = soa_per_channel,
   [TGSI_OPCODE_UMUL]         = soa_per_channel,
   [TGSI_OPCODE_USEQ]         = soa_per_channel,
   [TGSI_OPCODE_USGE]         = soa_per_channel,
   [TGSI_OPCODE_USHR]         = soa_per_channel,
   [TGSI_OPCODE_USLT]         = soa_per_channel,
   [TGSI_OPCODE_USNE]         = soa_per_channel,
   [TGSI_OPCODE_SWITCH]       = soa_unsupported,
   [TGSI_OPCODE_CASE]         = soa_unsupported,
   [TGSI_OPCODE_DEFAULT]      = soa_unsupported,
   [TGSI_OPCODE_ENDSWITCH]    = soa_unsupported,
   [TGSI_OPCODE_SAMPLE]       = soa_passthrough,
   [TGSI_OPCODE_SAMPLE_I]     = soa_passthrough,
   [TGSI_OPCODE_SAMPLE_I_MS]  = soa_passthrough,
   [TGSI_OPCODE_SAMPLE_B]     = soa_passthrough,
   [TGSI_OPCODE_SAMPLE_C]     = soa_passthrough,
   [TGSI_OPCODE_SAMPLE_C_LZ]  = soa_passthrough,
   [TGSI_OPCODE_SAMPLE_D]     = soa_passthrough,
   [TGSI_OPCODE_SAMPLE_L]     = soa_passthrough,
   [TGSI_OPCODE_GATHER4]      = soa_passthrough,
   [TGSI_OPCODE_SVIEWINFO]    = soa_passthrough,
   [TGSI_OPCODE_SAMPLE_POS]   = soa_passthrough,
   [TGSI_OPCODE_SAMPLE_INFO]  = soa_passthrough,
   [TGSI_OPCODE_UARL]         = soa_per_channel,
   [TGSI_OPCODE_UCMP]         = soa_per_channel,
   [TGSI_OPCODE_IABS]         = soa_per_channel,
   [TGSI_OPCODE_ISSG]         = soa_per_channel,
   [TGSI_OPCODE_LOAD]         = soa_unsupported,
   [TGSI_OPCODE_STORE]        = soa_unsupported,
   [TGSI_OPCODE_MFENCE]       = soa_unsupported,
   [TGSI_OPCODE_LFENCE]       = soa_unsupported,
   [TGSI_OPCODE_SFENCE]       = soa_unsupported,
   [TGSI_OPCODE_BARRIER]      = soa_unsupported,
   [TGSI_OPCODE_ATOMUADD]     = soa_unsupported,
   [TGSI_OPCODE_ATOMXCHG]     = soa_unsupported,
   [TGSI_OPCODE_ATOMCAS]      = soa_unsupported,
   [TGSI_OPCODE_ATOMAND]      = soa_unsupported,
   [TGSI_OPCODE_ATOMOR]       = soa_unsupported,
   [TGSI_OPCODE_ATOMXOR]      = soa_unsupported,
   [TGSI_OPCODE_ATOMUMIN]     = soa_unsupported,
   [TGSI_OPCODE_ATOMUMAX]     = soa_unsupported,
   [TGSI_OPCODE_ATOMIMIN]     = soa_unsupported,
   [TGSI_OPCODE_ATOMIMAX]     = soa_unsupported,
   [TGSI_OPCODE_TEX2]         = soa_passthrough,
   [TGSI_OPCODE_TXB2]         = soa_passthrough,
   [TGSI_OPCODE_TXL2]         = soa_passthrough,
};
static bool |
ra_dst_is_indirect(const struct tgsi_full_dst_register *d) |
{ |
return (d->Register.Indirect || |
(d->Register.Dimension && d->Dimension.Indirect)); |
} |
static int
ra_dst_index(const struct tgsi_full_dst_register *d)
{
   /* only valid for directly-addressed destinations */
   assert(!d->Register.Indirect);
   return d->Register.Index;
}
static int |
ra_dst_dimension(const struct tgsi_full_dst_register *d) |
{ |
if (d->Register.Dimension) { |
assert(!d->Dimension.Indirect); |
return d->Dimension.Index; |
} |
else { |
return 0; |
} |
} |
static bool |
ra_is_src_indirect(const struct tgsi_full_src_register *s) |
{ |
return (s->Register.Indirect || |
(s->Register.Dimension && s->Dimension.Indirect)); |
} |
static int
ra_src_index(const struct tgsi_full_src_register *s)
{
   /* only valid for directly-addressed sources */
   assert(!s->Register.Indirect);
   return s->Register.Index;
}
static int |
ra_src_dimension(const struct tgsi_full_src_register *s) |
{ |
if (s->Register.Dimension) { |
assert(!s->Dimension.Indirect); |
return s->Dimension.Index; |
} |
else { |
return 0; |
} |
} |
/** |
* Infer the type of either the sources or the destination. |
*/ |
static enum toy_type |
ra_infer_opcode_type(int tgsi_opcode, bool is_dst) |
{ |
enum tgsi_opcode_type type; |
if (is_dst) |
type = tgsi_opcode_infer_dst_type(tgsi_opcode); |
else |
type = tgsi_opcode_infer_src_type(tgsi_opcode); |
switch (type) { |
case TGSI_TYPE_UNSIGNED: |
return TOY_TYPE_UD; |
case TGSI_TYPE_SIGNED: |
return TOY_TYPE_D; |
case TGSI_TYPE_FLOAT: |
return TOY_TYPE_F; |
case TGSI_TYPE_UNTYPED: |
case TGSI_TYPE_VOID: |
case TGSI_TYPE_DOUBLE: |
default: |
assert(!"unsupported TGSI type"); |
return TOY_TYPE_UD; |
} |
} |
/**
 * Return the toy type of an operand of the specified instruction.
 *
 * MOV and UCMP need special-casing because their operand types cannot be
 * inferred from the opcode alone.
 */
static enum toy_type
ra_get_type(struct toy_tgsi *tgsi, const struct tgsi_full_instruction *tgsi_inst,
            int operand, bool is_dst)
{
   enum toy_type type;
   enum tgsi_file_type file;
   /* we need to look at both src and dst for MOV */
   /* XXX it should not be this complex */
   if (tgsi_inst->Instruction.Opcode == TGSI_OPCODE_MOV) {
      const enum tgsi_file_type dst_file = tgsi_inst->Dst[0].Register.File;
      const enum tgsi_file_type src_file = tgsi_inst->Src[0].Register.File;
      if (dst_file == TGSI_FILE_ADDRESS || src_file == TGSI_FILE_ADDRESS) {
         /* address registers hold signed integers */
         type = TOY_TYPE_D;
      }
      else if (src_file == TGSI_FILE_IMMEDIATE &&
               !tgsi_inst->Src[0].Register.Indirect) {
         /* use the recorded type of the immediate */
         const int src_idx = tgsi_inst->Src[0].Register.Index;
         type = tgsi->imm_data.types[src_idx];
      }
      else {
         /* this is the best we can do */
         type = TOY_TYPE_F;
      }
      return type;
   }
   else if (tgsi_inst->Instruction.Opcode == TGSI_OPCODE_UCMP) {
      /* UCMP: the condition (src0) is unsigned; everything else is float */
      if (!is_dst && operand == 0)
         type = TOY_TYPE_UD;
      else
         type = TOY_TYPE_F;
      return type;
   }
   type = ra_infer_opcode_type(tgsi_inst->Instruction.Opcode, is_dst);
   /* fix the type according to the register file */
   file = (is_dst) ?
      tgsi_inst->Dst[operand].Register.File :
      tgsi_inst->Src[operand].Register.File;
   switch (file) {
   case TGSI_FILE_SAMPLER:
   case TGSI_FILE_RESOURCE:
   case TGSI_FILE_SAMPLER_VIEW:
      /* these files hold indices, not floats */
      type = TOY_TYPE_D;
      break;
   case TGSI_FILE_ADDRESS:
      assert(type == TOY_TYPE_D);
      break;
   default:
      break;
   }
   return type;
}
/** |
* Allocate a VRF register. |
*/ |
static int |
ra_alloc_reg(struct toy_tgsi *tgsi, enum tgsi_file_type file) |
{ |
const int count = (tgsi->aos) ? 1 : 4; |
return tc_alloc_vrf(tgsi->tc, count); |
} |
/**
 * Construct the key for VRF mapping look-up.
 *
 * Packs (file, dim, index) into one pointer-sized integer:
 * file in bits 28-31, dim in bits 16-27, index in bits 0-15.
 */
static void *
ra_get_map_key(enum tgsi_file_type file, unsigned dim, unsigned index)
{
   intptr_t key;
   /* this is ugly... */
   assert(file < 1 << 4);
   assert(dim < 1 << 12);
   assert(index < 1 << 16);
   key = (file << 28) | (dim << 16) | index;
   return intptr_to_pointer(key);
}
/** |
* Map a TGSI register to a VRF register. |
*/ |
static int |
ra_map_reg(struct toy_tgsi *tgsi, enum tgsi_file_type file, |
int dim, int index, bool *is_new) |
{ |
void *key, *val; |
intptr_t vrf; |
key = ra_get_map_key(file, dim, index); |
/* |
* because we allocate vrf from 1 and on, val is never NULL as long as the |
* key exists |
*/ |
val = util_hash_table_get(tgsi->reg_mapping, key); |
if (val) { |
vrf = pointer_to_intptr(val); |
if (is_new) |
*is_new = false; |
} |
else { |
vrf = (intptr_t) ra_alloc_reg(tgsi, file); |
/* add to the mapping */ |
val = intptr_to_pointer(vrf); |
util_hash_table_set(tgsi->reg_mapping, key, val); |
if (is_new) |
*is_new = true; |
} |
return (int) vrf; |
} |
/** |
* Return true if the destination aliases any of the sources. |
*/ |
static bool |
ra_dst_is_aliasing(const struct tgsi_full_instruction *tgsi_inst, int dst_index) |
{ |
const struct tgsi_full_dst_register *d = &tgsi_inst->Dst[dst_index]; |
int i; |
/* we need a scratch register for indirect dst anyway */ |
if (ra_dst_is_indirect(d)) |
return true; |
for (i = 0; i < tgsi_inst->Instruction.NumSrcRegs; i++) { |
const struct tgsi_full_src_register *s = &tgsi_inst->Src[i]; |
if (s->Register.File != d->Register.File) |
continue; |
/* |
* we can go on to check dimension and index respectively, but |
* keep it simple for now |
*/ |
if (ra_is_src_indirect(s)) |
return true; |
if (ra_src_dimension(s) == ra_dst_dimension(d) && |
ra_src_index(s) == ra_dst_index(d)) |
return true; |
} |
return false; |
} |
/** |
* Return the toy register for a TGSI destination operand. |
*/ |
static struct toy_dst |
ra_get_dst(struct toy_tgsi *tgsi, |
const struct tgsi_full_instruction *tgsi_inst, int dst_index, |
bool *is_scratch) |
{ |
const struct tgsi_full_dst_register *d = &tgsi_inst->Dst[dst_index]; |
bool need_vrf = false; |
struct toy_dst dst; |
switch (d->Register.File) { |
case TGSI_FILE_NULL: |
dst = tdst_null(); |
break; |
case TGSI_FILE_OUTPUT: |
case TGSI_FILE_TEMPORARY: |
case TGSI_FILE_ADDRESS: |
case TGSI_FILE_PREDICATE: |
need_vrf = true; |
break; |
default: |
assert(!"unhandled dst file"); |
dst = tdst_null(); |
break; |
} |
if (need_vrf) { |
/* XXX we do not always need a scratch given the conditions... */ |
const bool need_scratch = |
(ra_dst_is_indirect(d) || ra_dst_is_aliasing(tgsi_inst, dst_index) || |
tgsi_inst->Instruction.Saturate); |
const enum toy_type type = ra_get_type(tgsi, tgsi_inst, dst_index, true); |
int vrf; |
if (need_scratch) { |
vrf = ra_alloc_reg(tgsi, d->Register.File); |
} |
else { |
vrf = ra_map_reg(tgsi, d->Register.File, |
ra_dst_dimension(d), ra_dst_index(d), NULL); |
} |
if (is_scratch) |
*is_scratch = need_scratch; |
dst = tdst_full(TOY_FILE_VRF, type, TOY_RECT_LINEAR, |
false, 0, d->Register.WriteMask, vrf * TOY_REG_WIDTH); |
} |
return dst; |
} |
static struct toy_src |
ra_get_src_for_vrf(const struct tgsi_full_src_register *s, |
enum toy_type type, int vrf) |
{ |
return tsrc_full(TOY_FILE_VRF, type, TOY_RECT_LINEAR, |
false, 0, |
s->Register.SwizzleX, s->Register.SwizzleY, |
s->Register.SwizzleZ, s->Register.SwizzleW, |
s->Register.Absolute, s->Register.Negate, |
vrf * TOY_REG_WIDTH); |
} |
static int |
init_tgsi_reg(struct toy_tgsi *tgsi, struct toy_inst *inst, |
enum tgsi_file_type file, int index, |
const struct tgsi_ind_register *indirect, |
const struct tgsi_dimension *dimension, |
const struct tgsi_ind_register *dim_indirect) |
{ |
struct toy_src src; |
int num_src = 0; |
/* src[0]: TGSI file */ |
inst->src[num_src++] = tsrc_imm_d(file); |
/* src[1]: TGSI dimension */ |
inst->src[num_src++] = tsrc_imm_d((dimension) ? dimension->Index : 0); |
/* src[2]: TGSI dimension indirection */ |
if (dim_indirect) { |
const int vrf = ra_map_reg(tgsi, dim_indirect->File, 0, |
dim_indirect->Index, NULL); |
src = tsrc(TOY_FILE_VRF, vrf, 0); |
src = tsrc_swizzle1(tsrc_d(src), indirect->Swizzle); |
} |
else { |
src = tsrc_imm_d(0); |
} |
inst->src[num_src++] = src; |
/* src[3]: TGSI index */ |
inst->src[num_src++] = tsrc_imm_d(index); |
/* src[4]: TGSI index indirection */ |
if (indirect) { |
const int vrf = ra_map_reg(tgsi, indirect->File, 0, |
indirect->Index, NULL); |
src = tsrc(TOY_FILE_VRF, vrf, 0); |
src = tsrc_swizzle1(tsrc_d(src), indirect->Swizzle); |
} |
else { |
src = tsrc_imm_d(0); |
} |
inst->src[num_src++] = src; |
return num_src; |
} |
/*
 * Return the toy register for an indirectly-addressed TGSI source
 * operand.  For non-resource files, a scratch VRF register is allocated
 * and a TGSI_INDIRECT_FETCH instruction is emitted to fill it.
 */
static struct toy_src
ra_get_src_indirect(struct toy_tgsi *tgsi,
const struct tgsi_full_instruction *tgsi_inst,
int src_index)
{
const struct tgsi_full_src_register *s = &tgsi_inst->Src[src_index];
bool need_vrf = false, is_resource = false;
struct toy_src src;
switch (s->Register.File) {
case TGSI_FILE_NULL:
src = tsrc_null();
break;
case TGSI_FILE_SAMPLER:
case TGSI_FILE_RESOURCE:
case TGSI_FILE_SAMPLER_VIEW:
is_resource = true;
/* fall through */
case TGSI_FILE_CONSTANT:
case TGSI_FILE_INPUT:
case TGSI_FILE_SYSTEM_VALUE:
case TGSI_FILE_TEMPORARY:
case TGSI_FILE_ADDRESS:
case TGSI_FILE_IMMEDIATE:
case TGSI_FILE_PREDICATE:
need_vrf = true;
break;
default:
assert(!"unhandled src file");
src = tsrc_null();
break;
}
if (need_vrf) {
const enum toy_type type = ra_get_type(tgsi, tgsi_inst, src_index, false);
int vrf;
if (is_resource) {
/* resources use the indirection register itself, not a scratch */
assert(!s->Register.Dimension);
assert(s->Register.Indirect);
vrf = ra_map_reg(tgsi, s->Indirect.File, 0, s->Indirect.Index, NULL);
}
else {
/* scratch register to receive the fetched value */
vrf = ra_alloc_reg(tgsi, s->Register.File);
}
src = ra_get_src_for_vrf(s, type, vrf);
/* emit indirect fetch */
if (!is_resource) {
struct toy_inst *inst;
inst = tc_add(tgsi->tc);
inst->opcode = TOY_OPCODE_TGSI_INDIRECT_FETCH;
inst->dst = tdst_from(src);
inst->dst.writemask = TOY_WRITEMASK_XYZW;
init_tgsi_reg(tgsi, inst, s->Register.File, s->Register.Index,
(s->Register.Indirect) ? &s->Indirect : NULL,
(s->Register.Dimension) ? &s->Dimension : NULL,
(s->Dimension.Indirect) ? &s->DimIndirect : NULL);
}
}
return src;
}
/**
 * Return the toy register for a TGSI source operand.
 *
 * Scalar immediates become immediate operands; sampler/resource files
 * become immediate indices; everything else is mapped to a VRF
 * register.  Newly-mapped TEMP/ADDR/PRED registers are zero-initialized
 * before first use.
 */
static struct toy_src
ra_get_src(struct toy_tgsi *tgsi,
const struct tgsi_full_instruction *tgsi_inst,
int src_index)
{
const struct tgsi_full_src_register *s = &tgsi_inst->Src[src_index];
bool need_vrf = false;
struct toy_src src;
/* indirect addressing is handled separately */
if (ra_is_src_indirect(s))
return ra_get_src_indirect(tgsi, tgsi_inst, src_index);
switch (s->Register.File) {
case TGSI_FILE_NULL:
src = tsrc_null();
break;
case TGSI_FILE_CONSTANT:
case TGSI_FILE_INPUT:
case TGSI_FILE_SYSTEM_VALUE:
need_vrf = true;
break;
case TGSI_FILE_TEMPORARY:
case TGSI_FILE_ADDRESS:
case TGSI_FILE_PREDICATE:
need_vrf = true;
break;
case TGSI_FILE_SAMPLER:
case TGSI_FILE_RESOURCE:
case TGSI_FILE_SAMPLER_VIEW:
/* pass the sampler/resource index as an immediate */
assert(!s->Register.Dimension);
src = tsrc_imm_d(s->Register.Index);
break;
case TGSI_FILE_IMMEDIATE:
{
const uint32_t *imm;
enum toy_type imm_type;
bool is_scalar;
imm = toy_tgsi_get_imm(tgsi, s->Register.Index, &imm_type);
/* all swizzled channels read the same value? */
is_scalar =
(imm[s->Register.SwizzleX] == imm[s->Register.SwizzleY] &&
imm[s->Register.SwizzleX] == imm[s->Register.SwizzleZ] &&
imm[s->Register.SwizzleX] == imm[s->Register.SwizzleW]);
if (is_scalar) {
const enum toy_type type =
ra_get_type(tgsi, tgsi_inst, src_index, false);
/* ignore imm_type */
src = tsrc_imm_ud(imm[s->Register.SwizzleX]);
src.type = type;
src.absolute = s->Register.Absolute;
src.negate = s->Register.Negate;
}
else {
/* non-scalar immediates go through a VRF register */
need_vrf = true;
}
}
break;
default:
assert(!"unhandled src file");
src = tsrc_null();
break;
}
if (need_vrf) {
const enum toy_type type = ra_get_type(tgsi, tgsi_inst, src_index, false);
bool is_new;
int vrf;
vrf = ra_map_reg(tgsi, s->Register.File,
ra_src_dimension(s), ra_src_index(s), &is_new);
src = ra_get_src_for_vrf(s, type, vrf);
if (is_new) {
switch (s->Register.File) {
case TGSI_FILE_TEMPORARY:
case TGSI_FILE_ADDRESS:
case TGSI_FILE_PREDICATE:
{
struct toy_dst dst = tdst_from(src);
dst.writemask = TOY_WRITEMASK_XYZW;
/* always initialize registers before use */
if (tgsi->aos) {
tc_MOV(tgsi->tc, dst, tsrc_type(tsrc_imm_d(0), type));
}
else {
/* SoA: one MOV per channel */
struct toy_dst tdst[4];
int i;
tdst_transpose(dst, tdst);
for (i = 0; i < 4; i++) {
tc_MOV(tgsi->tc, tdst[i],
tsrc_type(tsrc_imm_d(0), type));
}
}
}
break;
default:
break;
}
}
}
return src;
}
static void |
parse_instruction(struct toy_tgsi *tgsi, |
const struct tgsi_full_instruction *tgsi_inst) |
{ |
struct toy_dst dst[TGSI_FULL_MAX_DST_REGISTERS]; |
struct toy_src src[TGSI_FULL_MAX_SRC_REGISTERS]; |
bool dst_is_scratch[TGSI_FULL_MAX_DST_REGISTERS]; |
toy_tgsi_translate translate; |
int i; |
/* convert TGSI registers to toy registers */ |
for (i = 0; i < tgsi_inst->Instruction.NumSrcRegs; i++) |
src[i] = ra_get_src(tgsi, tgsi_inst, i); |
for (i = 0; i < tgsi_inst->Instruction.NumDstRegs; i++) |
dst[i] = ra_get_dst(tgsi, tgsi_inst, i, &dst_is_scratch[i]); |
/* translate the instruction */ |
translate = tgsi->translate_table[tgsi_inst->Instruction.Opcode]; |
translate(tgsi->tc, tgsi_inst, dst, src); |
/* write the result to the real destinations if needed */ |
for (i = 0; i < tgsi_inst->Instruction.NumDstRegs; i++) { |
const struct tgsi_full_dst_register *d = &tgsi_inst->Dst[i]; |
if (!dst_is_scratch[i]) |
continue; |
if (tgsi_inst->Instruction.Saturate == TGSI_SAT_MINUS_PLUS_ONE) |
tc_fail(tgsi->tc, "TGSI_SAT_MINUS_PLUS_ONE unhandled"); |
tgsi->tc->templ.saturate = tgsi_inst->Instruction.Saturate; |
/* emit indirect store */ |
if (ra_dst_is_indirect(d)) { |
struct toy_inst *inst; |
inst = tc_add(tgsi->tc); |
inst->opcode = TOY_OPCODE_TGSI_INDIRECT_STORE; |
inst->dst = dst[i]; |
init_tgsi_reg(tgsi, inst, d->Register.File, d->Register.Index, |
(d->Register.Indirect) ? &d->Indirect : NULL, |
(d->Register.Dimension) ? &d->Dimension : NULL, |
(d->Dimension.Indirect) ? &d->DimIndirect : NULL); |
} |
else { |
const enum toy_type type = ra_get_type(tgsi, tgsi_inst, i, true); |
struct toy_dst real_dst; |
int vrf; |
vrf = ra_map_reg(tgsi, d->Register.File, |
ra_dst_dimension(d), ra_dst_index(d), NULL); |
real_dst = tdst_full(TOY_FILE_VRF, type, TOY_RECT_LINEAR, |
false, 0, d->Register.WriteMask, vrf * TOY_REG_WIDTH); |
if (tgsi->aos) { |
tc_MOV(tgsi->tc, real_dst, tsrc_from(dst[i])); |
} |
else { |
struct toy_dst tdst[4]; |
struct toy_src tsrc[4]; |
int j; |
tdst_transpose(real_dst, tdst); |
tsrc_transpose(tsrc_from(dst[i]), tsrc); |
for (j = 0; j < 4; j++) |
tc_MOV(tgsi->tc, tdst[j], tsrc[j]); |
} |
} |
tgsi->tc->templ.saturate = false; |
} |
switch (tgsi_inst->Instruction.Opcode) { |
case TGSI_OPCODE_KILL_IF: |
case TGSI_OPCODE_KILL: |
tgsi->uses_kill = true; |
break; |
} |
/* remember channels written */ |
for (i = 0; i < tgsi_inst->Instruction.NumDstRegs; i++) { |
const struct tgsi_full_dst_register *d = &tgsi_inst->Dst[i]; |
if (d->Register.File != TGSI_FILE_OUTPUT) |
continue; |
for (i = 0; i < tgsi->num_outputs; i++) { |
if (tgsi->outputs[i].index == d->Register.Index) { |
tgsi->outputs[i].undefined_mask &= ~d->Register.WriteMask; |
break; |
} |
} |
} |
} |
static void |
decl_add_in(struct toy_tgsi *tgsi, const struct tgsi_full_declaration *decl) |
{ |
static const struct tgsi_declaration_interp default_interp = { |
TGSI_INTERPOLATE_PERSPECTIVE, false, 0, |
}; |
const struct tgsi_declaration_interp *interp = |
(decl->Declaration.Interpolate) ? &decl->Interp: &default_interp; |
int index; |
if (decl->Range.Last >= Elements(tgsi->inputs)) { |
assert(!"invalid IN"); |
return; |
} |
for (index = decl->Range.First; index <= decl->Range.Last; index++) { |
const int slot = tgsi->num_inputs++; |
tgsi->inputs[slot].index = index; |
tgsi->inputs[slot].usage_mask = decl->Declaration.UsageMask; |
if (decl->Declaration.Semantic) { |
tgsi->inputs[slot].semantic_name = decl->Semantic.Name; |
tgsi->inputs[slot].semantic_index = decl->Semantic.Index; |
} |
else { |
tgsi->inputs[slot].semantic_name = TGSI_SEMANTIC_GENERIC; |
tgsi->inputs[slot].semantic_index = index; |
} |
tgsi->inputs[slot].interp = interp->Interpolate; |
tgsi->inputs[slot].centroid = interp->Centroid; |
} |
} |
static void |
decl_add_out(struct toy_tgsi *tgsi, const struct tgsi_full_declaration *decl) |
{ |
int index; |
if (decl->Range.Last >= Elements(tgsi->outputs)) { |
assert(!"invalid OUT"); |
return; |
} |
assert(decl->Declaration.Semantic); |
for (index = decl->Range.First; index <= decl->Range.Last; index++) { |
const int slot = tgsi->num_outputs++; |
tgsi->outputs[slot].index = index; |
tgsi->outputs[slot].undefined_mask = TOY_WRITEMASK_XYZW; |
tgsi->outputs[slot].usage_mask = decl->Declaration.UsageMask; |
tgsi->outputs[slot].semantic_name = decl->Semantic.Name; |
tgsi->outputs[slot].semantic_index = decl->Semantic.Index; |
} |
} |
static void |
decl_add_sv(struct toy_tgsi *tgsi, const struct tgsi_full_declaration *decl) |
{ |
int index; |
if (decl->Range.Last >= Elements(tgsi->system_values)) { |
assert(!"invalid SV"); |
return; |
} |
for (index = decl->Range.First; index <= decl->Range.Last; index++) { |
const int slot = tgsi->num_system_values++; |
tgsi->system_values[slot].index = index; |
if (decl->Declaration.Semantic) { |
tgsi->system_values[slot].semantic_name = decl->Semantic.Name; |
tgsi->system_values[slot].semantic_index = decl->Semantic.Index; |
} |
else { |
tgsi->system_values[slot].semantic_name = TGSI_SEMANTIC_GENERIC; |
tgsi->system_values[slot].semantic_index = index; |
} |
} |
} |
/** |
* Emit an instruction to fetch the value of a TGSI register. |
*/ |
static void |
fetch_source(struct toy_tgsi *tgsi, enum tgsi_file_type file, int dim, int idx) |
{ |
struct toy_dst dst; |
int vrf; |
enum toy_opcode opcode; |
enum toy_type type = TOY_TYPE_F; |
switch (file) { |
case TGSI_FILE_INPUT: |
opcode = TOY_OPCODE_TGSI_IN; |
break; |
case TGSI_FILE_CONSTANT: |
opcode = TOY_OPCODE_TGSI_CONST; |
break; |
case TGSI_FILE_SYSTEM_VALUE: |
opcode = TOY_OPCODE_TGSI_SV; |
break; |
case TGSI_FILE_IMMEDIATE: |
opcode = TOY_OPCODE_TGSI_IMM; |
toy_tgsi_get_imm(tgsi, idx, &type); |
break; |
default: |
/* no need to fetch */ |
return; |
break; |
} |
vrf = ra_map_reg(tgsi, file, dim, idx, NULL); |
dst = tdst(TOY_FILE_VRF, vrf, 0); |
dst = tdst_type(dst, type); |
tc_add2(tgsi->tc, opcode, dst, tsrc_imm_d(dim), tsrc_imm_d(idx)); |
} |
static void |
parse_declaration(struct toy_tgsi *tgsi, |
const struct tgsi_full_declaration *decl) |
{ |
int i; |
switch (decl->Declaration.File) { |
case TGSI_FILE_INPUT: |
decl_add_in(tgsi, decl); |
break; |
case TGSI_FILE_OUTPUT: |
decl_add_out(tgsi, decl); |
break; |
case TGSI_FILE_SYSTEM_VALUE: |
decl_add_sv(tgsi, decl); |
break; |
case TGSI_FILE_IMMEDIATE: |
/* immediates should be declared with TGSI_TOKEN_TYPE_IMMEDIATE */ |
assert(!"unexpected immediate declaration"); |
break; |
case TGSI_FILE_NULL: |
case TGSI_FILE_CONSTANT: |
case TGSI_FILE_TEMPORARY: |
case TGSI_FILE_SAMPLER: |
case TGSI_FILE_PREDICATE: |
case TGSI_FILE_ADDRESS: |
case TGSI_FILE_RESOURCE: |
case TGSI_FILE_SAMPLER_VIEW: |
/* nothing to do */ |
break; |
default: |
assert(!"unhandled TGSI file"); |
break; |
} |
/* fetch the registers now */ |
for (i = decl->Range.First; i <= decl->Range.Last; i++) { |
const int dim = (decl->Declaration.Dimension) ? decl->Dim.Index2D : 0; |
fetch_source(tgsi, decl->Declaration.File, dim, i); |
} |
} |
static int |
add_imm(struct toy_tgsi *tgsi, enum toy_type type, const uint32_t *buf) |
{ |
/* reallocate the buffer if necessary */ |
if (tgsi->imm_data.cur >= tgsi->imm_data.size) { |
const int cur_size = tgsi->imm_data.size; |
int new_size; |
enum toy_type *new_types; |
uint32_t (*new_buf)[4]; |
new_size = (cur_size) ? cur_size << 1 : 16; |
while (new_size <= tgsi->imm_data.cur) |
new_size <<= 1; |
new_buf = REALLOC(tgsi->imm_data.buf, |
cur_size * sizeof(new_buf[0]), |
new_size * sizeof(new_buf[0])); |
new_types = REALLOC(tgsi->imm_data.types, |
cur_size * sizeof(new_types[0]), |
new_size * sizeof(new_types[0])); |
if (!new_buf || !new_types) { |
if (new_buf) |
FREE(new_buf); |
if (new_types) |
FREE(new_types); |
return -1; |
} |
tgsi->imm_data.buf = new_buf; |
tgsi->imm_data.types = new_types; |
tgsi->imm_data.size = new_size; |
} |
tgsi->imm_data.types[tgsi->imm_data.cur] = type; |
memcpy(&tgsi->imm_data.buf[tgsi->imm_data.cur], |
buf, sizeof(tgsi->imm_data.buf[0])); |
return tgsi->imm_data.cur++; |
} |
static void |
parse_immediate(struct toy_tgsi *tgsi, const struct tgsi_full_immediate *imm) |
{ |
enum toy_type type; |
uint32_t imm_buf[4]; |
int idx; |
switch (imm->Immediate.DataType) { |
case TGSI_IMM_FLOAT32: |
type = TOY_TYPE_F; |
imm_buf[0] = fui(imm->u[0].Float); |
imm_buf[1] = fui(imm->u[1].Float); |
imm_buf[2] = fui(imm->u[2].Float); |
imm_buf[3] = fui(imm->u[3].Float); |
break; |
case TGSI_IMM_INT32: |
type = TOY_TYPE_D; |
imm_buf[0] = (uint32_t) imm->u[0].Int; |
imm_buf[1] = (uint32_t) imm->u[1].Int; |
imm_buf[2] = (uint32_t) imm->u[2].Int; |
imm_buf[3] = (uint32_t) imm->u[3].Int; |
break; |
case TGSI_IMM_UINT32: |
type = TOY_TYPE_UD; |
imm_buf[0] = imm->u[0].Uint; |
imm_buf[1] = imm->u[1].Uint; |
imm_buf[2] = imm->u[2].Uint; |
imm_buf[3] = imm->u[3].Uint; |
break; |
default: |
assert(!"unhandled TGSI imm type"); |
type = TOY_TYPE_F; |
memset(imm_buf, 0, sizeof(imm_buf)); |
break; |
} |
idx = add_imm(tgsi, type, imm_buf); |
if (idx >= 0) |
fetch_source(tgsi, TGSI_FILE_IMMEDIATE, 0, idx); |
else |
tc_fail(tgsi->tc, "failed to add TGSI imm"); |
} |
static void |
parse_property(struct toy_tgsi *tgsi, const struct tgsi_full_property *prop) |
{ |
switch (prop->Property.PropertyName) { |
case TGSI_PROPERTY_VS_PROHIBIT_UCPS: |
tgsi->props.vs_prohibit_ucps = prop->u[0].Data; |
break; |
case TGSI_PROPERTY_FS_COORD_ORIGIN: |
tgsi->props.fs_coord_origin = prop->u[0].Data; |
break; |
case TGSI_PROPERTY_FS_COORD_PIXEL_CENTER: |
tgsi->props.fs_coord_pixel_center = prop->u[0].Data; |
break; |
case TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS: |
tgsi->props.fs_color0_writes_all_cbufs = prop->u[0].Data; |
break; |
case TGSI_PROPERTY_FS_DEPTH_LAYOUT: |
tgsi->props.fs_depth_layout = prop->u[0].Data; |
break; |
case TGSI_PROPERTY_GS_INPUT_PRIM: |
tgsi->props.gs_input_prim = prop->u[0].Data; |
break; |
case TGSI_PROPERTY_GS_OUTPUT_PRIM: |
tgsi->props.gs_output_prim = prop->u[0].Data; |
break; |
case TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES: |
tgsi->props.gs_max_output_vertices = prop->u[0].Data; |
break; |
default: |
assert(!"unhandled TGSI property"); |
break; |
} |
} |
static void |
parse_token(struct toy_tgsi *tgsi, const union tgsi_full_token *token) |
{ |
switch (token->Token.Type) { |
case TGSI_TOKEN_TYPE_DECLARATION: |
parse_declaration(tgsi, &token->FullDeclaration); |
break; |
case TGSI_TOKEN_TYPE_IMMEDIATE: |
parse_immediate(tgsi, &token->FullImmediate); |
break; |
case TGSI_TOKEN_TYPE_INSTRUCTION: |
parse_instruction(tgsi, &token->FullInstruction); |
break; |
case TGSI_TOKEN_TYPE_PROPERTY: |
parse_property(tgsi, &token->FullProperty); |
break; |
default: |
assert(!"unhandled TGSI token type"); |
break; |
} |
} |
static enum pipe_error |
dump_reg_mapping(void *key, void *val, void *data) |
{ |
int tgsi_file, tgsi_dim, tgsi_index; |
uint32_t sig, vrf; |
sig = (uint32_t) pointer_to_intptr(key); |
vrf = (uint32_t) pointer_to_intptr(val); |
/* see ra_get_map_key() */ |
tgsi_file = (sig >> 28) & 0xf; |
tgsi_dim = (sig >> 16) & 0xfff; |
tgsi_index = (sig >> 0) & 0xffff; |
if (tgsi_dim) { |
ilo_printf(" v%d:\t%s[%d][%d]\n", vrf, |
tgsi_file_name(tgsi_file), tgsi_dim, tgsi_index); |
} |
else { |
ilo_printf(" v%d:\t%s[%d]\n", vrf, |
tgsi_file_name(tgsi_file), tgsi_index); |
} |
return PIPE_OK; |
} |
/**
 * Dump the TGSI translator, currently only the register mapping.
 */
void
toy_tgsi_dump(const struct toy_tgsi *tgsi)
{
/* print one line per TGSI-register-to-VRF mapping */
util_hash_table_foreach(tgsi->reg_mapping, dump_reg_mapping, NULL);
}
/** |
* Clean up the TGSI translator. |
*/ |
void |
toy_tgsi_cleanup(struct toy_tgsi *tgsi) |
{ |
FREE(tgsi->imm_data.buf); |
FREE(tgsi->imm_data.types); |
util_hash_table_destroy(tgsi->reg_mapping); |
} |
/* hash callback: the key is already a packed integer (see
 * ra_get_map_key()), so use it directly as the hash value */
static unsigned
reg_mapping_hash(void *key)
{
return (unsigned) pointer_to_intptr(key);
}
/* compare callback: keys are packed integers, so pointer identity is
 * equality; return 0 when equal, non-zero otherwise */
static int
reg_mapping_compare(void *key1, void *key2)
{
   return (key1 == key2) ? 0 : 1;
}
/** |
* Initialize the TGSI translator. |
*/ |
static bool |
init_tgsi(struct toy_tgsi *tgsi, struct toy_compiler *tc, bool aos) |
{ |
memset(tgsi, 0, sizeof(*tgsi)); |
tgsi->tc = tc; |
tgsi->aos = aos; |
tgsi->translate_table = (aos) ? aos_translate_table : soa_translate_table; |
/* create a mapping of TGSI registers to VRF reigsters */ |
tgsi->reg_mapping = |
util_hash_table_create(reg_mapping_hash, reg_mapping_compare); |
return (tgsi->reg_mapping != NULL); |
} |
/** |
* Translate TGSI tokens into toy instructions. |
*/ |
void |
toy_compiler_translate_tgsi(struct toy_compiler *tc, |
const struct tgsi_token *tokens, bool aos, |
struct toy_tgsi *tgsi) |
{ |
struct tgsi_parse_context parse; |
if (!init_tgsi(tgsi, tc, aos)) { |
tc_fail(tc, "failed to initialize TGSI translator"); |
return; |
} |
tgsi_parse_init(&parse, tokens); |
while (!tgsi_parse_end_of_tokens(&parse)) { |
tgsi_parse_token(&parse); |
parse_token(tgsi, &parse.FullToken); |
} |
tgsi_parse_free(&parse); |
} |
/** |
* Map the TGSI register to VRF register. |
*/ |
int |
toy_tgsi_get_vrf(const struct toy_tgsi *tgsi, |
enum tgsi_file_type file, int dimension, int index) |
{ |
void *key, *val; |
key = ra_get_map_key(file, dimension, index); |
val = util_hash_table_get(tgsi->reg_mapping, key); |
return (val) ? pointer_to_intptr(val) : -1; |
} |
/contrib/sdk/sources/Mesa/src/gallium/drivers/ilo/shader/toy_tgsi.h |
---|
0,0 → 1,163 |
/* |
* Mesa 3-D graphics library |
* |
* Copyright (C) 2012-2013 LunarG, Inc. |
* |
* Permission is hereby granted, free of charge, to any person obtaining a |
* copy of this software and associated documentation files (the "Software"), |
* to deal in the Software without restriction, including without limitation |
* the rights to use, copy, modify, merge, publish, distribute, sublicense, |
* and/or sell copies of the Software, and to permit persons to whom the |
* Software is furnished to do so, subject to the following conditions: |
* |
* The above copyright notice and this permission notice shall be included |
* in all copies or substantial portions of the Software. |
* |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING |
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER |
* DEALINGS IN THE SOFTWARE. |
* |
* Authors: |
* Chia-I Wu <olv@lunarg.com> |
*/ |
#ifndef TOY_TGSI_H |
#define TOY_TGSI_H |
#include "pipe/p_state.h" |
#include "pipe/p_shader_tokens.h" |
#include "toy_compiler.h" |
struct tgsi_token; |
struct tgsi_full_instruction; |
struct util_hash_table; |
typedef void (*toy_tgsi_translate)(struct toy_compiler *tc, |
const struct tgsi_full_instruction *tgsi_inst, |
struct toy_dst *dst, |
struct toy_src *src); |
/* state of the TGSI-to-toy translator */
struct toy_tgsi {
struct toy_compiler *tc; /* compiler the toy instructions are emitted to */
bool aos; /* AoS (vec4) mode when true, SoA mode when false */
const toy_tgsi_translate *translate_table; /* per-opcode translate callbacks */
struct util_hash_table *reg_mapping; /* TGSI register -> VRF register */
/* shader properties collected from TGSI property tokens */
struct {
bool vs_prohibit_ucps;
int fs_coord_origin;
int fs_coord_pixel_center;
bool fs_color0_writes_all_cbufs;
int fs_depth_layout;
int gs_input_prim;
int gs_output_prim;
int gs_max_output_vertices;
} props;
/* immediate data collected from TGSI immediate tokens */
struct {
enum toy_type *types; /* per-immediate type */
uint32_t (*buf)[4]; /* per-immediate 4-channel raw data */
int cur, size; /* used count and allocated capacity */
} imm_data;
struct {
int index:16; /* TGSI register index */
unsigned usage_mask:4; /* TGSI_WRITEMASK_x */
unsigned semantic_name:8; /* TGSI_SEMANTIC_x */
unsigned semantic_index:8;
unsigned interp:4; /* TGSI_INTERPOLATE_x */
unsigned centroid:1;
} inputs[PIPE_MAX_SHADER_INPUTS];
int num_inputs;
struct {
int index:16; /* TGSI register index */
unsigned undefined_mask:4; /* channels never written (see parse_instruction) */
unsigned usage_mask:4; /* TGSI_WRITEMASK_x */
unsigned semantic_name:8; /* TGSI_SEMANTIC_x */
unsigned semantic_index:8;
} outputs[PIPE_MAX_SHADER_OUTPUTS];
int num_outputs;
struct {
int index:16; /* TGSI register index */
unsigned semantic_name:8; /* TGSI_SEMANTIC_x */
unsigned semantic_index:8;
} system_values[8];
int num_system_values;
bool uses_kill; /* set when a KILL/KILL_IF instruction is seen */
};
/** |
* Find the slot of the TGSI input. |
*/ |
static inline int |
toy_tgsi_find_input(const struct toy_tgsi *tgsi, int index) |
{ |
int slot; |
for (slot = 0; slot < tgsi->num_inputs; slot++) { |
if (tgsi->inputs[slot].index == index) |
return slot; |
} |
return -1; |
} |
/** |
* Find the slot of the TGSI system value. |
*/ |
static inline int |
toy_tgsi_find_system_value(const struct toy_tgsi *tgsi, int index) |
{ |
int slot; |
for (slot = 0; slot < tgsi->num_system_values; slot++) { |
if (tgsi->system_values[slot].index == index) |
return slot; |
} |
return -1; |
} |
/** |
* Return the immediate data of the TGSI immediate. |
*/ |
static inline const uint32_t * |
toy_tgsi_get_imm(const struct toy_tgsi *tgsi, unsigned index, |
enum toy_type *type) |
{ |
const uint32_t *imm; |
if (index >= tgsi->imm_data.cur) |
return NULL; |
imm = tgsi->imm_data.buf[index]; |
if (type) |
*type = tgsi->imm_data.types[index]; |
return imm; |
} |
void |
toy_compiler_translate_tgsi(struct toy_compiler *tc, |
const struct tgsi_token *tokens, bool aos, |
struct toy_tgsi *tgsi); |
void |
toy_tgsi_cleanup(struct toy_tgsi *tgsi); |
int |
toy_tgsi_get_vrf(const struct toy_tgsi *tgsi, |
enum tgsi_file_type file, int dimension, int index); |
void |
toy_tgsi_dump(const struct toy_tgsi *tgsi); |
#endif /* TOY_TGSI_H */ |