Subversion Repositories Kolibri OS

Compare Revisions

Regard whitespace Rev 4357 → Rev 4358

/contrib/sdk/sources/Mesa/src/gallium/drivers/ilo/Android.mk
0,0 → 1,39
# Mesa 3-D graphics library
#
# Copyright (C) 2013 LunarG Inc.
#
# Permission is hereby granted, free of charge, to any person obtaining a
# copy of this software and associated documentation files (the "Software"),
# to deal in the Software without restriction, including without limitation
# the rights to use, copy, modify, merge, publish, distribute, sublicense,
# and/or sell copies of the Software, and to permit persons to whom the
# Software is furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included
# in all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
# DEALINGS IN THE SOFTWARE.
 
# Android NDK build rules for the Gallium "ilo" driver (Intel hardware;
# it compiles against the intel winsys interface below) as a static library.
LOCAL_PATH := $(call my-dir)

# get C_SOURCES
include $(LOCAL_PATH)/Makefile.sources

include $(CLEAR_VARS)

# Driver-private headers plus the Intel winsys headers the driver includes.
# NOTE(review): $(GALLIUM_TOP) is assumed to be set by the enclosing Android
# build files — not defined in this fragment; confirm against the caller.
LOCAL_C_INCLUDES := \
$(LOCAL_PATH)/include \
$(GALLIUM_TOP)/winsys/intel

# C_SOURCES comes from the shared Makefile.sources included above, so the
# Android and automake builds compile the same file list.
LOCAL_SRC_FILES := $(C_SOURCES)

LOCAL_MODULE := libmesa_pipe_ilo

# Common Gallium compiler flags/defines, then emit a static library.
include $(GALLIUM_COMMON_MK)
include $(BUILD_STATIC_LIBRARY)
/contrib/sdk/sources/Mesa/src/gallium/drivers/ilo/Makefile.am
0,0 → 1,37
# Copyright © 2012 Intel Corporation
# Copyright (C) 2013 LunarG, Inc.
#
# Permission is hereby granted, free of charge, to any person obtaining a
# copy of this software and associated documentation files (the "Software"),
# to deal in the Software without restriction, including without limitation
# the rights to use, copy, modify, merge, publish, distribute, sublicense,
# and/or sell copies of the Software, and to permit persons to whom the
# Software is furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice (including the next
# paragraph) shall be included in all copies or substantial portions of the
# Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
# DEALINGS IN THE SOFTWARE.
 
# Automake input for the Gallium "ilo" driver, built as a libtool
# convenience library (libilo.la) that is linked into larger targets.

# C_SOURCES (the driver's file list) is shared with the Android build;
# Automake.inc supplies common Gallium definitions such as GALLIUM_CFLAGS.
include Makefile.sources
include $(top_srcdir)/src/gallium/Automake.inc

# noinst_: convenience library only — never installed on its own.
noinst_LTLIBRARIES = libilo.la

# Driver-private headers, the Intel winsys interface, then the common
# Gallium include paths/defines.
AM_CPPFLAGS = \
-Iinclude \
-I$(top_srcdir)/src/gallium/winsys/intel \
$(GALLIUM_CFLAGS)

# Hide non-exported symbols (e.g. -fvisibility=hidden where supported);
# the exact flags are substituted by configure.
AM_CFLAGS = \
$(VISIBILITY_CFLAGS)

libilo_la_SOURCES = $(C_SOURCES)
/contrib/sdk/sources/Mesa/src/gallium/drivers/ilo/Makefile.in
0,0 → 1,936
# Makefile.in generated by automake 1.14 from Makefile.am.
# @configure_input@
 
# Copyright (C) 1994-2013 Free Software Foundation, Inc.
 
# This Makefile.in is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
# with or without modifications, as long as this notice is preserved.
 
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY, to the extent permitted by law; without
# even the implied warranty of MERCHANTABILITY or FITNESS FOR A
# PARTICULAR PURPOSE.
 
@SET_MAKE@
 
# Copyright © 2012 Intel Corporation
# Copyright (C) 2013 LunarG, Inc.
#
# Permission is hereby granted, free of charge, to any person obtaining a
# copy of this software and associated documentation files (the "Software"),
# to deal in the Software without restriction, including without limitation
# the rights to use, copy, modify, merge, publish, distribute, sublicense,
# and/or sell copies of the Software, and to permit persons to whom the
# Software is furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice (including the next
# paragraph) shall be included in all copies or substantial portions of the
# Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
# DEALINGS IN THE SOFTWARE.
 
VPATH = @srcdir@
am__is_gnu_make = test -n '$(MAKEFILE_LIST)' && test -n '$(MAKELEVEL)'
am__make_running_with_option = \
case $${target_option-} in \
?) ;; \
*) echo "am__make_running_with_option: internal error: invalid" \
"target option '$${target_option-}' specified" >&2; \
exit 1;; \
esac; \
has_opt=no; \
sane_makeflags=$$MAKEFLAGS; \
if $(am__is_gnu_make); then \
sane_makeflags=$$MFLAGS; \
else \
case $$MAKEFLAGS in \
*\\[\ \ ]*) \
bs=\\; \
sane_makeflags=`printf '%s\n' "$$MAKEFLAGS" \
| sed "s/$$bs$$bs[$$bs $$bs ]*//g"`;; \
esac; \
fi; \
skip_next=no; \
strip_trailopt () \
{ \
flg=`printf '%s\n' "$$flg" | sed "s/$$1.*$$//"`; \
}; \
for flg in $$sane_makeflags; do \
test $$skip_next = yes && { skip_next=no; continue; }; \
case $$flg in \
*=*|--*) continue;; \
-*I) strip_trailopt 'I'; skip_next=yes;; \
-*I?*) strip_trailopt 'I';; \
-*O) strip_trailopt 'O'; skip_next=yes;; \
-*O?*) strip_trailopt 'O';; \
-*l) strip_trailopt 'l'; skip_next=yes;; \
-*l?*) strip_trailopt 'l';; \
-[dEDm]) skip_next=yes;; \
-[JT]) skip_next=yes;; \
esac; \
case $$flg in \
*$$target_option*) has_opt=yes; break;; \
esac; \
done; \
test $$has_opt = yes
am__make_dryrun = (target_option=n; $(am__make_running_with_option))
am__make_keepgoing = (target_option=k; $(am__make_running_with_option))
pkgdatadir = $(datadir)/@PACKAGE@
pkgincludedir = $(includedir)/@PACKAGE@
pkglibdir = $(libdir)/@PACKAGE@
pkglibexecdir = $(libexecdir)/@PACKAGE@
am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd
install_sh_DATA = $(install_sh) -c -m 644
install_sh_PROGRAM = $(install_sh) -c
install_sh_SCRIPT = $(install_sh) -c
INSTALL_HEADER = $(INSTALL_DATA)
transform = $(program_transform_name)
NORMAL_INSTALL = :
PRE_INSTALL = :
POST_INSTALL = :
NORMAL_UNINSTALL = :
PRE_UNINSTALL = :
POST_UNINSTALL = :
build_triplet = @build@
host_triplet = @host@
target_triplet = @target@
DIST_COMMON = $(srcdir)/Makefile.sources \
$(top_srcdir)/src/gallium/Automake.inc $(srcdir)/Makefile.in \
$(srcdir)/Makefile.am $(top_srcdir)/bin/depcomp
subdir = src/gallium/drivers/ilo
ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
am__aclocal_m4_deps = $(top_srcdir)/m4/ax_prog_bison.m4 \
$(top_srcdir)/m4/ax_prog_cc_for_build.m4 \
$(top_srcdir)/m4/ax_prog_cxx_for_build.m4 \
$(top_srcdir)/m4/ax_prog_flex.m4 \
$(top_srcdir)/m4/ax_pthread.m4 \
$(top_srcdir)/m4/ax_python_module.m4 \
$(top_srcdir)/m4/libtool.m4 $(top_srcdir)/m4/ltoptions.m4 \
$(top_srcdir)/m4/ltsugar.m4 $(top_srcdir)/m4/ltversion.m4 \
$(top_srcdir)/m4/lt~obsolete.m4 $(top_srcdir)/configure.ac
am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \
$(ACLOCAL_M4)
mkinstalldirs = $(install_sh) -d
CONFIG_CLEAN_FILES =
CONFIG_CLEAN_VPATH_FILES =
LTLIBRARIES = $(noinst_LTLIBRARIES)
libilo_la_LIBADD =
am__objects_1 = ilo_3d.lo ilo_3d_pipeline.lo ilo_3d_pipeline_dump.lo \
ilo_3d_pipeline_gen6.lo ilo_3d_pipeline_gen7.lo ilo_blit.lo \
ilo_blitter.lo ilo_blitter_blt.lo ilo_blitter_pipe.lo \
ilo_context.lo ilo_cp.lo ilo_format.lo ilo_gpe_gen6.lo \
ilo_gpe_gen7.lo ilo_gpgpu.lo ilo_query.lo ilo_resource.lo \
ilo_screen.lo ilo_shader.lo ilo_state.lo ilo_transfer.lo \
ilo_video.lo ilo_shader_cs.lo ilo_shader_fs.lo \
ilo_shader_gs.lo ilo_shader_vs.lo toy_compiler.lo \
toy_compiler_asm.lo toy_compiler_disasm.lo toy_legalize.lo \
toy_legalize_ra.lo toy_optimize.lo toy_tgsi.lo
am_libilo_la_OBJECTS = $(am__objects_1)
libilo_la_OBJECTS = $(am_libilo_la_OBJECTS)
AM_V_lt = $(am__v_lt_@AM_V@)
am__v_lt_ = $(am__v_lt_@AM_DEFAULT_V@)
am__v_lt_0 = --silent
am__v_lt_1 =
AM_V_P = $(am__v_P_@AM_V@)
am__v_P_ = $(am__v_P_@AM_DEFAULT_V@)
am__v_P_0 = false
am__v_P_1 = :
AM_V_GEN = $(am__v_GEN_@AM_V@)
am__v_GEN_ = $(am__v_GEN_@AM_DEFAULT_V@)
am__v_GEN_0 = @echo " GEN " $@;
am__v_GEN_1 =
AM_V_at = $(am__v_at_@AM_V@)
am__v_at_ = $(am__v_at_@AM_DEFAULT_V@)
am__v_at_0 = @
am__v_at_1 =
DEFAULT_INCLUDES = -I.@am__isrc@
depcomp = $(SHELL) $(top_srcdir)/bin/depcomp
am__depfiles_maybe = depfiles
am__mv = mv -f
COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \
$(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS)
LTCOMPILE = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \
$(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) \
$(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) \
$(AM_CFLAGS) $(CFLAGS)
AM_V_CC = $(am__v_CC_@AM_V@)
am__v_CC_ = $(am__v_CC_@AM_DEFAULT_V@)
am__v_CC_0 = @echo " CC " $@;
am__v_CC_1 =
CCLD = $(CC)
LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \
$(LIBTOOLFLAGS) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \
$(AM_LDFLAGS) $(LDFLAGS) -o $@
AM_V_CCLD = $(am__v_CCLD_@AM_V@)
am__v_CCLD_ = $(am__v_CCLD_@AM_DEFAULT_V@)
am__v_CCLD_0 = @echo " CCLD " $@;
am__v_CCLD_1 =
SOURCES = $(libilo_la_SOURCES)
DIST_SOURCES = $(libilo_la_SOURCES)
am__can_run_installinfo = \
case $$AM_UPDATE_INFO_DIR in \
n|no|NO) false;; \
*) (install-info --version) >/dev/null 2>&1;; \
esac
am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) $(LISP)
# Read a list of newline-separated strings from the standard input,
# and print each of them once, without duplicates. Input order is
# *not* preserved.
am__uniquify_input = $(AWK) '\
BEGIN { nonempty = 0; } \
{ items[$$0] = 1; nonempty = 1; } \
END { if (nonempty) { for (i in items) print i; }; } \
'
# Make sure the list of sources is unique. This is necessary because,
# e.g., the same source file might be shared among _SOURCES variables
# for different programs/libraries.
am__define_uniq_tagged_files = \
list='$(am__tagged_files)'; \
unique=`for i in $$list; do \
if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
done | $(am__uniquify_input)`
ETAGS = etags
CTAGS = ctags
DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST)
ACLOCAL = @ACLOCAL@
AMTAR = @AMTAR@
AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@
AR = @AR@
AUTOCONF = @AUTOCONF@
AUTOHEADER = @AUTOHEADER@
AUTOMAKE = @AUTOMAKE@
AWK = @AWK@
BUILD_EXEEXT = @BUILD_EXEEXT@
BUILD_OBJEXT = @BUILD_OBJEXT@
CC = @CC@
CCAS = @CCAS@
CCASDEPMODE = @CCASDEPMODE@
CCASFLAGS = @CCASFLAGS@
CCDEPMODE = @CCDEPMODE@
CC_FOR_BUILD = @CC_FOR_BUILD@
CFLAGS = @CFLAGS@
CFLAGS_FOR_BUILD = @CFLAGS_FOR_BUILD@
CLANG_RESOURCE_DIR = @CLANG_RESOURCE_DIR@
CLOCK_LIB = @CLOCK_LIB@
CPP = @CPP@
CPPFLAGS = @CPPFLAGS@
CPPFLAGS_FOR_BUILD = @CPPFLAGS_FOR_BUILD@
CPP_FOR_BUILD = @CPP_FOR_BUILD@
CXX = @CXX@
CXXCPP = @CXXCPP@
CXXCPPFLAGS_FOR_BUILD = @CXXCPPFLAGS_FOR_BUILD@
CXXCPP_FOR_BUILD = @CXXCPP_FOR_BUILD@
CXXDEPMODE = @CXXDEPMODE@
CXXFLAGS = @CXXFLAGS@
CXXFLAGS_FOR_BUILD = @CXXFLAGS_FOR_BUILD@
CXX_FOR_BUILD = @CXX_FOR_BUILD@
CYGPATH_W = @CYGPATH_W@
DEFINES = @DEFINES@
DEFINES_FOR_BUILD = @DEFINES_FOR_BUILD@
DEFS = @DEFS@
DEPDIR = @DEPDIR@
DLLTOOL = @DLLTOOL@
DLOPEN_LIBS = @DLOPEN_LIBS@
DRI2PROTO_CFLAGS = @DRI2PROTO_CFLAGS@
DRI2PROTO_LIBS = @DRI2PROTO_LIBS@
DRIGL_CFLAGS = @DRIGL_CFLAGS@
DRIGL_LIBS = @DRIGL_LIBS@
DRI_DRIVER_INSTALL_DIR = @DRI_DRIVER_INSTALL_DIR@
DRI_DRIVER_SEARCH_DIR = @DRI_DRIVER_SEARCH_DIR@
DRI_LIB_DEPS = @DRI_LIB_DEPS@
DRI_PC_REQ_PRIV = @DRI_PC_REQ_PRIV@
DSYMUTIL = @DSYMUTIL@
DUMPBIN = @DUMPBIN@
ECHO_C = @ECHO_C@
ECHO_N = @ECHO_N@
ECHO_T = @ECHO_T@
EGL_CFLAGS = @EGL_CFLAGS@
EGL_CLIENT_APIS = @EGL_CLIENT_APIS@
EGL_DRIVER_INSTALL_DIR = @EGL_DRIVER_INSTALL_DIR@
EGL_LIB_DEPS = @EGL_LIB_DEPS@
EGL_LIB_GLOB = @EGL_LIB_GLOB@
EGL_LIB_NAME = @EGL_LIB_NAME@
EGL_NATIVE_PLATFORM = @EGL_NATIVE_PLATFORM@
EGL_PLATFORMS = @EGL_PLATFORMS@
EGREP = @EGREP@
ELF_LIB = @ELF_LIB@
EXEEXT = @EXEEXT@
EXPAT_INCLUDES = @EXPAT_INCLUDES@
FGREP = @FGREP@
FREEDRENO_CFLAGS = @FREEDRENO_CFLAGS@
FREEDRENO_LIBS = @FREEDRENO_LIBS@
GALLIUM_DRI_LIB_DEPS = @GALLIUM_DRI_LIB_DEPS@
GALLIUM_PIPE_LOADER_DEFINES = @GALLIUM_PIPE_LOADER_DEFINES@
GALLIUM_PIPE_LOADER_LIBS = @GALLIUM_PIPE_LOADER_LIBS@
GALLIUM_PIPE_LOADER_XCB_CFLAGS = @GALLIUM_PIPE_LOADER_XCB_CFLAGS@
GALLIUM_PIPE_LOADER_XCB_LIBS = @GALLIUM_PIPE_LOADER_XCB_LIBS@
GBM_PC_LIB_PRIV = @GBM_PC_LIB_PRIV@
GBM_PC_REQ_PRIV = @GBM_PC_REQ_PRIV@
GLAPI_LIB_GLOB = @GLAPI_LIB_GLOB@
GLAPI_LIB_NAME = @GLAPI_LIB_NAME@
GLESv1_CM_LIB_DEPS = @GLESv1_CM_LIB_DEPS@
GLESv1_CM_LIB_GLOB = @GLESv1_CM_LIB_GLOB@
GLESv1_CM_LIB_NAME = @GLESv1_CM_LIB_NAME@
GLESv1_CM_PC_LIB_PRIV = @GLESv1_CM_PC_LIB_PRIV@
GLESv2_LIB_DEPS = @GLESv2_LIB_DEPS@
GLESv2_LIB_GLOB = @GLESv2_LIB_GLOB@
GLESv2_LIB_NAME = @GLESv2_LIB_NAME@
GLESv2_PC_LIB_PRIV = @GLESv2_PC_LIB_PRIV@
GLPROTO_CFLAGS = @GLPROTO_CFLAGS@
GLPROTO_LIBS = @GLPROTO_LIBS@
GLX_TLS = @GLX_TLS@
GL_LIB = @GL_LIB@
GL_LIB_DEPS = @GL_LIB_DEPS@
GL_LIB_GLOB = @GL_LIB_GLOB@
GL_LIB_NAME = @GL_LIB_NAME@
GL_PC_CFLAGS = @GL_PC_CFLAGS@
GL_PC_LIB_PRIV = @GL_PC_LIB_PRIV@
GL_PC_REQ_PRIV = @GL_PC_REQ_PRIV@
GREP = @GREP@
HAVE_XF86VIDMODE = @HAVE_XF86VIDMODE@
INDENT = @INDENT@
INDENT_FLAGS = @INDENT_FLAGS@
INSTALL = @INSTALL@
INSTALL_DATA = @INSTALL_DATA@
INSTALL_PROGRAM = @INSTALL_PROGRAM@
INSTALL_SCRIPT = @INSTALL_SCRIPT@
INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@
INTEL_CFLAGS = @INTEL_CFLAGS@
INTEL_LIBS = @INTEL_LIBS@
LD = @LD@
LDFLAGS = @LDFLAGS@
LDFLAGS_FOR_BUILD = @LDFLAGS_FOR_BUILD@
LEX = @LEX@
LEXLIB = @LEXLIB@
LEX_OUTPUT_ROOT = @LEX_OUTPUT_ROOT@
LIBCLC_INCLUDEDIR = @LIBCLC_INCLUDEDIR@
LIBCLC_LIBEXECDIR = @LIBCLC_LIBEXECDIR@
LIBDRM_CFLAGS = @LIBDRM_CFLAGS@
LIBDRM_LIBS = @LIBDRM_LIBS@
LIBDRM_XORG_CFLAGS = @LIBDRM_XORG_CFLAGS@
LIBDRM_XORG_LIBS = @LIBDRM_XORG_LIBS@
LIBKMS_XORG_CFLAGS = @LIBKMS_XORG_CFLAGS@
LIBKMS_XORG_LIBS = @LIBKMS_XORG_LIBS@
LIBOBJS = @LIBOBJS@
LIBS = @LIBS@
LIBTOOL = @LIBTOOL@
LIBUDEV_CFLAGS = @LIBUDEV_CFLAGS@
LIBUDEV_LIBS = @LIBUDEV_LIBS@
LIB_DIR = @LIB_DIR@
LIPO = @LIPO@
LLVM_BINDIR = @LLVM_BINDIR@
LLVM_CFLAGS = @LLVM_CFLAGS@
LLVM_CONFIG = @LLVM_CONFIG@
LLVM_CPPFLAGS = @LLVM_CPPFLAGS@
LLVM_CXXFLAGS = @LLVM_CXXFLAGS@
LLVM_INCLUDEDIR = @LLVM_INCLUDEDIR@
LLVM_LDFLAGS = @LLVM_LDFLAGS@
LLVM_LIBDIR = @LLVM_LIBDIR@
LLVM_LIBS = @LLVM_LIBS@
LLVM_VERSION = @LLVM_VERSION@
LN_S = @LN_S@
LTLIBOBJS = @LTLIBOBJS@
MAKE = @MAKE@
MAKEINFO = @MAKEINFO@
MANIFEST_TOOL = @MANIFEST_TOOL@
MESA_LLVM = @MESA_LLVM@
MKDIR_P = @MKDIR_P@
NM = @NM@
NMEDIT = @NMEDIT@
NOUVEAU_CFLAGS = @NOUVEAU_CFLAGS@
NOUVEAU_LIBS = @NOUVEAU_LIBS@
OBJDUMP = @OBJDUMP@
OBJEXT = @OBJEXT@
OPENCL_LIB_INSTALL_DIR = @OPENCL_LIB_INSTALL_DIR@
OSMESA_LIB = @OSMESA_LIB@
OSMESA_LIB_DEPS = @OSMESA_LIB_DEPS@
OSMESA_LIB_NAME = @OSMESA_LIB_NAME@
OSMESA_MESA_DEPS = @OSMESA_MESA_DEPS@
OSMESA_PC_LIB_PRIV = @OSMESA_PC_LIB_PRIV@
OSMESA_PC_REQ = @OSMESA_PC_REQ@
OSMESA_VERSION = @OSMESA_VERSION@
OTOOL = @OTOOL@
OTOOL64 = @OTOOL64@
PACKAGE = @PACKAGE@
PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@
PACKAGE_NAME = @PACKAGE_NAME@
PACKAGE_STRING = @PACKAGE_STRING@
PACKAGE_TARNAME = @PACKAGE_TARNAME@
PACKAGE_URL = @PACKAGE_URL@
PACKAGE_VERSION = @PACKAGE_VERSION@
PATH_SEPARATOR = @PATH_SEPARATOR@
PERL = @PERL@
PKG_CONFIG = @PKG_CONFIG@
PKG_CONFIG_LIBDIR = @PKG_CONFIG_LIBDIR@
PKG_CONFIG_PATH = @PKG_CONFIG_PATH@
POSIX_SHELL = @POSIX_SHELL@
PTHREAD_CC = @PTHREAD_CC@
PTHREAD_CFLAGS = @PTHREAD_CFLAGS@
PTHREAD_LIBS = @PTHREAD_LIBS@
PYTHON2 = @PYTHON2@
RADEON_CFLAGS = @RADEON_CFLAGS@
RADEON_LIBS = @RADEON_LIBS@
RANLIB = @RANLIB@
SED = @SED@
SELINUX_LIBS = @SELINUX_LIBS@
SET_MAKE = @SET_MAKE@
SHELL = @SHELL@
STRIP = @STRIP@
VDPAU_CFLAGS = @VDPAU_CFLAGS@
VDPAU_LIBS = @VDPAU_LIBS@
VDPAU_LIB_INSTALL_DIR = @VDPAU_LIB_INSTALL_DIR@
VDPAU_MAJOR = @VDPAU_MAJOR@
VDPAU_MINOR = @VDPAU_MINOR@
VERSION = @VERSION@
VG_LIB_DEPS = @VG_LIB_DEPS@
VG_LIB_GLOB = @VG_LIB_GLOB@
VG_LIB_NAME = @VG_LIB_NAME@
VG_PC_LIB_PRIV = @VG_PC_LIB_PRIV@
VISIBILITY_CFLAGS = @VISIBILITY_CFLAGS@
VISIBILITY_CXXFLAGS = @VISIBILITY_CXXFLAGS@
WAYLAND_CFLAGS = @WAYLAND_CFLAGS@
WAYLAND_LIBS = @WAYLAND_LIBS@
WAYLAND_SCANNER = @WAYLAND_SCANNER@
X11_INCLUDES = @X11_INCLUDES@
XA_MAJOR = @XA_MAJOR@
XA_MINOR = @XA_MINOR@
XA_TINY = @XA_TINY@
XA_VERSION = @XA_VERSION@
XCB_DRI2_CFLAGS = @XCB_DRI2_CFLAGS@
XCB_DRI2_LIBS = @XCB_DRI2_LIBS@
XEXT_CFLAGS = @XEXT_CFLAGS@
XEXT_LIBS = @XEXT_LIBS@
XF86VIDMODE_CFLAGS = @XF86VIDMODE_CFLAGS@
XF86VIDMODE_LIBS = @XF86VIDMODE_LIBS@
XLIBGL_CFLAGS = @XLIBGL_CFLAGS@
XLIBGL_LIBS = @XLIBGL_LIBS@
XORG_CFLAGS = @XORG_CFLAGS@
XORG_DRIVER_INSTALL_DIR = @XORG_DRIVER_INSTALL_DIR@
XORG_LIBS = @XORG_LIBS@
XVMC_CFLAGS = @XVMC_CFLAGS@
XVMC_LIBS = @XVMC_LIBS@
XVMC_LIB_INSTALL_DIR = @XVMC_LIB_INSTALL_DIR@
XVMC_MAJOR = @XVMC_MAJOR@
XVMC_MINOR = @XVMC_MINOR@
YACC = @YACC@
YFLAGS = @YFLAGS@
abs_builddir = @abs_builddir@
abs_srcdir = @abs_srcdir@
abs_top_builddir = @abs_top_builddir@
abs_top_srcdir = @abs_top_srcdir@
ac_ct_AR = @ac_ct_AR@
ac_ct_CC = @ac_ct_CC@
ac_ct_CC_FOR_BUILD = @ac_ct_CC_FOR_BUILD@
ac_ct_CXX = @ac_ct_CXX@
ac_ct_CXX_FOR_BUILD = @ac_ct_CXX_FOR_BUILD@
ac_ct_DUMPBIN = @ac_ct_DUMPBIN@
am__include = @am__include@
am__leading_dot = @am__leading_dot@
am__quote = @am__quote@
am__tar = @am__tar@
am__untar = @am__untar@
ax_pthread_config = @ax_pthread_config@
bindir = @bindir@
build = @build@
build_alias = @build_alias@
build_cpu = @build_cpu@
build_os = @build_os@
build_vendor = @build_vendor@
builddir = @builddir@
datadir = @datadir@
datarootdir = @datarootdir@
docdir = @docdir@
dvidir = @dvidir@
exec_prefix = @exec_prefix@
host = @host@
host_alias = @host_alias@
host_cpu = @host_cpu@
host_os = @host_os@
host_vendor = @host_vendor@
htmldir = @htmldir@
includedir = @includedir@
infodir = @infodir@
install_sh = @install_sh@
libdir = @libdir@
libexecdir = @libexecdir@
localedir = @localedir@
localstatedir = @localstatedir@
mandir = @mandir@
mkdir_p = @mkdir_p@
oldincludedir = @oldincludedir@
pdfdir = @pdfdir@
prefix = @prefix@
program_transform_name = @program_transform_name@
psdir = @psdir@
sbindir = @sbindir@
sharedstatedir = @sharedstatedir@
srcdir = @srcdir@
sysconfdir = @sysconfdir@
target = @target@
target_alias = @target_alias@
target_cpu = @target_cpu@
target_os = @target_os@
target_vendor = @target_vendor@
top_build_prefix = @top_build_prefix@
top_builddir = @top_builddir@
top_srcdir = @top_srcdir@
C_SOURCES := \
ilo_3d.c \
ilo_3d_pipeline.c \
ilo_3d_pipeline_dump.c \
ilo_3d_pipeline_gen6.c \
ilo_3d_pipeline_gen7.c \
ilo_blit.c \
ilo_blitter.c \
ilo_blitter_blt.c \
ilo_blitter_pipe.c \
ilo_context.c \
ilo_cp.c \
ilo_format.c \
ilo_gpe_gen6.c \
ilo_gpe_gen7.c \
ilo_gpgpu.c \
ilo_query.c \
ilo_resource.c \
ilo_screen.c \
ilo_shader.c \
ilo_state.c \
ilo_transfer.c \
ilo_video.c \
shader/ilo_shader_cs.c \
shader/ilo_shader_fs.c \
shader/ilo_shader_gs.c \
shader/ilo_shader_vs.c \
shader/toy_compiler.c \
shader/toy_compiler_asm.c \
shader/toy_compiler_disasm.c \
shader/toy_legalize.c \
shader/toy_legalize_ra.c \
shader/toy_optimize.c \
shader/toy_tgsi.c
 
GALLIUM_CFLAGS = \
-I$(top_srcdir)/include \
-I$(top_srcdir)/src/gallium/include \
-I$(top_srcdir)/src/gallium/auxiliary \
$(DEFINES)
 
noinst_LTLIBRARIES = libilo.la
AM_CPPFLAGS = \
-Iinclude \
-I$(top_srcdir)/src/gallium/winsys/intel \
$(GALLIUM_CFLAGS)
 
AM_CFLAGS = \
$(VISIBILITY_CFLAGS)
 
libilo_la_SOURCES = $(C_SOURCES)
all: all-am
 
.SUFFIXES:
.SUFFIXES: .c .lo .o .obj
$(srcdir)/Makefile.in: $(srcdir)/Makefile.am $(srcdir)/Makefile.sources $(top_srcdir)/src/gallium/Automake.inc $(am__configure_deps)
@for dep in $?; do \
case '$(am__configure_deps)' in \
*$$dep*) \
( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \
&& { if test -f $@; then exit 0; else break; fi; }; \
exit 1;; \
esac; \
done; \
echo ' cd $(top_srcdir) && $(AUTOMAKE) --foreign src/gallium/drivers/ilo/Makefile'; \
$(am__cd) $(top_srcdir) && \
$(AUTOMAKE) --foreign src/gallium/drivers/ilo/Makefile
.PRECIOUS: Makefile
Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status
@case '$?' in \
*config.status*) \
cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \
*) \
echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)'; \
cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe);; \
esac;
$(srcdir)/Makefile.sources $(top_srcdir)/src/gallium/Automake.inc:
 
$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES)
cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
 
$(top_srcdir)/configure: $(am__configure_deps)
cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
$(ACLOCAL_M4): $(am__aclocal_m4_deps)
cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
$(am__aclocal_m4_deps):
 
clean-noinstLTLIBRARIES:
-test -z "$(noinst_LTLIBRARIES)" || rm -f $(noinst_LTLIBRARIES)
@list='$(noinst_LTLIBRARIES)'; \
locs=`for p in $$list; do echo $$p; done | \
sed 's|^[^/]*$$|.|; s|/[^/]*$$||; s|$$|/so_locations|' | \
sort -u`; \
test -z "$$locs" || { \
echo rm -f $${locs}; \
rm -f $${locs}; \
}
 
libilo.la: $(libilo_la_OBJECTS) $(libilo_la_DEPENDENCIES) $(EXTRA_libilo_la_DEPENDENCIES)
$(AM_V_CCLD)$(LINK) $(libilo_la_OBJECTS) $(libilo_la_LIBADD) $(LIBS)
 
mostlyclean-compile:
-rm -f *.$(OBJEXT)
 
distclean-compile:
-rm -f *.tab.c
 
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/ilo_3d.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/ilo_3d_pipeline.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/ilo_3d_pipeline_dump.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/ilo_3d_pipeline_gen6.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/ilo_3d_pipeline_gen7.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/ilo_blit.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/ilo_blitter.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/ilo_blitter_blt.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/ilo_blitter_pipe.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/ilo_context.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/ilo_cp.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/ilo_format.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/ilo_gpe_gen6.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/ilo_gpe_gen7.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/ilo_gpgpu.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/ilo_query.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/ilo_resource.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/ilo_screen.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/ilo_shader.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/ilo_shader_cs.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/ilo_shader_fs.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/ilo_shader_gs.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/ilo_shader_vs.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/ilo_state.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/ilo_transfer.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/ilo_video.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/toy_compiler.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/toy_compiler_asm.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/toy_compiler_disasm.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/toy_legalize.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/toy_legalize_ra.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/toy_optimize.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/toy_tgsi.Plo@am__quote@
 
.c.o:
@am__fastdepCC_TRUE@ $(AM_V_CC)$(COMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $<
@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po
@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@
@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ $<
 
.c.obj:
@am__fastdepCC_TRUE@ $(AM_V_CC)$(COMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ `$(CYGPATH_W) '$<'`
@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po
@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@
@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ `$(CYGPATH_W) '$<'`
 
.c.lo:
@am__fastdepCC_TRUE@ $(AM_V_CC)$(LTCOMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $<
@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Plo
@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=yes @AMDEPBACKSLASH@
@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LTCOMPILE) -c -o $@ $<
 
ilo_shader_cs.lo: shader/ilo_shader_cs.c
@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT ilo_shader_cs.lo -MD -MP -MF $(DEPDIR)/ilo_shader_cs.Tpo -c -o ilo_shader_cs.lo `test -f 'shader/ilo_shader_cs.c' || echo '$(srcdir)/'`shader/ilo_shader_cs.c
@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/ilo_shader_cs.Tpo $(DEPDIR)/ilo_shader_cs.Plo
@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='shader/ilo_shader_cs.c' object='ilo_shader_cs.lo' libtool=yes @AMDEPBACKSLASH@
@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o ilo_shader_cs.lo `test -f 'shader/ilo_shader_cs.c' || echo '$(srcdir)/'`shader/ilo_shader_cs.c
 
ilo_shader_fs.lo: shader/ilo_shader_fs.c
@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT ilo_shader_fs.lo -MD -MP -MF $(DEPDIR)/ilo_shader_fs.Tpo -c -o ilo_shader_fs.lo `test -f 'shader/ilo_shader_fs.c' || echo '$(srcdir)/'`shader/ilo_shader_fs.c
@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/ilo_shader_fs.Tpo $(DEPDIR)/ilo_shader_fs.Plo
@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='shader/ilo_shader_fs.c' object='ilo_shader_fs.lo' libtool=yes @AMDEPBACKSLASH@
@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o ilo_shader_fs.lo `test -f 'shader/ilo_shader_fs.c' || echo '$(srcdir)/'`shader/ilo_shader_fs.c
 
ilo_shader_gs.lo: shader/ilo_shader_gs.c
@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT ilo_shader_gs.lo -MD -MP -MF $(DEPDIR)/ilo_shader_gs.Tpo -c -o ilo_shader_gs.lo `test -f 'shader/ilo_shader_gs.c' || echo '$(srcdir)/'`shader/ilo_shader_gs.c
@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/ilo_shader_gs.Tpo $(DEPDIR)/ilo_shader_gs.Plo
@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='shader/ilo_shader_gs.c' object='ilo_shader_gs.lo' libtool=yes @AMDEPBACKSLASH@
@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o ilo_shader_gs.lo `test -f 'shader/ilo_shader_gs.c' || echo '$(srcdir)/'`shader/ilo_shader_gs.c
 
ilo_shader_vs.lo: shader/ilo_shader_vs.c
@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT ilo_shader_vs.lo -MD -MP -MF $(DEPDIR)/ilo_shader_vs.Tpo -c -o ilo_shader_vs.lo `test -f 'shader/ilo_shader_vs.c' || echo '$(srcdir)/'`shader/ilo_shader_vs.c
@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/ilo_shader_vs.Tpo $(DEPDIR)/ilo_shader_vs.Plo
@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='shader/ilo_shader_vs.c' object='ilo_shader_vs.lo' libtool=yes @AMDEPBACKSLASH@
@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o ilo_shader_vs.lo `test -f 'shader/ilo_shader_vs.c' || echo '$(srcdir)/'`shader/ilo_shader_vs.c
 
toy_compiler.lo: shader/toy_compiler.c
@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT toy_compiler.lo -MD -MP -MF $(DEPDIR)/toy_compiler.Tpo -c -o toy_compiler.lo `test -f 'shader/toy_compiler.c' || echo '$(srcdir)/'`shader/toy_compiler.c
@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/toy_compiler.Tpo $(DEPDIR)/toy_compiler.Plo
@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='shader/toy_compiler.c' object='toy_compiler.lo' libtool=yes @AMDEPBACKSLASH@
@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o toy_compiler.lo `test -f 'shader/toy_compiler.c' || echo '$(srcdir)/'`shader/toy_compiler.c
 
toy_compiler_asm.lo: shader/toy_compiler_asm.c
@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT toy_compiler_asm.lo -MD -MP -MF $(DEPDIR)/toy_compiler_asm.Tpo -c -o toy_compiler_asm.lo `test -f 'shader/toy_compiler_asm.c' || echo '$(srcdir)/'`shader/toy_compiler_asm.c
@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/toy_compiler_asm.Tpo $(DEPDIR)/toy_compiler_asm.Plo
@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='shader/toy_compiler_asm.c' object='toy_compiler_asm.lo' libtool=yes @AMDEPBACKSLASH@
@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o toy_compiler_asm.lo `test -f 'shader/toy_compiler_asm.c' || echo '$(srcdir)/'`shader/toy_compiler_asm.c
 
toy_compiler_disasm.lo: shader/toy_compiler_disasm.c
@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT toy_compiler_disasm.lo -MD -MP -MF $(DEPDIR)/toy_compiler_disasm.Tpo -c -o toy_compiler_disasm.lo `test -f 'shader/toy_compiler_disasm.c' || echo '$(srcdir)/'`shader/toy_compiler_disasm.c
@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/toy_compiler_disasm.Tpo $(DEPDIR)/toy_compiler_disasm.Plo
@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='shader/toy_compiler_disasm.c' object='toy_compiler_disasm.lo' libtool=yes @AMDEPBACKSLASH@
@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o toy_compiler_disasm.lo `test -f 'shader/toy_compiler_disasm.c' || echo '$(srcdir)/'`shader/toy_compiler_disasm.c
 
toy_legalize.lo: shader/toy_legalize.c
@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT toy_legalize.lo -MD -MP -MF $(DEPDIR)/toy_legalize.Tpo -c -o toy_legalize.lo `test -f 'shader/toy_legalize.c' || echo '$(srcdir)/'`shader/toy_legalize.c
@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/toy_legalize.Tpo $(DEPDIR)/toy_legalize.Plo
@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='shader/toy_legalize.c' object='toy_legalize.lo' libtool=yes @AMDEPBACKSLASH@
@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o toy_legalize.lo `test -f 'shader/toy_legalize.c' || echo '$(srcdir)/'`shader/toy_legalize.c
 
toy_legalize_ra.lo: shader/toy_legalize_ra.c
@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT toy_legalize_ra.lo -MD -MP -MF $(DEPDIR)/toy_legalize_ra.Tpo -c -o toy_legalize_ra.lo `test -f 'shader/toy_legalize_ra.c' || echo '$(srcdir)/'`shader/toy_legalize_ra.c
@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/toy_legalize_ra.Tpo $(DEPDIR)/toy_legalize_ra.Plo
@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='shader/toy_legalize_ra.c' object='toy_legalize_ra.lo' libtool=yes @AMDEPBACKSLASH@
@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o toy_legalize_ra.lo `test -f 'shader/toy_legalize_ra.c' || echo '$(srcdir)/'`shader/toy_legalize_ra.c
 
toy_optimize.lo: shader/toy_optimize.c
@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT toy_optimize.lo -MD -MP -MF $(DEPDIR)/toy_optimize.Tpo -c -o toy_optimize.lo `test -f 'shader/toy_optimize.c' || echo '$(srcdir)/'`shader/toy_optimize.c
@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/toy_optimize.Tpo $(DEPDIR)/toy_optimize.Plo
@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='shader/toy_optimize.c' object='toy_optimize.lo' libtool=yes @AMDEPBACKSLASH@
@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o toy_optimize.lo `test -f 'shader/toy_optimize.c' || echo '$(srcdir)/'`shader/toy_optimize.c
 
toy_tgsi.lo: shader/toy_tgsi.c
@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT toy_tgsi.lo -MD -MP -MF $(DEPDIR)/toy_tgsi.Tpo -c -o toy_tgsi.lo `test -f 'shader/toy_tgsi.c' || echo '$(srcdir)/'`shader/toy_tgsi.c
@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/toy_tgsi.Tpo $(DEPDIR)/toy_tgsi.Plo
@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='shader/toy_tgsi.c' object='toy_tgsi.lo' libtool=yes @AMDEPBACKSLASH@
@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o toy_tgsi.lo `test -f 'shader/toy_tgsi.c' || echo '$(srcdir)/'`shader/toy_tgsi.c
 
mostlyclean-libtool:
-rm -f *.lo
 
clean-libtool:
-rm -rf .libs _libs
 
ID: $(am__tagged_files)
$(am__define_uniq_tagged_files); mkid -fID $$unique
tags: tags-am
TAGS: tags
 
tags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files)
set x; \
here=`pwd`; \
$(am__define_uniq_tagged_files); \
shift; \
if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \
test -n "$$unique" || unique=$$empty_fix; \
if test $$# -gt 0; then \
$(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
"$$@" $$unique; \
else \
$(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
$$unique; \
fi; \
fi
ctags: ctags-am
 
CTAGS: ctags
ctags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files)
$(am__define_uniq_tagged_files); \
test -z "$(CTAGS_ARGS)$$unique" \
|| $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \
$$unique
 
GTAGS:
here=`$(am__cd) $(top_builddir) && pwd` \
&& $(am__cd) $(top_srcdir) \
&& gtags -i $(GTAGS_ARGS) "$$here"
cscopelist: cscopelist-am
 
cscopelist-am: $(am__tagged_files)
list='$(am__tagged_files)'; \
case "$(srcdir)" in \
[\\/]* | ?:[\\/]*) sdir="$(srcdir)" ;; \
*) sdir=$(subdir)/$(srcdir) ;; \
esac; \
for i in $$list; do \
if test -f "$$i"; then \
echo "$(subdir)/$$i"; \
else \
echo "$$sdir/$$i"; \
fi; \
done >> $(top_builddir)/cscope.files
 
distclean-tags:
-rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags
 
distdir: $(DISTFILES)
@srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
list='$(DISTFILES)'; \
dist_files=`for file in $$list; do echo $$file; done | \
sed -e "s|^$$srcdirstrip/||;t" \
-e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \
case $$dist_files in \
*/*) $(MKDIR_P) `echo "$$dist_files" | \
sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \
sort -u` ;; \
esac; \
for file in $$dist_files; do \
if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \
if test -d $$d/$$file; then \
dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \
if test -d "$(distdir)/$$file"; then \
find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
fi; \
if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \
cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \
find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
fi; \
cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \
else \
test -f "$(distdir)/$$file" \
|| cp -p $$d/$$file "$(distdir)/$$file" \
|| exit 1; \
fi; \
done
check-am: all-am
check: check-am
all-am: Makefile $(LTLIBRARIES)
installdirs:
install: install-am
install-exec: install-exec-am
install-data: install-data-am
uninstall: uninstall-am
 
install-am: all-am
@$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am
 
installcheck: installcheck-am
install-strip:
if test -z '$(STRIP)'; then \
$(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
install; \
else \
$(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
"INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \
fi
mostlyclean-generic:
 
clean-generic:
 
distclean-generic:
-test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES)
-test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES)
 
maintainer-clean-generic:
@echo "This command is intended for maintainers to use"
@echo "it deletes files that may require special tools to rebuild."
clean: clean-am
 
clean-am: clean-generic clean-libtool clean-noinstLTLIBRARIES \
mostlyclean-am
 
distclean: distclean-am
-rm -rf ./$(DEPDIR)
-rm -f Makefile
distclean-am: clean-am distclean-compile distclean-generic \
distclean-tags
 
dvi: dvi-am
 
dvi-am:
 
html: html-am
 
html-am:
 
info: info-am
 
info-am:
 
install-data-am:
 
install-dvi: install-dvi-am
 
install-dvi-am:
 
install-exec-am:
 
install-html: install-html-am
 
install-html-am:
 
install-info: install-info-am
 
install-info-am:
 
install-man:
 
install-pdf: install-pdf-am
 
install-pdf-am:
 
install-ps: install-ps-am
 
install-ps-am:
 
installcheck-am:
 
maintainer-clean: maintainer-clean-am
-rm -rf ./$(DEPDIR)
-rm -f Makefile
maintainer-clean-am: distclean-am maintainer-clean-generic
 
mostlyclean: mostlyclean-am
 
mostlyclean-am: mostlyclean-compile mostlyclean-generic \
mostlyclean-libtool
 
pdf: pdf-am
 
pdf-am:
 
ps: ps-am
 
ps-am:
 
uninstall-am:
 
.MAKE: install-am install-strip
 
.PHONY: CTAGS GTAGS TAGS all all-am check check-am clean clean-generic \
clean-libtool clean-noinstLTLIBRARIES cscopelist-am ctags \
ctags-am distclean distclean-compile distclean-generic \
distclean-libtool distclean-tags distdir dvi dvi-am html \
html-am info info-am install install-am install-data \
install-data-am install-dvi install-dvi-am install-exec \
install-exec-am install-html install-html-am install-info \
install-info-am install-man install-pdf install-pdf-am \
install-ps install-ps-am install-strip installcheck \
installcheck-am installdirs maintainer-clean \
maintainer-clean-generic mostlyclean mostlyclean-compile \
mostlyclean-generic mostlyclean-libtool pdf pdf-am ps ps-am \
tags tags-am uninstall uninstall-am
 
 
# Tell versions [3.59,3.63) of GNU make to not export all variables.
# Otherwise a system limit (for SysV at least) may be exceeded.
.NOEXPORT:
/contrib/sdk/sources/Mesa/src/gallium/drivers/ilo/Makefile.sources
0,0 → 1,34
# C_SOURCES lists every compilation unit of the ilo driver.  It is shared
# by the different build systems (e.g. Android.mk includes this file and
# copies C_SOURCES into LOCAL_SRC_FILES).
C_SOURCES := \
	ilo_3d.c \
	ilo_3d_pipeline.c \
	ilo_3d_pipeline_dump.c \
	ilo_3d_pipeline_gen6.c \
	ilo_3d_pipeline_gen7.c \
	ilo_blit.c \
	ilo_blitter.c \
	ilo_blitter_blt.c \
	ilo_blitter_pipe.c \
	ilo_context.c \
	ilo_cp.c \
	ilo_format.c \
	ilo_gpe_gen6.c \
	ilo_gpe_gen7.c \
	ilo_gpgpu.c \
	ilo_query.c \
	ilo_resource.c \
	ilo_screen.c \
	ilo_shader.c \
	ilo_state.c \
	ilo_transfer.c \
	ilo_video.c \
	shader/ilo_shader_cs.c \
	shader/ilo_shader_fs.c \
	shader/ilo_shader_gs.c \
	shader/ilo_shader_vs.c \
	shader/toy_compiler.c \
	shader/toy_compiler_asm.c \
	shader/toy_compiler_disasm.c \
	shader/toy_legalize.c \
	shader/toy_legalize_ra.c \
	shader/toy_optimize.c \
	shader/toy_tgsi.c
/contrib/sdk/sources/Mesa/src/gallium/drivers/ilo/ilo_3d.c
0,0 → 1,796
/*
* Mesa 3-D graphics library
*
* Copyright (C) 2012-2013 LunarG, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*
* Authors:
* Chia-I Wu <olv@lunarg.com>
*/
 
#include "intel_winsys.h"
 
#include "ilo_3d_pipeline.h"
#include "ilo_context.h"
#include "ilo_cp.h"
#include "ilo_query.h"
#include "ilo_shader.h"
#include "ilo_state.h"
#include "ilo_3d.h"
 
/* Fold an occlusion query's (begin, end) depth-count pairs into
 * q->data.u64 and reset the register-read counter so the query can be
 * resumed later. */
static void
process_query_for_occlusion_counter(struct ilo_3d *hw3d,
                                    struct ilo_query *q)
{
   const uint64_t *counts;
   uint64_t total = 0;
   int idx;

   /* counts are written in (begin, end) pairs */
   assert(q->reg_read % 2 == 0);

   intel_bo_map(q->bo, false);
   counts = intel_bo_get_virtual(q->bo);
   for (idx = 1; idx < q->reg_read; idx += 2)
      total += counts[idx] - counts[idx - 1];
   intel_bo_unmap(q->bo);

   /* accumulate so that the query can be resumed if wanted */
   q->reg_read = 0;
   q->data.u64 += total;
}
 
/* Convert a raw TIMESTAMP register value to nanoseconds.  Only the low
 * 32 bits are meaningful and each tick is 80ns (see ilo_get_timestamp()). */
static uint64_t
timestamp_to_ns(uint64_t timestamp)
{
   const uint64_t ticks = timestamp & 0xffffffff;

   return ticks * 80;
}
 
/* Read back the single timestamp written for a PIPE_QUERY_TIMESTAMP query
 * and store it, converted to nanoseconds, in q->data.u64. */
static void
process_query_for_timestamp(struct ilo_3d *hw3d, struct ilo_query *q)
{
   const uint64_t *mapped;
   uint64_t raw;

   /* exactly one register write is expected */
   assert(q->reg_read == 1);

   intel_bo_map(q->bo, false);
   mapped = intel_bo_get_virtual(q->bo);
   raw = mapped[0];
   intel_bo_unmap(q->bo);

   q->reg_read = 0;
   q->data.u64 = timestamp_to_ns(raw);
}
 
/* Fold the (start, end) timestamp pairs of a time-elapsed query into
 * q->data.u64 (converted to nanoseconds) and reset the register-read
 * counter so the query can be resumed later. */
static void
process_query_for_time_elapsed(struct ilo_3d *hw3d, struct ilo_query *q)
{
   const uint64_t *stamps;
   uint64_t ticks = 0;
   int idx;

   /* timestamps are written in (start, end) pairs */
   assert(q->reg_read % 2 == 0);

   intel_bo_map(q->bo, false);
   stamps = intel_bo_get_virtual(q->bo);

   for (idx = 1; idx < q->reg_read; idx += 2)
      ticks += stamps[idx] - stamps[idx - 1];

   intel_bo_unmap(q->bo);

   /* accumulate so that the query can be resumed if wanted */
   q->reg_read = 0;
   q->data.u64 += timestamp_to_ns(ticks);
}
 
/**
 * Re-emit the opening register snapshots for all outstanding queries.
 *
 * Called when this context (re)acquires the render ring (see
 * ilo_3d_own_render_ring()).  Queries whose bos are already full have
 * their results accumulated first, which also resets reg_read.
 */
static void
ilo_3d_resume_queries(struct ilo_3d *hw3d)
{
   struct ilo_query *q;

   /* resume occlusion queries */
   LIST_FOR_EACH_ENTRY(q, &hw3d->occlusion_queries, list) {
      /* accumulate the result if the bo is already full */
      if (q->reg_read >= q->reg_total)
         process_query_for_occlusion_counter(hw3d, q);

      ilo_3d_pipeline_emit_write_depth_count(hw3d->pipeline,
            q->bo, q->reg_read++);
   }

   /* resume timer queries */
   LIST_FOR_EACH_ENTRY(q, &hw3d->time_elapsed_queries, list) {
      /* accumulate the result if the bo is already full */
      if (q->reg_read >= q->reg_total)
         process_query_for_time_elapsed(hw3d, q);

      ilo_3d_pipeline_emit_write_timestamp(hw3d->pipeline,
            q->bo, q->reg_read++);
   }
}
 
/**
 * Write the closing register snapshot for every outstanding query.
 *
 * Called when this context gives up the render ring (via
 * ilo_3d_release_render_ring()); the matching opening snapshots are
 * re-emitted by ilo_3d_resume_queries() when the ring is re-acquired.
 */
static void
ilo_3d_pause_queries(struct ilo_3d *hw3d)
{
   struct ilo_query *q;

   /* pause occlusion queries */
   LIST_FOR_EACH_ENTRY(q, &hw3d->occlusion_queries, list) {
      /* space for this write was reserved in ilo_3d_begin_query() */
      assert(q->reg_read < q->reg_total);
      ilo_3d_pipeline_emit_write_depth_count(hw3d->pipeline,
            q->bo, q->reg_read++);
   }

   /* pause timer queries */
   LIST_FOR_EACH_ENTRY(q, &hw3d->time_elapsed_queries, list) {
      assert(q->reg_read < q->reg_total);
      ilo_3d_pipeline_emit_write_timestamp(hw3d->pipeline,
            q->bo, q->reg_read++);
   }
}
 
/* ilo_cp owner-release callback: pause our outstanding queries before
 * another owner takes over the render ring. */
static void
ilo_3d_release_render_ring(struct ilo_cp *cp, void *data)
{
   ilo_3d_pause_queries((struct ilo_3d *) data);
}
 
/**
 * Switch the command parser to the render ring and claim ownership for the
 * 3D context, reserving owner_reserve bytes (for pausing queries).
 */
static void
ilo_3d_own_render_ring(struct ilo_3d *hw3d)
{
   ilo_cp_set_ring(hw3d->cp, ILO_CP_RING_RENDER);

   /* re-emit query begin-markers when ownership was (re)acquired
    * (NOTE(review): inferred from the pause/resume pairing — confirm
    * ilo_cp_set_owner()'s return convention) */
   if (ilo_cp_set_owner(hw3d->cp, &hw3d->owner, hw3d->owner_reserve))
      ilo_3d_resume_queries(hw3d);
}
 
/**
 * Begin a query.
 *
 * Takes ownership of the render ring and, depending on the query type,
 * allocates a result bo and emits the opening register snapshot.  Hardware
 * queries also reserve batch space so they can always be paused when the
 * ring changes owner; active queries are kept on per-type lists.
 */
void
ilo_3d_begin_query(struct ilo_context *ilo, struct ilo_query *q)
{
   struct ilo_3d *hw3d = ilo->hw3d;

   ilo_3d_own_render_ring(hw3d);

   switch (q->type) {
   case PIPE_QUERY_OCCLUSION_COUNTER:
      /* reserve some space for pausing the query */
      q->reg_cmd_size = ilo_3d_pipeline_estimate_size(hw3d->pipeline,
            ILO_3D_PIPELINE_WRITE_DEPTH_COUNT, NULL);
      hw3d->owner_reserve += q->reg_cmd_size;
      ilo_cp_set_owner(hw3d->cp, &hw3d->owner, hw3d->owner_reserve);

      q->data.u64 = 0;

      if (ilo_query_alloc_bo(q, 2, -1, hw3d->cp->winsys)) {
         /* XXX we should check the aperture size */
         ilo_3d_pipeline_emit_write_depth_count(hw3d->pipeline,
               q->bo, q->reg_read++);

         list_add(&q->list, &hw3d->occlusion_queries);
      }
      break;
   case PIPE_QUERY_TIMESTAMP:
      /* nop: only the ending timestamp matters, written in
       * ilo_3d_end_query() */
      break;
   case PIPE_QUERY_TIME_ELAPSED:
      /* reserve some space for pausing the query */
      q->reg_cmd_size = ilo_3d_pipeline_estimate_size(hw3d->pipeline,
            ILO_3D_PIPELINE_WRITE_TIMESTAMP, NULL);
      hw3d->owner_reserve += q->reg_cmd_size;
      ilo_cp_set_owner(hw3d->cp, &hw3d->owner, hw3d->owner_reserve);

      q->data.u64 = 0;

      if (ilo_query_alloc_bo(q, 2, -1, hw3d->cp->winsys)) {
         /* XXX we should check the aperture size */
         ilo_3d_pipeline_emit_write_timestamp(hw3d->pipeline,
               q->bo, q->reg_read++);

         list_add(&q->list, &hw3d->time_elapsed_queries);
      }
      break;
   case PIPE_QUERY_PRIMITIVES_GENERATED:
      /* software counter, bumped by update_prim_count() at draw time */
      q->data.u64 = 0;
      list_add(&q->list, &hw3d->prim_generated_queries);
      break;
   case PIPE_QUERY_PRIMITIVES_EMITTED:
      q->data.u64 = 0;
      list_add(&q->list, &hw3d->prim_emitted_queries);
      break;
   default:
      assert(!"unknown query type");
      break;
   }
}
 
/**
 * End a query.
 *
 * Removes the query from its active list (where applicable), returns the
 * batch space that was reserved for pausing it, and writes the closing
 * register snapshot.
 */
void
ilo_3d_end_query(struct ilo_context *ilo, struct ilo_query *q)
{
   struct ilo_3d *hw3d = ilo->hw3d;

   ilo_3d_own_render_ring(hw3d);

   switch (q->type) {
   case PIPE_QUERY_OCCLUSION_COUNTER:
      list_del(&q->list);

      /* space for this final write was reserved in ilo_3d_begin_query() */
      assert(q->reg_read < q->reg_total);
      hw3d->owner_reserve -= q->reg_cmd_size;
      ilo_cp_set_owner(hw3d->cp, &hw3d->owner, hw3d->owner_reserve);
      ilo_3d_pipeline_emit_write_depth_count(hw3d->pipeline,
            q->bo, q->reg_read++);
      break;
   case PIPE_QUERY_TIMESTAMP:
      /* only a single, ending timestamp is needed */
      q->data.u64 = 0;

      if (ilo_query_alloc_bo(q, 1, 1, hw3d->cp->winsys)) {
         ilo_3d_pipeline_emit_write_timestamp(hw3d->pipeline,
               q->bo, q->reg_read++);
      }
      break;
   case PIPE_QUERY_TIME_ELAPSED:
      list_del(&q->list);

      assert(q->reg_read < q->reg_total);
      hw3d->owner_reserve -= q->reg_cmd_size;
      ilo_cp_set_owner(hw3d->cp, &hw3d->owner, hw3d->owner_reserve);
      ilo_3d_pipeline_emit_write_timestamp(hw3d->pipeline,
            q->bo, q->reg_read++);
      break;
   case PIPE_QUERY_PRIMITIVES_GENERATED:
   case PIPE_QUERY_PRIMITIVES_EMITTED:
      /* software counters; nothing to emit */
      list_del(&q->list);
      break;
   default:
      assert(!"unknown query type");
      break;
   }
}
 
/**
 * Process the raw query data.
 *
 * Hardware queries read their result bos back; the primitive counters are
 * maintained in software and need no processing.
 */
void
ilo_3d_process_query(struct ilo_context *ilo, struct ilo_query *q)
{
   struct ilo_3d *hw3d = ilo->hw3d;
   const unsigned type = q->type;

   if (type == PIPE_QUERY_OCCLUSION_COUNTER) {
      if (q->bo)
         process_query_for_occlusion_counter(hw3d, q);
   } else if (type == PIPE_QUERY_TIMESTAMP) {
      if (q->bo)
         process_query_for_timestamp(hw3d, q);
   } else if (type == PIPE_QUERY_TIME_ELAPSED) {
      if (q->bo)
         process_query_for_time_elapsed(hw3d, q);
   } else if (type == PIPE_QUERY_PRIMITIVES_GENERATED ||
              type == PIPE_QUERY_PRIMITIVES_EMITTED) {
      /* q->data.u64 is kept up to date by update_prim_count() */
   } else {
      assert(!"unknown query type");
   }
}
 
/**
 * Hook for CP new-batch: optionally dump the submitted batch, then
 * invalidate pipeline state for the fresh batch.
 */
void
ilo_3d_cp_flushed(struct ilo_3d *hw3d)
{
   if (ilo_debug & ILO_DEBUG_3D)
      ilo_3d_pipeline_dump(hw3d->pipeline);

   /* the batch bo and state bo are new; invalidate anything emitted to
    * the old ones */
   ilo_3d_pipeline_invalidate(hw3d->pipeline,
         ILO_3D_PIPELINE_INVALIDATE_BATCH_BO |
         ILO_3D_PIPELINE_INVALIDATE_STATE_BO);

   /* presumably no HW context to preserve state across batches — also
    * invalidate the hardware state (confirm render_ctx semantics) */
   if (!hw3d->cp->render_ctx)
      ilo_3d_pipeline_invalidate(hw3d->pipeline,
            ILO_3D_PIPELINE_INVALIDATE_HW);

   hw3d->new_batch = true;
}
 
/**
 * Create a 3D context.
 *
 * Returns NULL on allocation failure; on success the caller owns the
 * returned context and must release it with ilo_3d_destroy().
 */
struct ilo_3d *
ilo_3d_create(struct ilo_cp *cp, const struct ilo_dev_info *dev)
{
   struct ilo_3d *hw3d = CALLOC_STRUCT(ilo_3d);

   if (!hw3d)
      return NULL;

   /* register ourselves as a possible owner of the command parser */
   hw3d->cp = cp;
   hw3d->owner.release_callback = ilo_3d_release_render_ring;
   hw3d->owner.release_data = hw3d;
   hw3d->new_batch = true;

   list_inithead(&hw3d->occlusion_queries);
   list_inithead(&hw3d->time_elapsed_queries);
   list_inithead(&hw3d->prim_generated_queries);
   list_inithead(&hw3d->prim_emitted_queries);

   hw3d->pipeline = ilo_3d_pipeline_create(cp, dev);
   if (!hw3d->pipeline) {
      FREE(hw3d);
      return NULL;
   }

   return hw3d;
}
 
/**
 * Destroy a 3D context created by ilo_3d_create().
 */
void
ilo_3d_destroy(struct ilo_3d *hw3d)
{
   ilo_3d_pipeline_destroy(hw3d->pipeline);

   /* drop our reference on the kernel bo, if one was ever allocated */
   if (hw3d->kernel.bo)
      intel_bo_unreference(hw3d->kernel.bo);

   FREE(hw3d);
}
 
/**
 * Emit the state and commands for one draw call.
 *
 * Ensures the whole sequence (optional manual flush + draw) fits in the
 * current batch, flushing the batch first when it does not.  The number of
 * generated/emitted primitives is returned through the out parameters;
 * returns what ilo_3d_pipeline_emit_draw() returns.
 */
static bool
draw_vbo(struct ilo_3d *hw3d, const struct ilo_context *ilo,
         int *prim_generated, int *prim_emitted)
{
   bool need_flush = false;
   int max_len;

   ilo_3d_own_render_ring(hw3d);

   if (!hw3d->new_batch) {
      /*
       * Without a better tracking mechanism, when the framebuffer changes, we
       * have to assume that the old framebuffer may be sampled from.  If that
       * happens in the middle of a batch buffer, we need to insert manual
       * flushes.
       */
      need_flush = (ilo->dirty & ILO_DIRTY_FB);

      /* same to SO target changes */
      need_flush |= (ilo->dirty & ILO_DIRTY_SO);
   }

   /* make sure there is enough room first */
   max_len = ilo_3d_pipeline_estimate_size(hw3d->pipeline,
         ILO_3D_PIPELINE_DRAW, ilo);
   if (need_flush) {
      max_len += ilo_3d_pipeline_estimate_size(hw3d->pipeline,
            ILO_3D_PIPELINE_FLUSH, NULL);
   }

   if (max_len > ilo_cp_space(hw3d->cp)) {
      ilo_cp_flush(hw3d->cp);
      /* flushing starts a new batch, making the manual flush unnecessary */
      need_flush = false;
      assert(max_len <= ilo_cp_space(hw3d->cp));
   }

   if (need_flush)
      ilo_3d_pipeline_emit_flush(hw3d->pipeline);

   return ilo_3d_pipeline_emit_draw(hw3d->pipeline, ilo,
         prim_generated, prim_emitted);
}
 
static void
update_prim_count(struct ilo_3d *hw3d, int generated, int emitted)
{
struct ilo_query *q;
 
LIST_FOR_EACH_ENTRY(q, &hw3d->prim_generated_queries, list)
q->data.u64 += generated;
 
LIST_FOR_EACH_ENTRY(q, &hw3d->prim_emitted_queries, list)
q->data.u64 += emitted;
}
 
/**
 * Evaluate the current render condition (set via ilo_render_condition()).
 *
 * Returns true when rendering should proceed: no conditional-render query
 * is set, the query result matches the stored condition, or the result is
 * unavailable and the mode allows not waiting for it.
 */
bool
ilo_3d_pass_render_condition(struct ilo_context *ilo)
{
   struct ilo_3d *hw3d = ilo->hw3d;
   uint64_t result;
   bool wait;

   if (!hw3d->render_condition.query)
      return true;

   /* map the pipe condition mode to a wait/no-wait flag */
   switch (hw3d->render_condition.mode) {
   case PIPE_RENDER_COND_WAIT:
   case PIPE_RENDER_COND_BY_REGION_WAIT:
      wait = true;
      break;
   case PIPE_RENDER_COND_NO_WAIT:
   case PIPE_RENDER_COND_BY_REGION_NO_WAIT:
   default:
      wait = false;
      break;
   }

   if (ilo->base.get_query_result(&ilo->base, hw3d->render_condition.query,
            wait, (union pipe_query_result *) &result))
      /* !result reduces the counter to a boolean for the comparison */
      return (!result == hw3d->render_condition.cond);
   else
      /* result not ready and we chose not to wait: draw unconditionally */
      return true;
}
 
#define UPDATE_MIN2(a, b) (a) = MIN2((a), (b))
#define UPDATE_MAX2(a, b) (a) = MAX2((a), (b))
 
/**
 * Split an indexed draw using primitive restart into restart-free sub-draws.
 *
 * Scans the index buffer \p elements (index size \p element_size, in bytes)
 * over [orig_info->start, orig_info->count) and writes one pipe_draw_info
 * per restart-free run into the caller-provided \p info array, with
 * min_index/max_index updated per run and primitive_restart cleared.
 * Returns the number of sub-primitives written.
 *
 * \see find_sub_primitives() from core mesa
 */
static int
ilo_find_sub_primitives(const void *elements, unsigned element_size,
                        const struct pipe_draw_info *orig_info,
                        struct pipe_draw_info *info)
{
   /* upper bound on the number of sub-primitives */
   const unsigned max_prims = orig_info->count - orig_info->start;
   unsigned i, cur_start, cur_count;
   int scan_index;
   unsigned scan_num;

   cur_start = orig_info->start;
   cur_count = 0;
   scan_num = 0;

#define IB_INDEX_READ(TYPE, INDEX) (((const TYPE *) elements)[INDEX])

#define SCAN_ELEMENTS(TYPE) \
   info[scan_num] = *orig_info; \
   info[scan_num].primitive_restart = false; \
   for (i = orig_info->start; i < orig_info->count; i++) { \
      scan_index = IB_INDEX_READ(TYPE, i); \
      if (scan_index == orig_info->restart_index) { \
         if (cur_count > 0) { \
            assert(scan_num < max_prims); \
            info[scan_num].start = cur_start; \
            info[scan_num].count = cur_count; \
            scan_num++; \
            info[scan_num] = *orig_info; \
            info[scan_num].primitive_restart = false; \
         } \
         cur_start = i + 1; \
         cur_count = 0; \
      } \
      else { \
         UPDATE_MIN2(info[scan_num].min_index, scan_index); \
         UPDATE_MAX2(info[scan_num].max_index, scan_index); \
         cur_count++; \
      } \
   } \
   if (cur_count > 0) { \
      assert(scan_num < max_prims); \
      info[scan_num].start = cur_start; \
      info[scan_num].count = cur_count; \
      scan_num++; \
   }

   /* dispatch on the index size in bytes */
   switch (element_size) {
   case 1:
      SCAN_ELEMENTS(uint8_t);
      break;
   case 2:
      SCAN_ELEMENTS(uint16_t);
      break;
   case 4:
      SCAN_ELEMENTS(uint32_t);
      break;
   default:
      assert(0 && "bad index_size in find_sub_primitives()");
   }

#undef SCAN_ELEMENTS

   return scan_num;
}
 
/**
 * Check whether the HW can handle the given primitive-restart index.
 *
 * Haswell (GEN 7.5) and newer support an arbitrary cut index; older GENs
 * require the all-ones value for the current index size.
 *
 * Fix: the original had unreachable `break` statements after each `return`
 * and no `default` label; the dead code is removed and the fallthrough is
 * made an explicit `default`.  Behavior is unchanged.
 */
static inline bool
ilo_check_restart_index(const struct ilo_context *ilo, unsigned restart_index)
{
   /* Haswell supports an arbitrary cut index; only check older GENs */
   if (ilo->dev->gen >= ILO_GEN(7.5))
      return true;

   /* Note: indices must be unsigned byte, unsigned short or unsigned int */
   switch (ilo->ib.index_size) {
   case 1:
      return ((restart_index & 0xff) == 0xff);
   case 2:
      return ((restart_index & 0xffff) == 0xffff);
   case 4:
      return (restart_index == 0xffffffff);
   default:
      return false;
   }
}
 
/**
 * Check whether the HW supports primitive restart for the given primitive
 * type: a set of basic types works on all 965 GENs, a second set only on
 * Haswell (GEN 7.5) and newer.
 */
static inline bool
ilo_check_restart_prim_type(const struct ilo_context *ilo, unsigned prim)
{
   switch (prim) {
   case PIPE_PRIM_POINTS:
   case PIPE_PRIM_LINES:
   case PIPE_PRIM_LINE_STRIP:
   case PIPE_PRIM_TRIANGLES:
   case PIPE_PRIM_TRIANGLE_STRIP:
      /* all 965 GEN graphics support a cut index for these types */
      return true;

   case PIPE_PRIM_LINE_LOOP:
   case PIPE_PRIM_POLYGON:
   case PIPE_PRIM_QUAD_STRIP:
   case PIPE_PRIM_QUADS:
   case PIPE_PRIM_TRIANGLE_FAN:
      /* Haswell and newer parts can also handle these types */
      return (ilo->dev->gen >= ILO_GEN(7.5));
   }

   return false;
}
 
/*
 * Handle VBOs using primitive restart in software.
 * Called when the restart index or primitive type cannot be handled by the
 * HW directly: the draw is split at each restart index and the resulting
 * sub-primitives are drawn one by one.
 */
/* Software fallback for primitive restart: split the indexed draw at each
 * restart index and issue one draw per sub-primitive.
 *
 * Fixes: (1) `map + ilo->ib.offset` performed arithmetic on a pointer to
 * void, which is a GNU extension and not ISO C — cast to a byte pointer
 * first; (2) the out-of-memory message lacked the trailing newline used by
 * every other ilo_err() call in this file. */
static void
ilo_draw_vbo_with_sw_restart(struct pipe_context *pipe,
                             const struct pipe_draw_info *info)
{
   struct ilo_context *ilo = ilo_context(pipe);
   struct pipe_draw_info *restart_info = NULL;
   int sub_prim_count = 1;

   /*
    * We have to break up the primitive into chunks manually.
    * Worst case, every other index could be a restart index so
    * need to have space for that many primitives.
    */
   restart_info = MALLOC(((info->count + 1) / 2) * sizeof(*info));
   if (NULL == restart_info) {
      /* If we can't get memory for this, bail out */
      ilo_err("%s:%d - Out of memory\n", __FILE__, __LINE__);
      return;
   }

   if (ilo->ib.buffer) {
      struct pipe_transfer *transfer;
      const void *map;

      map = pipe_buffer_map(pipe, ilo->ib.buffer,
            PIPE_TRANSFER_READ, &transfer);

      /* byte-based offset into the mapped index buffer */
      sub_prim_count =
         ilo_find_sub_primitives((const char *) map + ilo->ib.offset,
               ilo->ib.index_size, info, restart_info);

      pipe_buffer_unmap(pipe, transfer);
   }
   else {
      sub_prim_count = ilo_find_sub_primitives(ilo->ib.user_buffer,
            ilo->ib.index_size, info, restart_info);
   }

   /* draw each restart-free sub-primitive */
   info = restart_info;

   while (sub_prim_count > 0) {
      pipe->draw_vbo(pipe, info);

      sub_prim_count--;
      info++;
   }

   FREE(restart_info);
}
 
/**
 * Upload the shader cache into the kernel bo, (re)allocating the bo when
 * it is too small or when it may still be referenced by a previous batch.
 *
 * Returns false on allocation or upload failure.
 */
static bool
upload_shaders(struct ilo_3d *hw3d, struct ilo_shader_cache *shc)
{
   bool incremental = true;
   int upload;

   /* with a NULL bo this is a dry run returning the size to be uploaded
    * (see the assert below) */
   upload = ilo_shader_cache_upload(shc,
         NULL, hw3d->kernel.used, incremental);
   if (!upload)
      return true;

   /*
    * Allocate a new bo.  When this is a new batch, assume the bo is still in
    * use by the previous batch and force allocation.
    *
    * Does it help to make shader cache upload with unsynchronized mapping,
    * and remove the check for new batch here?
    */
   if (hw3d->kernel.used + upload > hw3d->kernel.size || hw3d->new_batch) {
      /* grow by doubling, starting from 8 KiB */
      unsigned new_size = (hw3d->kernel.size) ?
         hw3d->kernel.size : (8 * 1024);

      while (hw3d->kernel.used + upload > new_size)
         new_size *= 2;

      if (hw3d->kernel.bo)
         intel_bo_unreference(hw3d->kernel.bo);

      hw3d->kernel.bo = intel_winsys_alloc_buffer(hw3d->cp->winsys,
            "kernel bo", new_size, 0);
      if (!hw3d->kernel.bo) {
         ilo_err("failed to allocate kernel bo\n");
         return false;
      }

      /* a fresh bo needs a full (non-incremental) upload */
      hw3d->kernel.used = 0;
      hw3d->kernel.size = new_size;
      incremental = false;

      assert(new_size >= ilo_shader_cache_upload(shc,
            NULL, hw3d->kernel.used, incremental));

      /* kernel offsets changed; invalidate state that refers to them */
      ilo_3d_pipeline_invalidate(hw3d->pipeline,
            ILO_3D_PIPELINE_INVALIDATE_KERNEL_BO);
   }

   upload = ilo_shader_cache_upload(shc,
         hw3d->kernel.bo, hw3d->kernel.used, incremental);
   if (upload < 0) {
      ilo_err("failed to upload shaders\n");
      return false;
   }

   hw3d->kernel.used += upload;

   assert(hw3d->kernel.used <= hw3d->kernel.size);

   return true;
}
 
/**
 * The pipe_context::draw_vbo hook.
 *
 * Honors the render condition, falls back to the software
 * primitive-restart path when the HW cannot handle the request, then
 * finalizes states, uploads shaders, and emits the draw.
 */
static void
ilo_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
{
   struct ilo_context *ilo = ilo_context(pipe);
   struct ilo_3d *hw3d = ilo->hw3d;
   int prim_generated, prim_emitted;

   if (!ilo_3d_pass_render_condition(ilo))
      return;

   if (info->primitive_restart && info->indexed) {
      /*
       * Want to draw an indexed primitive using primitive restart.
       * Check that HW can handle the request and fall to SW if not.
       */
      if (!ilo_check_restart_index(ilo, info->restart_index) ||
          !ilo_check_restart_prim_type(ilo, info->mode)) {
         ilo_draw_vbo_with_sw_restart(pipe, info);
         return;
      }
   }

   ilo_finalize_3d_states(ilo, info);

   if (!upload_shaders(hw3d, ilo->shader_cache))
      return;

   /* If draw_vbo ever fails, return immediately. */
   if (!draw_vbo(hw3d, ilo, &prim_generated, &prim_emitted))
      return;

   /* clear dirty status */
   ilo->dirty = 0x0;
   hw3d->new_batch = false;

   /* avoid dangling pointer reference */
   ilo->draw = NULL;

   update_prim_count(hw3d, prim_generated, prim_emitted);

   if (ilo_debug & ILO_DEBUG_NOCACHE)
      ilo_3d_pipeline_emit_flush(hw3d->pipeline);
}
 
/**
 * The pipe_context::render_condition hook: stash the query, mode, and
 * condition for later evaluation by ilo_3d_pass_render_condition().
 */
static void
ilo_render_condition(struct pipe_context *pipe,
                     struct pipe_query *query,
                     boolean condition,
                     uint mode)
{
   struct ilo_3d *hw3d = ilo_context(pipe)->hw3d;

   /* reference count? */
   hw3d->render_condition.cond = condition;
   hw3d->render_condition.mode = mode;
   hw3d->render_condition.query = query;
}
 
/**
 * The pipe_context::texture_barrier hook: emit a pipeline flush on the
 * render ring so subsequent texture reads see prior rendering.
 */
static void
ilo_texture_barrier(struct pipe_context *pipe)
{
   struct ilo_context *ilo = ilo_context(pipe);

   if (ilo->cp->ring != ILO_CP_RING_RENDER)
      return;

   ilo_3d_pipeline_emit_flush(ilo->hw3d->pipeline);

   /* don't know why */
   if (ilo->dev->gen >= ILO_GEN(7))
      ilo_cp_flush(ilo->hw3d->cp);
}
 
static void
ilo_get_sample_position(struct pipe_context *pipe,
unsigned sample_count,
unsigned sample_index,
float *out_value)
{
struct ilo_context *ilo = ilo_context(pipe);
struct ilo_3d *hw3d = ilo->hw3d;
 
ilo_3d_pipeline_get_sample_position(hw3d->pipeline,
sample_count, sample_index,
&out_value[0], &out_value[1]);
}
 
/**
* Initialize 3D-related functions.
*/
void
ilo_init_3d_functions(struct ilo_context *ilo)
{
ilo->base.draw_vbo = ilo_draw_vbo;
ilo->base.render_condition = ilo_render_condition;
ilo->base.texture_barrier = ilo_texture_barrier;
ilo->base.get_sample_position = ilo_get_sample_position;
}
/contrib/sdk/sources/Mesa/src/gallium/drivers/ilo/ilo_3d.h
0,0 → 1,91
/*
* Mesa 3-D graphics library
*
* Copyright (C) 2012-2013 LunarG, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*
* Authors:
* Chia-I Wu <olv@lunarg.com>
*/
 
#ifndef ILO_3D_H
#define ILO_3D_H
 
#include "ilo_common.h"
#include "ilo_cp.h"
 
struct ilo_3d_pipeline;
struct ilo_context;
struct ilo_query;
 
/**
 * 3D context.
 */
struct ilo_3d {
   struct ilo_cp *cp;          /* command parser used for emission */
   struct ilo_cp_owner owner;  /* our claim on the render ring */
   int owner_reserve;          /* batch space reserved for pausing queries */

   bool new_batch;             /* true until a draw lands in this batch */

   /* bo holding the uploaded shader kernels (see upload_shaders()) */
   struct {
      struct intel_bo *bo;
      unsigned used, size;
   } kernel;

   /* conditional-rendering state (see ilo_3d_pass_render_condition()) */
   struct {
      struct pipe_query *query;
      unsigned mode;
      bool cond;
   } render_condition;

   /* active queries, grouped by type */
   struct list_head occlusion_queries;
   struct list_head time_elapsed_queries;
   struct list_head prim_generated_queries;
   struct list_head prim_emitted_queries;

   struct ilo_3d_pipeline *pipeline;
};
 
struct ilo_3d *
ilo_3d_create(struct ilo_cp *cp, const struct ilo_dev_info *dev);
 
void
ilo_3d_destroy(struct ilo_3d *hw3d);
 
void
ilo_3d_cp_flushed(struct ilo_3d *hw3d);
 
void
ilo_3d_begin_query(struct ilo_context *ilo, struct ilo_query *q);
 
void
ilo_3d_end_query(struct ilo_context *ilo, struct ilo_query *q);
 
void
ilo_3d_process_query(struct ilo_context *ilo, struct ilo_query *q);
 
bool
ilo_3d_pass_render_condition(struct ilo_context *ilo);
 
void
ilo_init_3d_functions(struct ilo_context *ilo);
 
#endif /* ILO_3D_H */
/contrib/sdk/sources/Mesa/src/gallium/drivers/ilo/ilo_3d_pipeline.c
0,0 → 1,291
/*
* Mesa 3-D graphics library
*
* Copyright (C) 2013 LunarG, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*
* Authors:
* Chia-I Wu <olv@lunarg.com>
*/
 
#include "util/u_prim.h"
#include "intel_winsys.h"
 
#include "ilo_context.h"
#include "ilo_cp.h"
#include "ilo_state.h"
#include "ilo_3d_pipeline_gen6.h"
#include "ilo_3d_pipeline_gen7.h"
#include "ilo_3d_pipeline.h"
 
/* sub-pixel sample positions in U0.4 fixed point: coordinates are
 * sixteenths of a pixel, so (8, 8) is the pixel center */
struct sample_position {
uint8_t x, y;
};

/* \see gen6_get_sample_position() */
static const struct sample_position sample_position_1x[1] = {
{ 8, 8 }, /* the single sample sits at the pixel center */
};

static const struct sample_position sample_position_4x[4] = {
{ 6, 2 }, /* distance from the center is sqrt(40) */
{ 14, 6 }, /* distance from the center is sqrt(40) */
{ 2, 10 }, /* distance from the center is sqrt(40) */
{ 10, 14 }, /* distance from the center is sqrt(40) */
};

static const struct sample_position sample_position_8x[8] = {
{ 7, 9 }, /* distance from the center is sqrt(2) */
{ 9, 13 }, /* distance from the center is sqrt(26) */
{ 11, 3 }, /* distance from the center is sqrt(34) */
{ 13, 11 }, /* distance from the center is sqrt(34) */
{ 1, 7 }, /* distance from the center is sqrt(50) */
{ 5, 1 }, /* distance from the center is sqrt(58) */
{ 15, 5 }, /* distance from the center is sqrt(58) */
{ 3, 15 }, /* distance from the center is sqrt(74) */
};
 
/**
 * Create a 3D pipeline for the given device, emitting commands through
 * \p cp.  Returns NULL on unsupported GEN or allocation failure.
 */
struct ilo_3d_pipeline *
ilo_3d_pipeline_create(struct ilo_cp *cp, const struct ilo_dev_info *dev)
{
   struct ilo_3d_pipeline *p;
   int i;

   p = CALLOC_STRUCT(ilo_3d_pipeline);
   if (!p)
      return NULL;

   p->cp = cp;
   p->dev = dev;

   /* hook up the GEN-specific emit/estimate callbacks */
   switch (p->dev->gen) {
   case ILO_GEN(6):
      ilo_3d_pipeline_init_gen6(p);
      break;
   case ILO_GEN(7):
      ilo_3d_pipeline_init_gen7(p);
      break;
   default:
      assert(!"unsupported GEN");
      FREE(p);
      return NULL;
   }

   p->invalidate_flags = ILO_3D_PIPELINE_INVALIDATE_ALL;

   /* scratch page for PIPE_CONTROL post-sync write workarounds */
   p->workaround_bo = intel_winsys_alloc_buffer(p->cp->winsys,
         "PIPE_CONTROL workaround", 4096, 0);
   if (!p->workaround_bo) {
      ilo_warn("failed to allocate PIPE_CONTROL workaround bo\n");
      FREE(p);
      return NULL;
   }

   /* pack the U0.4 positions one byte per sample: x in the high nibble,
    * y in the low nibble */
   p->packed_sample_position_1x =
      sample_position_1x[0].x << 4 |
      sample_position_1x[0].y;

   /* pack into dwords, four samples per dword */
   for (i = 0; i < 4; i++) {
      p->packed_sample_position_4x |=
         sample_position_4x[i].x << (8 * i + 4) |
         sample_position_4x[i].y << (8 * i);

      p->packed_sample_position_8x[0] |=
         sample_position_8x[i].x << (8 * i + 4) |
         sample_position_8x[i].y << (8 * i);

      p->packed_sample_position_8x[1] |=
         sample_position_8x[4 + i].x << (8 * i + 4) |
         sample_position_8x[4 + i].y << (8 * i);
   }

   return p;
}
 
/**
 * Destroy a 3D pipeline, releasing the workaround buffer it owns.
 */
void
ilo_3d_pipeline_destroy(struct ilo_3d_pipeline *p)
{
   struct intel_bo *wa_bo = p->workaround_bo;

   if (wa_bo)
      intel_bo_unreference(wa_bo);

   FREE(p);
}
 
/*
 * React to the batch bo having been invalidated.  On GEN6 the tracked
 * wa_pipe_control flag belongs to the old batch and must be dropped.
 * When \p unset is true, the BATCH_BO invalidate flag is consumed.
 */
static void
handle_invalid_batch_bo(struct ilo_3d_pipeline *p, bool unset)
{
   if (!(p->invalidate_flags & ILO_3D_PIPELINE_INVALIDATE_BATCH_BO))
      return;

   if (p->dev->gen == ILO_GEN(6))
      p->state.has_gen6_wa_pipe_control = false;

   if (unset)
      p->invalidate_flags &= ~ILO_3D_PIPELINE_INVALIDATE_BATCH_BO;
}
 
/**
* Emit context states and 3DPRIMITIVE.
*
* Returns true on success.  Returns false only when the draw cannot fit
* in the aperture even with an otherwise empty batch; nothing is left
* emitted in that case.
*/
bool
ilo_3d_pipeline_emit_draw(struct ilo_3d_pipeline *p,
const struct ilo_context *ilo,
int *prim_generated, int *prim_emitted)
{
bool success;

if (ilo->dirty & ILO_DIRTY_SO &&
ilo->so.enabled && !ilo->so.append_bitmask) {
/*
* We keep track of the SVBI in the driver, so that we can restore it
* when the HW context is invalidated (by another process). The value
* needs to be reset when stream output is enabled and the targets are
* changed.
*/
p->state.so_num_vertices = 0;

/* on GEN7+, we need SOL_RESET to reset the SO write offsets */
if (p->dev->gen >= ILO_GEN(7))
ilo_cp_set_one_off_flags(p->cp, INTEL_EXEC_GEN7_SOL_RESET);
}


/* emit-check-rewind loop: emit the draw, and if the resulting batch no
* longer fits in the aperture, rewind the CP to the saved point, flush
* what was there before, and try again with an empty batch */
while (true) {
struct ilo_cp_jmp_buf jmp;
int err;

/* we will rewind if aperture check below fails */
ilo_cp_setjmp(p->cp, &jmp);

handle_invalid_batch_bo(p, false);

/* draw!  (an implicit CP flush here would invalidate the jmp point,
* hence the asserts around the emit) */
ilo_cp_assert_no_implicit_flush(p->cp, true);
p->emit_draw(p, ilo);
ilo_cp_assert_no_implicit_flush(p->cp, false);

err = intel_winsys_check_aperture_space(ilo->winsys, &p->cp->bo, 1);
if (!err) {
success = true;
break;
}

/* rewind */
ilo_cp_longjmp(p->cp, &jmp);

/* the batch was already empty, so the draw can never fit */
if (ilo_cp_empty(p->cp)) {
success = false;
break;
}
else {
/* flush and try again */
ilo_cp_flush(p->cp);
}
}

if (success) {
/* advance the software SVBI: primitives generated by this draw, and
* how many of them stream output could actually emit before filling
* the targets */
const int num_verts =
u_vertices_per_prim(u_reduced_prim(ilo->draw->mode));
const int max_emit =
(p->state.so_max_vertices - p->state.so_num_vertices) / num_verts;
const int generated =
u_reduced_prims_for_vertices(ilo->draw->mode, ilo->draw->count);
const int emitted = MIN2(generated, max_emit);

p->state.so_num_vertices += emitted * num_verts;

if (prim_generated)
*prim_generated = generated;

if (prim_emitted)
*prim_emitted = emitted;
}

/* everything that needed re-emission has been re-emitted */
p->invalidate_flags = 0x0;

return success;
}
 
/**
 * Emit a PIPE_CONTROL that flushes all caches.
 */
void
ilo_3d_pipeline_emit_flush(struct ilo_3d_pipeline *p)
{
   /* drop stale per-batch state first; this emit starts fresh */
   handle_invalid_batch_bo(p, true);

   p->emit_flush(p);
}
 
/**
 * Emit a PIPE_CONTROL with the PIPE_CONTROL_WRITE_TIMESTAMP post-sync
 * op, writing the timestamp to slot \p index of \p bo.
 */
void
ilo_3d_pipeline_emit_write_timestamp(struct ilo_3d_pipeline *p,
                                     struct intel_bo *bo, int index)
{
   /* drop stale per-batch state first */
   handle_invalid_batch_bo(p, true);

   p->emit_write_timestamp(p, bo, index);
}
 
/**
 * Emit a PIPE_CONTROL with the PIPE_CONTROL_WRITE_DEPTH_COUNT post-sync
 * op, writing the depth count to slot \p index of \p bo.
 */
void
ilo_3d_pipeline_emit_write_depth_count(struct ilo_3d_pipeline *p,
                                       struct intel_bo *bo, int index)
{
   /* drop stale per-batch state first */
   handle_invalid_batch_bo(p, true);

   p->emit_write_depth_count(p, bo, index);
}
 
/**
 * Return the sub-pixel position of \p sample_index for a surface with
 * \p sample_count samples, as fractions of a pixel in [0, 1).
 *
 * Positions are stored in U0.4 fixed point and divided by 16 here.
 * An unknown sample count asserts and yields the pixel center.
 */
void
ilo_3d_pipeline_get_sample_position(struct ilo_3d_pipeline *p,
                                    unsigned sample_count,
                                    unsigned sample_index,
                                    float *x, float *y)
{
   const struct sample_position *pos;

   switch (sample_count) {
   case 1:
      assert(sample_index < Elements(sample_position_1x));
      pos = sample_position_1x;
      break;
   case 4:
      assert(sample_index < Elements(sample_position_4x));
      pos = sample_position_4x;
      break;
   case 8:
      assert(sample_index < Elements(sample_position_8x));
      pos = sample_position_8x;
      break;
   default:
      assert(!"unknown sample count");
      *x = 0.5f;
      *y = 0.5f;
      return;
   }

   *x = (float) pos[sample_index].x / 16.0f;
   *y = (float) pos[sample_index].y / 16.0f;
}
/contrib/sdk/sources/Mesa/src/gallium/drivers/ilo/ilo_3d_pipeline.h
0,0 → 1,281
/*
* Mesa 3-D graphics library
*
* Copyright (C) 2013 LunarG, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*
* Authors:
* Chia-I Wu <olv@lunarg.com>
*/
 
#ifndef ILO_3D_PIPELINE_H
#define ILO_3D_PIPELINE_H
 
#include "ilo_common.h"
#include "ilo_context.h"
#include "ilo_gpe_gen6.h"
#include "ilo_gpe_gen7.h"
 
struct intel_bo;
struct ilo_cp;
struct ilo_context;
 
/* reasons to re-emit otherwise-unchanged pipeline state */
enum ilo_3d_pipeline_invalidate_flags {
ILO_3D_PIPELINE_INVALIDATE_HW = 1 << 0, /* HW context lost — TODO confirm semantics */
ILO_3D_PIPELINE_INVALIDATE_BATCH_BO = 1 << 1, /* batch bo replaced; see handle_invalid_batch_bo() */
ILO_3D_PIPELINE_INVALIDATE_STATE_BO = 1 << 2, /* state bo replaced — presumably; verify with callers */
ILO_3D_PIPELINE_INVALIDATE_KERNEL_BO = 1 << 3, /* kernel (shader) bo replaced — presumably; verify with callers */

ILO_3D_PIPELINE_INVALIDATE_ALL = 0xffffffff,
};

/* actions whose batch-space cost can be estimated and then emitted */
enum ilo_3d_pipeline_action {
ILO_3D_PIPELINE_DRAW,
ILO_3D_PIPELINE_FLUSH,
ILO_3D_PIPELINE_WRITE_TIMESTAMP,
ILO_3D_PIPELINE_WRITE_DEPTH_COUNT,
};
 
/**
* 3D pipeline.  Wraps a command parser with GEN-specific emit callbacks
* and caches the batch offsets of the state objects last emitted.
*/
struct ilo_3d_pipeline {
struct ilo_cp *cp; /* command parser commands are emitted to */
const struct ilo_dev_info *dev;

uint32_t invalidate_flags; /* pending ILO_3D_PIPELINE_INVALIDATE_x bits */

struct intel_bo *workaround_bo; /* scratch bo for PIPE_CONTROL workarounds */

/* sample positions pre-packed for 3DSTATE_SAMPLE_PATTERN-style use,
* one byte per sample (x high nibble, y low nibble) */
uint32_t packed_sample_position_1x;
uint32_t packed_sample_position_4x;
uint32_t packed_sample_position_8x[2];

/* GEN-specific entry points, set by ilo_3d_pipeline_init_gen6/gen7() */
int (*estimate_size)(struct ilo_3d_pipeline *pipeline,
enum ilo_3d_pipeline_action action,
const void *arg);

void (*emit_draw)(struct ilo_3d_pipeline *pipeline,
const struct ilo_context *ilo);

void (*emit_flush)(struct ilo_3d_pipeline *pipeline);

void (*emit_write_timestamp)(struct ilo_3d_pipeline *pipeline,
struct intel_bo *bo, int index);

void (*emit_write_depth_count)(struct ilo_3d_pipeline *pipeline,
struct intel_bo *bo, int index);

/**
* all GPE functions of all GENs
*
* GEN6_EMIT(FOO) declares a member "ilo_gpe_gen6_FOO gen6_FOO", i.e. a
* function pointer of the typedef'ed GPE emitter type (see
* ilo_gpe_gen6.h); likewise for GEN7_EMIT.
*/
#define GEN6_EMIT(name) ilo_gpe_gen6_ ## name gen6_ ## name
GEN6_EMIT(STATE_BASE_ADDRESS);
GEN6_EMIT(STATE_SIP);
GEN6_EMIT(PIPELINE_SELECT);
GEN6_EMIT(3DSTATE_BINDING_TABLE_POINTERS);
GEN6_EMIT(3DSTATE_SAMPLER_STATE_POINTERS);
GEN6_EMIT(3DSTATE_URB);
GEN6_EMIT(3DSTATE_VERTEX_BUFFERS);
GEN6_EMIT(3DSTATE_VERTEX_ELEMENTS);
GEN6_EMIT(3DSTATE_INDEX_BUFFER);
GEN6_EMIT(3DSTATE_VF_STATISTICS);
GEN6_EMIT(3DSTATE_VIEWPORT_STATE_POINTERS);
GEN6_EMIT(3DSTATE_CC_STATE_POINTERS);
GEN6_EMIT(3DSTATE_SCISSOR_STATE_POINTERS);
GEN6_EMIT(3DSTATE_VS);
GEN6_EMIT(3DSTATE_GS);
GEN6_EMIT(3DSTATE_CLIP);
GEN6_EMIT(3DSTATE_SF);
GEN6_EMIT(3DSTATE_WM);
GEN6_EMIT(3DSTATE_CONSTANT_VS);
GEN6_EMIT(3DSTATE_CONSTANT_GS);
GEN6_EMIT(3DSTATE_CONSTANT_PS);
GEN6_EMIT(3DSTATE_SAMPLE_MASK);
GEN6_EMIT(3DSTATE_DRAWING_RECTANGLE);
GEN6_EMIT(3DSTATE_DEPTH_BUFFER);
GEN6_EMIT(3DSTATE_POLY_STIPPLE_OFFSET);
GEN6_EMIT(3DSTATE_POLY_STIPPLE_PATTERN);
GEN6_EMIT(3DSTATE_LINE_STIPPLE);
GEN6_EMIT(3DSTATE_AA_LINE_PARAMETERS);
GEN6_EMIT(3DSTATE_GS_SVB_INDEX);
GEN6_EMIT(3DSTATE_MULTISAMPLE);
GEN6_EMIT(3DSTATE_STENCIL_BUFFER);
GEN6_EMIT(3DSTATE_HIER_DEPTH_BUFFER);
GEN6_EMIT(3DSTATE_CLEAR_PARAMS);
GEN6_EMIT(PIPE_CONTROL);
GEN6_EMIT(3DPRIMITIVE);
GEN6_EMIT(INTERFACE_DESCRIPTOR_DATA);
GEN6_EMIT(SF_VIEWPORT);
GEN6_EMIT(CLIP_VIEWPORT);
GEN6_EMIT(CC_VIEWPORT);
GEN6_EMIT(COLOR_CALC_STATE);
GEN6_EMIT(BLEND_STATE);
GEN6_EMIT(DEPTH_STENCIL_STATE);
GEN6_EMIT(SCISSOR_RECT);
GEN6_EMIT(BINDING_TABLE_STATE);
GEN6_EMIT(SURFACE_STATE);
GEN6_EMIT(so_SURFACE_STATE);
GEN6_EMIT(SAMPLER_STATE);
GEN6_EMIT(SAMPLER_BORDER_COLOR_STATE);
GEN6_EMIT(push_constant_buffer);
#undef GEN6_EMIT

#define GEN7_EMIT(name) ilo_gpe_gen7_ ## name gen7_ ## name
GEN7_EMIT(3DSTATE_DEPTH_BUFFER);
GEN7_EMIT(3DSTATE_CC_STATE_POINTERS);
GEN7_EMIT(3DSTATE_GS);
GEN7_EMIT(3DSTATE_SF);
GEN7_EMIT(3DSTATE_WM);
GEN7_EMIT(3DSTATE_SAMPLE_MASK);
GEN7_EMIT(3DSTATE_CONSTANT_HS);
GEN7_EMIT(3DSTATE_CONSTANT_DS);
GEN7_EMIT(3DSTATE_HS);
GEN7_EMIT(3DSTATE_TE);
GEN7_EMIT(3DSTATE_DS);
GEN7_EMIT(3DSTATE_STREAMOUT);
GEN7_EMIT(3DSTATE_SBE);
GEN7_EMIT(3DSTATE_PS);
GEN7_EMIT(3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP);
GEN7_EMIT(3DSTATE_VIEWPORT_STATE_POINTERS_CC);
GEN7_EMIT(3DSTATE_BLEND_STATE_POINTERS);
GEN7_EMIT(3DSTATE_DEPTH_STENCIL_STATE_POINTERS);
GEN7_EMIT(3DSTATE_BINDING_TABLE_POINTERS_VS);
GEN7_EMIT(3DSTATE_BINDING_TABLE_POINTERS_HS);
GEN7_EMIT(3DSTATE_BINDING_TABLE_POINTERS_DS);
GEN7_EMIT(3DSTATE_BINDING_TABLE_POINTERS_GS);
GEN7_EMIT(3DSTATE_BINDING_TABLE_POINTERS_PS);
GEN7_EMIT(3DSTATE_SAMPLER_STATE_POINTERS_VS);
GEN7_EMIT(3DSTATE_SAMPLER_STATE_POINTERS_HS);
GEN7_EMIT(3DSTATE_SAMPLER_STATE_POINTERS_DS);
GEN7_EMIT(3DSTATE_SAMPLER_STATE_POINTERS_GS);
GEN7_EMIT(3DSTATE_SAMPLER_STATE_POINTERS_PS);
GEN7_EMIT(3DSTATE_URB_VS);
GEN7_EMIT(3DSTATE_URB_HS);
GEN7_EMIT(3DSTATE_URB_DS);
GEN7_EMIT(3DSTATE_URB_GS);
GEN7_EMIT(3DSTATE_PUSH_CONSTANT_ALLOC_VS);
GEN7_EMIT(3DSTATE_PUSH_CONSTANT_ALLOC_HS);
GEN7_EMIT(3DSTATE_PUSH_CONSTANT_ALLOC_DS);
GEN7_EMIT(3DSTATE_PUSH_CONSTANT_ALLOC_GS);
GEN7_EMIT(3DSTATE_PUSH_CONSTANT_ALLOC_PS);
GEN7_EMIT(3DSTATE_SO_DECL_LIST);
GEN7_EMIT(3DSTATE_SO_BUFFER);
GEN7_EMIT(SF_CLIP_VIEWPORT);
#undef GEN7_EMIT

/**
* HW states.  The uint32_t members named after state objects hold the
* batch offsets of the copies last emitted (see ilo_3d_pipeline_dump.c,
* which reads the batch back at these offsets).
*/
struct ilo_3d_pipeline_state {
bool has_gen6_wa_pipe_control; /* GEN6 PIPE_CONTROL workaround already emitted in this batch */

bool primitive_restart;
int reduced_prim;
/* software copy of the streamed-vertex-buffer index (SVBI) */
int so_num_vertices, so_max_vertices;

uint32_t SF_VIEWPORT;
uint32_t CLIP_VIEWPORT;
uint32_t SF_CLIP_VIEWPORT; /* GEN7+ */
uint32_t CC_VIEWPORT;

uint32_t COLOR_CALC_STATE;
uint32_t BLEND_STATE;
uint32_t DEPTH_STENCIL_STATE;

uint32_t SCISSOR_RECT;

/* per-stage surface/sampler/constant state offsets */
struct {
uint32_t BINDING_TABLE_STATE;
int BINDING_TABLE_STATE_size;
uint32_t SURFACE_STATE[ILO_MAX_VS_SURFACES];
uint32_t SAMPLER_STATE;
uint32_t SAMPLER_BORDER_COLOR_STATE[ILO_MAX_SAMPLERS];
uint32_t PUSH_CONSTANT_BUFFER;
int PUSH_CONSTANT_BUFFER_size;
} vs;

struct {
uint32_t BINDING_TABLE_STATE;
int BINDING_TABLE_STATE_size;
uint32_t SURFACE_STATE[ILO_MAX_GS_SURFACES];
bool active;
} gs;

struct {
uint32_t BINDING_TABLE_STATE;
int BINDING_TABLE_STATE_size;
uint32_t SURFACE_STATE[ILO_MAX_WM_SURFACES];
uint32_t SAMPLER_STATE;
uint32_t SAMPLER_BORDER_COLOR_STATE[ILO_MAX_SAMPLERS];
} wm;
} state;
};
 
struct ilo_3d_pipeline *
ilo_3d_pipeline_create(struct ilo_cp *cp, const struct ilo_dev_info *dev);
 
void
ilo_3d_pipeline_destroy(struct ilo_3d_pipeline *pipeline);
 
 
/**
 * Mark the given aspects of the pipeline invalidated, so affected states
 * get re-emitted on the next emit.
 */
static inline void
ilo_3d_pipeline_invalidate(struct ilo_3d_pipeline *p, uint32_t flags)
{
   p->invalidate_flags |= flags;
}
 
/**
 * Estimate, in dwords, the batch space an action will take to emit.
 */
static inline int
ilo_3d_pipeline_estimate_size(struct ilo_3d_pipeline *pipeline,
                              enum ilo_3d_pipeline_action action,
                              const void *arg)
{
   /* forward to the GEN-specific estimator */
   return pipeline->estimate_size(pipeline, action, arg);
}
 
bool
ilo_3d_pipeline_emit_draw(struct ilo_3d_pipeline *p,
const struct ilo_context *ilo,
int *prim_generated, int *prim_emitted);
 
void
ilo_3d_pipeline_emit_flush(struct ilo_3d_pipeline *p);
 
void
ilo_3d_pipeline_emit_write_timestamp(struct ilo_3d_pipeline *p,
struct intel_bo *bo, int index);
 
void
ilo_3d_pipeline_emit_write_depth_count(struct ilo_3d_pipeline *p,
struct intel_bo *bo, int index);
 
void
ilo_3d_pipeline_get_sample_position(struct ilo_3d_pipeline *p,
unsigned sample_count,
unsigned sample_index,
float *x, float *y);
 
void
ilo_3d_pipeline_dump(struct ilo_3d_pipeline *p);
 
#endif /* ILO_3D_PIPELINE_H */
/contrib/sdk/sources/Mesa/src/gallium/drivers/ilo/ilo_3d_pipeline_dump.c
0,0 → 1,643
/*
* Copyright © 2007 Intel Corporation
* Copyright (C) 2012-2013 LunarG, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*
* Authors:
* Eric Anholt <eric@anholt.net>
* Chia-I Wu <olv@lunarg.com>
*/
 
#include "intel_winsys.h"
 
#include "ilo_cp.h"
#include "ilo_3d_pipeline.h"
 
#define PRINTFLIKE(f, a) _util_printf_format(f, a)
typedef short GLshort;
typedef int GLint;
typedef unsigned char GLubyte;
typedef unsigned int GLuint;
typedef float GLfloat;
#include <stdint.h>
#include <stdarg.h>
#include <stdio.h>
#include "brw_structs.h"
#include "brw_defines.h"
 
/* minimal stand-ins for the i965 driver's context types — just enough
 * for the brw_state_dump code borrowed below to compile unchanged */
struct intel_context {
int gen; /* major GEN number */

struct {
struct {
void *virtual; /* CPU mapping of the batch bo */
} *bo, bo_dst;
} batch;
};

struct brw_context {
struct intel_context intel;
};
 
static void
batch_out(struct brw_context *brw, const char *name, uint32_t offset,
          int index, const char *fmt, ...) PRINTFLIKE(5, 6);

/**
 * Print one dword of a state object to stderr as
 * "<address>: <raw value>: <name>: " followed by the formatted text.
 *
 * fmt is const-qualified since every caller passes a string literal;
 * the offset arithmetic goes through char * because arithmetic on
 * void * is a GNU extension, not standard C.
 */
static void
batch_out(struct brw_context *brw, const char *name, uint32_t offset,
          int index, const char *fmt, ...)
{
   struct intel_context *intel = &brw->intel;
   const uint32_t *data =
      (const uint32_t *) ((const char *) intel->batch.bo->virtual + offset);
   va_list va;

   fprintf(stderr, "0x%08x: 0x%08x: %8s: ",
           offset + index * 4, data[index], name);
   va_start(va, fmt);
   vfprintf(stderr, fmt, va);
   va_end(va);
}
 
/* decode a BRW_SURFACE_TYPE field to a printable name */
static const char *
get_965_surfacetype(unsigned int surfacetype)
{
   static const char *const names[8] = {
      [0] = "1D", [1] = "2D", [2] = "3D",
      [3] = "CUBE", [4] = "BUFFER", [7] = "NULL",
   };
   const char *name = (surfacetype < 8) ? names[surfacetype] : NULL;

   return name ? name : "unknown";
}
 
/* decode a BRW_SURFACE_FORMAT field to a printable name (only the few
 * formats this dumper cares about; everything else is "unknown") */
static const char *
get_965_surface_format(unsigned int surface_format)
{
   if (surface_format == 0x000)
      return "r32g32b32a32_float";
   if (surface_format == 0x0c1)
      return "b8g8r8a8_unorm";
   if (surface_format == 0x100)
      return "b5g6r5_unorm";
   if (surface_format == 0x102)
      return "b5g5r5a1_unorm";
   if (surface_format == 0x104)
      return "b4g4r4a4_unorm";

   return "unknown";
}
 
/* dump a GEN4/5-style VS unit state (brw_vs_unit_state) in the batch */
static void dump_vs_state(struct brw_context *brw, uint32_t offset)
{
struct intel_context *intel = &brw->intel;
const char *name = "VS_STATE";
struct brw_vs_unit_state *vs = intel->batch.bo->virtual + offset;

batch_out(brw, name, offset, 0, "thread0\n");
batch_out(brw, name, offset, 1, "thread1\n");
batch_out(brw, name, offset, 2, "thread2\n");
batch_out(brw, name, offset, 3, "thread3\n");
batch_out(brw, name, offset, 4, "thread4: %d threads\n",
vs->thread4.max_threads + 1);
batch_out(brw, name, offset, 5, "vs5\n");
batch_out(brw, name, offset, 6, "vs6\n");
}
 
/* dump a GEN4/5-style GS unit state (brw_gs_unit_state) in the batch */
static void dump_gs_state(struct brw_context *brw, uint32_t offset)
{
   struct intel_context *intel = &brw->intel;
   const char *name = "GS_STATE";
   struct brw_gs_unit_state *gs = intel->batch.bo->virtual + offset;

   batch_out(brw, name, offset, 0, "thread0\n");
   batch_out(brw, name, offset, 1, "thread1\n");
   batch_out(brw, name, offset, 2, "thread2\n");
   batch_out(brw, name, offset, 3, "thread3\n");
   batch_out(brw, name, offset, 4, "thread4: %d threads\n",
             gs->thread4.max_threads + 1);
   /* dwords 5/6 were labelled "vs5"/"vs6" — a copy-paste from
    * dump_vs_state(); label them as GS dwords */
   batch_out(brw, name, offset, 5, "gs5\n");
   batch_out(brw, name, offset, 6, "gs6\n");
}
 
/* dump a GEN4/5-style CLIP unit state, including its viewport extents */
static void dump_clip_state(struct brw_context *brw, uint32_t offset)
{
struct intel_context *intel = &brw->intel;
const char *name = "CLIP_STATE";
struct brw_clip_unit_state *clip = intel->batch.bo->virtual + offset;

batch_out(brw, name, offset, 0, "thread0\n");
batch_out(brw, name, offset, 1, "thread1\n");
batch_out(brw, name, offset, 2, "thread2\n");
batch_out(brw, name, offset, 3, "thread3\n");
batch_out(brw, name, offset, 4, "thread4: %d threads\n",
clip->thread4.max_threads + 1);
batch_out(brw, name, offset, 5, "clip5\n");
batch_out(brw, name, offset, 6, "clip6\n");
batch_out(brw, name, offset, 7, "vp xmin %f\n", clip->viewport_xmin);
batch_out(brw, name, offset, 8, "vp xmax %f\n", clip->viewport_xmax);
batch_out(brw, name, offset, 9, "vp ymin %f\n", clip->viewport_ymin);
batch_out(brw, name, offset, 10, "vp ymax %f\n", clip->viewport_ymax);
}
 
/* dump a GEN4/5-style SF unit state */
static void dump_sf_state(struct brw_context *brw, uint32_t offset)
{
struct intel_context *intel = &brw->intel;
const char *name = "SF_STATE";
struct brw_sf_unit_state *sf = intel->batch.bo->virtual + offset;

batch_out(brw, name, offset, 0, "thread0\n");
batch_out(brw, name, offset, 1, "thread1\n");
batch_out(brw, name, offset, 2, "thread2\n");
batch_out(brw, name, offset, 3, "thread3\n");
batch_out(brw, name, offset, 4, "thread4: %d threads\n",
sf->thread4.max_threads + 1);
batch_out(brw, name, offset, 5, "sf5: viewport offset\n");
batch_out(brw, name, offset, 6, "sf6\n");
batch_out(brw, name, offset, 7, "sf7\n");
}
 
/* dump a GEN4/5-style WM unit state, decoding the wm5 dispatch flags */
static void dump_wm_state(struct brw_context *brw, uint32_t offset)
{
struct intel_context *intel = &brw->intel;
const char *name = "WM_STATE";
struct brw_wm_unit_state *wm = intel->batch.bo->virtual + offset;

batch_out(brw, name, offset, 0, "thread0\n");
batch_out(brw, name, offset, 1, "thread1\n");
batch_out(brw, name, offset, 2, "thread2\n");
batch_out(brw, name, offset, 3, "thread3\n");
batch_out(brw, name, offset, 4, "wm4\n");
batch_out(brw, name, offset, 5, "wm5: %s%s%s%s%s%s, %d threads\n",
wm->wm5.enable_8_pix ? "8pix" : "",
wm->wm5.enable_16_pix ? "16pix" : "",
wm->wm5.program_uses_depth ? ", uses depth" : "",
wm->wm5.program_computes_depth ? ", computes depth" : "",
wm->wm5.program_uses_killpixel ? ", kills" : "",
wm->wm5.thread_dispatch_enable ? "" : ", no dispatch",
wm->wm5.max_threads + 1);
batch_out(brw, name, offset, 6, "depth offset constant %f\n",
wm->global_depth_offset_constant);
batch_out(brw, name, offset, 7, "depth offset scale %f\n",
wm->global_depth_offset_scale);
batch_out(brw, name, offset, 8, "wm8: kernel 1 (gen5+)\n");
batch_out(brw, name, offset, 9, "wm9: kernel 2 (gen5+)\n");
batch_out(brw, name, offset, 10, "wm10: kernel 3 (gen5+)\n");
}
 
/* dump a pre-GEN7 SURFACE_STATE: type/format, size, pitch, tiling */
static void dump_surface_state(struct brw_context *brw, uint32_t offset)
{
const char *name = "SURF";
uint32_t *surf = brw->intel.batch.bo->virtual + offset;

batch_out(brw, name, offset, 0, "%s %s\n",
get_965_surfacetype(GET_FIELD(surf[0], BRW_SURFACE_TYPE)),
get_965_surface_format(GET_FIELD(surf[0], BRW_SURFACE_FORMAT)));
batch_out(brw, name, offset, 1, "offset\n");
batch_out(brw, name, offset, 2, "%dx%d size, %d mips\n",
GET_FIELD(surf[2], BRW_SURFACE_WIDTH) + 1,
GET_FIELD(surf[2], BRW_SURFACE_HEIGHT) + 1,
GET_FIELD(surf[2], BRW_SURFACE_LOD));
batch_out(brw, name, offset, 3, "pitch %d, %s tiled\n",
GET_FIELD(surf[3], BRW_SURFACE_PITCH) + 1,
(surf[3] & BRW_SURFACE_TILED) ?
((surf[3] & BRW_SURFACE_TILED_Y) ? "Y" : "X") : "not");
batch_out(brw, name, offset, 4, "mip base %d\n",
GET_FIELD(surf[4], BRW_SURFACE_MIN_LOD));
batch_out(brw, name, offset, 5, "x,y offset: %d,%d\n",
GET_FIELD(surf[5], BRW_SURFACE_X_OFFSET),
GET_FIELD(surf[5], BRW_SURFACE_Y_OFFSET));
}
 
/* dump a GEN7 SURFACE_STATE (field layout differs from pre-GEN7) */
static void dump_gen7_surface_state(struct brw_context *brw, uint32_t offset)
{
const char *name = "SURF";
uint32_t *surf = brw->intel.batch.bo->virtual + offset;

batch_out(brw, name, offset, 0, "%s %s\n",
get_965_surfacetype(GET_FIELD(surf[0], BRW_SURFACE_TYPE)),
get_965_surface_format(GET_FIELD(surf[0], BRW_SURFACE_FORMAT)));
batch_out(brw, name, offset, 1, "offset\n");
batch_out(brw, name, offset, 2, "%dx%d size, %d mips\n",
GET_FIELD(surf[2], GEN7_SURFACE_WIDTH) + 1,
GET_FIELD(surf[2], GEN7_SURFACE_HEIGHT) + 1,
surf[5] & INTEL_MASK(3, 0));
batch_out(brw, name, offset, 3, "pitch %d, %stiled\n",
(surf[3] & INTEL_MASK(17, 0)) + 1,
(surf[0] & (1 << 14)) ? "" : "not ");
batch_out(brw, name, offset, 4, "mip base %d\n",
GET_FIELD(surf[5], GEN7_SURFACE_MIN_LOD));
batch_out(brw, name, offset, 5, "x,y offset: %d,%d\n",
GET_FIELD(surf[5], BRW_SURFACE_X_OFFSET),
GET_FIELD(surf[5], BRW_SURFACE_Y_OFFSET));
}
 
/* dump a sampler default (border) color; layout differs on gen5/gen6 */
static void
dump_sdc(struct brw_context *brw, uint32_t offset)
{
const char *name = "SDC";
struct intel_context *intel = &brw->intel;

if (intel->gen >= 5 && intel->gen <= 6) {
struct gen5_sampler_default_color *sdc = (intel->batch.bo->virtual +
offset);
batch_out(brw, name, offset, 0, "unorm rgba\n");
batch_out(brw, name, offset, 1, "r %f\n", sdc->f[0]);
/* NOTE(review): labels print f[1] as "b" and f[2] as "g" — looks like
* the b/g labels are swapped relative to RGBA order; confirm against
* the gen5 SDC layout before relying on this output */
batch_out(brw, name, offset, 2, "b %f\n", sdc->f[1]);
batch_out(brw, name, offset, 3, "g %f\n", sdc->f[2]);
batch_out(brw, name, offset, 4, "a %f\n", sdc->f[3]);
batch_out(brw, name, offset, 5, "half float rg\n");
batch_out(brw, name, offset, 6, "half float ba\n");
batch_out(brw, name, offset, 7, "u16 rg\n");
batch_out(brw, name, offset, 8, "u16 ba\n");
batch_out(brw, name, offset, 9, "s16 rg\n");
batch_out(brw, name, offset, 10, "s16 ba\n");
batch_out(brw, name, offset, 11, "s8 rgba\n");
} else {
struct brw_sampler_default_color *sdc = (intel->batch.bo->virtual +
offset);
batch_out(brw, name, offset, 0, "r %f\n", sdc->color[0]);
batch_out(brw, name, offset, 1, "g %f\n", sdc->color[1]);
batch_out(brw, name, offset, 2, "b %f\n", sdc->color[2]);
batch_out(brw, name, offset, 3, "a %f\n", sdc->color[3]);
}
}
 
/* dump an array of pre-GEN7 SAMPLER_STATEs occupying \p size bytes */
static void dump_sampler_state(struct brw_context *brw,
uint32_t offset, uint32_t size)
{
struct intel_context *intel = &brw->intel;
int i;
struct brw_sampler_state *samp = intel->batch.bo->virtual + offset;

assert(intel->gen < 7);

for (i = 0; i < size / sizeof(*samp); i++) {
char name[20];

sprintf(name, "WM SAMP%d", i);
batch_out(brw, name, offset, 0, "filtering\n");
batch_out(brw, name, offset, 1, "wrapping, lod\n");
batch_out(brw, name, offset, 2, "default color pointer\n");
batch_out(brw, name, offset, 3, "chroma key, aniso\n");

/* indices stay 0..3; the base offset advances per sampler */
samp++;
offset += sizeof(*samp);
}
}
 
/* dump an array of GEN7 SAMPLER_STATEs occupying \p size bytes */
static void dump_gen7_sampler_state(struct brw_context *brw,
uint32_t offset, uint32_t size)
{
struct intel_context *intel = &brw->intel;
struct gen7_sampler_state *samp = intel->batch.bo->virtual + offset;
int i;

assert(intel->gen >= 7);

for (i = 0; i < size / sizeof(*samp); i++) {
char name[20];

sprintf(name, "WM SAMP%d", i);
batch_out(brw, name, offset, 0, "filtering\n");
batch_out(brw, name, offset, 1, "wrapping, lod\n");
batch_out(brw, name, offset, 2, "default color pointer\n");
batch_out(brw, name, offset, 3, "chroma key, aniso\n");

/* indices stay 0..3; the base offset advances per sampler */
samp++;
offset += sizeof(*samp);
}
}
 
 
/* dump a pre-GEN7 SF_VIEWPORT: viewport matrix plus scissor rect */
static void dump_sf_viewport_state(struct brw_context *brw,
uint32_t offset)
{
struct intel_context *intel = &brw->intel;
const char *name = "SF VP";
struct brw_sf_viewport *vp = intel->batch.bo->virtual + offset;

assert(intel->gen < 7);

batch_out(brw, name, offset, 0, "m00 = %f\n", vp->viewport.m00);
batch_out(brw, name, offset, 1, "m11 = %f\n", vp->viewport.m11);
batch_out(brw, name, offset, 2, "m22 = %f\n", vp->viewport.m22);
batch_out(brw, name, offset, 3, "m30 = %f\n", vp->viewport.m30);
batch_out(brw, name, offset, 4, "m31 = %f\n", vp->viewport.m31);
batch_out(brw, name, offset, 5, "m32 = %f\n", vp->viewport.m32);

batch_out(brw, name, offset, 6, "top left = %d,%d\n",
vp->scissor.xmin, vp->scissor.ymin);
batch_out(brw, name, offset, 7, "bottom right = %d,%d\n",
vp->scissor.xmax, vp->scissor.ymax);
}
 
/* dump a pre-GEN7 CLIP_VIEWPORT (guard-band extents) */
static void dump_clip_viewport_state(struct brw_context *brw,
uint32_t offset)
{
struct intel_context *intel = &brw->intel;
const char *name = "CLIP VP";
struct brw_clipper_viewport *vp = intel->batch.bo->virtual + offset;

assert(intel->gen < 7);

batch_out(brw, name, offset, 0, "xmin = %f\n", vp->xmin);
batch_out(brw, name, offset, 1, "xmax = %f\n", vp->xmax);
batch_out(brw, name, offset, 2, "ymin = %f\n", vp->ymin);
batch_out(brw, name, offset, 3, "ymax = %f\n", vp->ymax);
}
 
/* dump a GEN7 SF_CLIP_VIEWPORT: viewport matrix plus guard band */
static void dump_sf_clip_viewport_state(struct brw_context *brw,
uint32_t offset)
{
struct intel_context *intel = &brw->intel;
const char *name = "SF_CLIP VP";
struct gen7_sf_clip_viewport *vp = intel->batch.bo->virtual + offset;

assert(intel->gen >= 7);

batch_out(brw, name, offset, 0, "m00 = %f\n", vp->viewport.m00);
batch_out(brw, name, offset, 1, "m11 = %f\n", vp->viewport.m11);
batch_out(brw, name, offset, 2, "m22 = %f\n", vp->viewport.m22);
batch_out(brw, name, offset, 3, "m30 = %f\n", vp->viewport.m30);
batch_out(brw, name, offset, 4, "m31 = %f\n", vp->viewport.m31);
batch_out(brw, name, offset, 5, "m32 = %f\n", vp->viewport.m32);
batch_out(brw, name, offset, 6, "guardband xmin = %f\n", vp->guardband.xmin);
batch_out(brw, name, offset, 7, "guardband xmax = %f\n", vp->guardband.xmax);
batch_out(brw, name, offset, 8, "guardband ymin = %f\n", vp->guardband.ymin);
batch_out(brw, name, offset, 9, "guardband ymax = %f\n", vp->guardband.ymax);
}
 
 
/* dump a CC_VIEWPORT (depth range) */
static void dump_cc_viewport_state(struct brw_context *brw, uint32_t offset)
{
const char *name = "CC VP";
struct brw_cc_viewport *vp = brw->intel.batch.bo->virtual + offset;

batch_out(brw, name, offset, 0, "min_depth = %f\n", vp->min_depth);
batch_out(brw, name, offset, 1, "max_depth = %f\n", vp->max_depth);
}
 
/* dump a GEN6+ DEPTH_STENCIL_STATE: stencil and depth test controls */
static void dump_depth_stencil_state(struct brw_context *brw, uint32_t offset)
{
const char *name = "D_S";
struct gen6_depth_stencil_state *ds = brw->intel.batch.bo->virtual + offset;

batch_out(brw, name, offset, 0,
"stencil %sable, func %d, write %sable\n",
ds->ds0.stencil_enable ? "en" : "dis",
ds->ds0.stencil_func,
ds->ds0.stencil_write_enable ? "en" : "dis");
batch_out(brw, name, offset, 1,
"stencil test mask 0x%x, write mask 0x%x\n",
ds->ds1.stencil_test_mask, ds->ds1.stencil_write_mask);
batch_out(brw, name, offset, 2,
"depth test %sable, func %d, write %sable\n",
ds->ds2.depth_test_enable ? "en" : "dis",
ds->ds2.depth_test_func,
ds->ds2.depth_write_enable ? "en" : "dis");
}
 
/* dump a GEN4-style COLOR_CALC_STATE (dwords only, no decoding) */
static void dump_cc_state_gen4(struct brw_context *brw, uint32_t offset)
{
const char *name = "CC";

batch_out(brw, name, offset, 0, "cc0\n");
batch_out(brw, name, offset, 1, "cc1\n");
batch_out(brw, name, offset, 2, "cc2\n");
batch_out(brw, name, offset, 3, "cc3\n");
batch_out(brw, name, offset, 4, "cc4: viewport offset\n");
batch_out(brw, name, offset, 5, "cc5\n");
batch_out(brw, name, offset, 6, "cc6\n");
batch_out(brw, name, offset, 7, "cc7\n");
}
 
/* dump a GEN6+ COLOR_CALC_STATE: alpha test, stencil refs, blend color */
static void dump_cc_state_gen6(struct brw_context *brw, uint32_t offset)
{
const char *name = "CC";
struct gen6_color_calc_state *cc = brw->intel.batch.bo->virtual + offset;

batch_out(brw, name, offset, 0,
"alpha test format %s, round disable %d, stencil ref %d, "
"bf stencil ref %d\n",
cc->cc0.alpha_test_format ? "FLOAT32" : "UNORM8",
cc->cc0.round_disable,
cc->cc0.stencil_ref,
cc->cc0.bf_stencil_ref);
batch_out(brw, name, offset, 1, "\n");
batch_out(brw, name, offset, 2, "constant red %f\n", cc->constant_r);
batch_out(brw, name, offset, 3, "constant green %f\n", cc->constant_g);
batch_out(brw, name, offset, 4, "constant blue %f\n", cc->constant_b);
batch_out(brw, name, offset, 5, "constant alpha %f\n", cc->constant_a);
}
 
/* dump a BLEND_STATE (raw dwords only, no decoding) */
static void dump_blend_state(struct brw_context *brw, uint32_t offset)
{
const char *name = "BLEND";

batch_out(brw, name, offset, 0, "\n");
batch_out(brw, name, offset, 1, "\n");
}
 
/* dump a GEN6+ SCISSOR_RECT */
static void
dump_scissor(struct brw_context *brw, uint32_t offset)
{
const char *name = "SCISSOR";
struct intel_context *intel = &brw->intel;
struct gen6_scissor_rect *scissor = intel->batch.bo->virtual + offset;

batch_out(brw, name, offset, 0, "xmin %d, ymin %d\n",
scissor->xmin, scissor->ymin);
batch_out(brw, name, offset, 1, "xmax %d, ymax %d\n",
scissor->xmax, scissor->ymax);
}
 
/* dump \p size bytes of VS push constants, four floats/uints per line
 * (same format as dump_wm_constants) */
static void
dump_vs_constants(struct brw_context *brw, uint32_t offset, uint32_t size)
{
const char *name = "VS_CONST";
struct intel_context *intel = &brw->intel;
uint32_t *as_uint = intel->batch.bo->virtual + offset;
float *as_float = intel->batch.bo->virtual + offset;
int i;

for (i = 0; i < size / 4; i += 4) {
batch_out(brw, name, offset, i, "%3d: (% f % f % f % f) (0x%08x 0x%08x 0x%08x 0x%08x)\n",
i / 4,
as_float[i], as_float[i + 1], as_float[i + 2], as_float[i + 3],
as_uint[i], as_uint[i + 1], as_uint[i + 2], as_uint[i + 3]);
}
}
 
/* dump \p size bytes of WM push constants, four floats/uints per line
 * (same format as dump_vs_constants) */
static void
dump_wm_constants(struct brw_context *brw, uint32_t offset, uint32_t size)
{
const char *name = "WM_CONST";
struct intel_context *intel = &brw->intel;
uint32_t *as_uint = intel->batch.bo->virtual + offset;
float *as_float = intel->batch.bo->virtual + offset;
int i;

for (i = 0; i < size / 4; i += 4) {
batch_out(brw, name, offset, i, "%3d: (% f % f % f % f) (0x%08x 0x%08x 0x%08x 0x%08x)\n",
i / 4,
as_float[i], as_float[i + 1], as_float[i + 2], as_float[i + 3],
as_uint[i], as_uint[i + 1], as_uint[i + 2], as_uint[i + 3]);
}
}
 
/* Dump a binding table: \p size bytes of surface-state pointers at
 * \p offset.  Null (zero) entries are skipped.
 */
static void dump_binding_table(struct brw_context *brw, uint32_t offset,
                               uint32_t size)
{
   char name[20];
   int i;
   uint32_t *data = brw->intel.batch.bo->virtual + offset;

   for (i = 0; i < size / 4; i++) {
      if (data[i] == 0)
         continue;

      /* snprintf rather than sprintf: guarantee we can never overrun
       * name[] no matter how large the table index gets
       */
      snprintf(name, sizeof(name), "BIND%d", i);
      batch_out(brw, name, offset, i, "surface state address\n");
   }
}
 
/* Set up a minimal, fake brw_context so the classic-i965 style dump
 * helpers above can be reused: copy the gen from the pipeline's device
 * and point the batch bo at the (already mapped) pipeline bo.
 */
static void
init_brw(struct brw_context *brw, struct ilo_3d_pipeline *p)
{
   brw->intel.gen = ILO_GEN_GET_MAJOR(p->dev->gen);
   brw->intel.batch.bo_dst.virtual = intel_bo_get_virtual(p->cp->bo);
   brw->intel.batch.bo = &brw->intel.batch.bo_dst;
}
 
/* Walk the pipeline's saved state offsets and decode each piece of
 * indirect state (viewports, CC, binding tables, surfaces, samplers,
 * scissors, push constants) from the mapped batch bo.
 */
static void
dump_3d_state(struct ilo_3d_pipeline *p)
{
   struct brw_context brw;
   int num_states, i;

   init_brw(&brw, p);

   /* GEN7+ merges SF and CLIP viewports into SF_CLIP_VIEWPORT */
   if (brw.intel.gen >= 7) {
      dump_cc_viewport_state(&brw, p->state.CC_VIEWPORT);
      dump_sf_clip_viewport_state(&brw, p->state.SF_CLIP_VIEWPORT);
   }
   else {
      dump_clip_viewport_state(&brw, p->state.CLIP_VIEWPORT);
      dump_sf_viewport_state(&brw, p->state.SF_VIEWPORT);
      dump_cc_viewport_state(&brw, p->state.CC_VIEWPORT);
   }

   dump_blend_state(&brw, p->state.BLEND_STATE);
   dump_cc_state_gen6(&brw, p->state.COLOR_CALC_STATE);
   dump_depth_stencil_state(&brw, p->state.DEPTH_STENCIL_STATE);

   /* VS: surfaces, binding table, samplers, push constants */
   num_states = p->state.vs.BINDING_TABLE_STATE_size;
   for (i = 0; i < num_states; i++) {
      if (brw.intel.gen < 7)
         dump_surface_state(&brw, p->state.vs.SURFACE_STATE[i]);
      else
         dump_gen7_surface_state(&brw, p->state.vs.SURFACE_STATE[i]);
   }
   /* binding table entries are 4 bytes each */
   dump_binding_table(&brw, p->state.vs.BINDING_TABLE_STATE, num_states * 4);

   num_states = 0;
   for (i = 0; i < Elements(p->state.vs.SAMPLER_BORDER_COLOR_STATE); i++) {
      if (!p->state.vs.SAMPLER_BORDER_COLOR_STATE[i])
         continue;

      dump_sdc(&brw, p->state.vs.SAMPLER_BORDER_COLOR_STATE[i]);
      num_states++;
   }
   /* each SAMPLER_STATE entry is 16 bytes */
   if (brw.intel.gen < 7)
      dump_sampler_state(&brw, p->state.vs.SAMPLER_STATE, num_states * 16);
   else
      dump_gen7_sampler_state(&brw, p->state.vs.SAMPLER_STATE, num_states * 16);

   if (p->state.vs.PUSH_CONSTANT_BUFFER_size) {
      dump_vs_constants(&brw, p->state.vs.PUSH_CONSTANT_BUFFER,
                        p->state.vs.PUSH_CONSTANT_BUFFER_size);
   }

   /* GS: surfaces and binding table only */
   num_states = p->state.gs.BINDING_TABLE_STATE_size;
   for (i = 0; i < num_states; i++) {
      if (!p->state.gs.SURFACE_STATE[i])
         continue;

      if (brw.intel.gen < 7)
         dump_surface_state(&brw, p->state.gs.SURFACE_STATE[i]);
      else
         dump_gen7_surface_state(&brw, p->state.gs.SURFACE_STATE[i]);
   }
   dump_binding_table(&brw, p->state.gs.BINDING_TABLE_STATE, num_states * 4);

   /* WM: surfaces, binding table, samplers */
   num_states = p->state.wm.BINDING_TABLE_STATE_size;
   for (i = 0; i < num_states; i++) {
      if (!p->state.wm.SURFACE_STATE[i])
         continue;

      if (brw.intel.gen < 7)
         dump_surface_state(&brw, p->state.wm.SURFACE_STATE[i]);
      else
         dump_gen7_surface_state(&brw, p->state.wm.SURFACE_STATE[i]);
   }
   dump_binding_table(&brw, p->state.wm.BINDING_TABLE_STATE, num_states * 4);

   num_states = 0;
   for (i = 0; i < Elements(p->state.wm.SAMPLER_BORDER_COLOR_STATE); i++) {
      if (!p->state.wm.SAMPLER_BORDER_COLOR_STATE[i])
         continue;

      dump_sdc(&brw, p->state.wm.SAMPLER_BORDER_COLOR_STATE[i]);
      num_states++;
   }
   if (brw.intel.gen < 7)
      dump_sampler_state(&brw, p->state.wm.SAMPLER_STATE, num_states * 16);
   else
      dump_gen7_sampler_state(&brw, p->state.wm.SAMPLER_STATE, num_states * 16);

   dump_scissor(&brw, p->state.SCISSOR_RECT);

   /* silence -Wunused warnings for dump helpers not wired up yet */
   (void) dump_vs_state;
   (void) dump_gs_state;
   (void) dump_clip_state;
   (void) dump_sf_state;
   (void) dump_wm_state;
   (void) dump_cc_state_gen4;
   (void) dump_wm_constants;
}
 
/**
 * Dump the pipeline.
 *
 * Dumps the command parser first, then maps the batch bo read-only and
 * decodes the indirect states stored in it.  Silently does nothing if the
 * mapping fails.
 */
void
ilo_3d_pipeline_dump(struct ilo_3d_pipeline *p)
{
   int err;

   ilo_cp_dump(p->cp);

   /* map read-only; intel_bo_map returns 0 on success */
   err = intel_bo_map(p->cp->bo, false);
   if (!err) {
      dump_3d_state(p);
      intel_bo_unmap(p->cp->bo);
   }
}
/contrib/sdk/sources/Mesa/src/gallium/drivers/ilo/ilo_3d_pipeline_gen6.c
0,0 → 1,1670
/*
* Mesa 3-D graphics library
*
* Copyright (C) 2013 LunarG, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*
* Authors:
* Chia-I Wu <olv@lunarg.com>
*/
 
#include "util/u_dual_blend.h"
#include "util/u_prim.h"
#include "intel_reg.h"
 
#include "ilo_3d.h"
#include "ilo_context.h"
#include "ilo_cp.h"
#include "ilo_gpe_gen6.h"
#include "ilo_shader.h"
#include "ilo_state.h"
#include "ilo_3d_pipeline.h"
#include "ilo_3d_pipeline_gen6.h"
 
/**
* This should be called before any depth stall flush (including those
* produced by non-pipelined state commands) or cache flush on GEN6.
*
* \see intel_emit_post_sync_nonzero_flush()
*/
/* GEN6 workaround: emit the mandated CS-stall / post-sync-op PIPE_CONTROL
 * pair once per draw.  When \p caller_post_sync is true the caller emits
 * its own post-sync op, so only the CS-stall half is emitted here.
 */
static void
gen6_wa_pipe_control_post_sync(struct ilo_3d_pipeline *p,
                               bool caller_post_sync)
{
   assert(p->dev->gen == ILO_GEN(6));

   /* emit once; the flag is cleared again by gen6_pipeline_vf_draw() */
   if (p->state.has_gen6_wa_pipe_control)
      return;

   p->state.has_gen6_wa_pipe_control = true;

   /*
    * From the Sandy Bridge PRM, volume 2 part 1, page 60:
    *
    *     "Pipe-control with CS-stall bit set must be sent BEFORE the
    *      pipe-control with a post-sync op and no write-cache flushes."
    *
    * The workaround below necessitates this workaround.
    */
   p->gen6_PIPE_CONTROL(p->dev,
         PIPE_CONTROL_CS_STALL |
         PIPE_CONTROL_STALL_AT_SCOREBOARD,
         NULL, 0, false, p->cp);

   /* the caller will emit the post-sync op */
   if (caller_post_sync)
      return;

   /*
    * From the Sandy Bridge PRM, volume 2 part 1, page 60:
    *
    *     "Before any depth stall flush (including those produced by
    *      non-pipelined state commands), software needs to first send a
    *      PIPE_CONTROL with no bits set except Post-Sync Operation != 0."
    *
    *     "Before a PIPE_CONTROL with Write Cache Flush Enable =1, a
    *      PIPE_CONTROL with any non-zero post-sync-op is required."
    */
   p->gen6_PIPE_CONTROL(p->dev,
         PIPE_CONTROL_WRITE_IMMEDIATE,
         p->workaround_bo, 0, false, p->cp);
}
 
static void
gen6_wa_pipe_control_wm_multisample_flush(struct ilo_3d_pipeline *p)
{
assert(p->dev->gen == ILO_GEN(6));
 
gen6_wa_pipe_control_post_sync(p, false);
 
/*
* From the Sandy Bridge PRM, volume 2 part 1, page 305:
*
* "Driver must guarentee that all the caches in the depth pipe are
* flushed before this command (3DSTATE_MULTISAMPLE) is parsed. This
* requires driver to send a PIPE_CONTROL with a CS stall along with a
* Depth Flush prior to this command."
*/
p->gen6_PIPE_CONTROL(p->dev,
PIPE_CONTROL_DEPTH_CACHE_FLUSH |
PIPE_CONTROL_CS_STALL,
0, 0, false, p->cp);
}
 
/* GEN6 workaround: emit the stall/flush/stall PIPE_CONTROL sequence that
 * must precede depth-buffer related commands.
 */
static void
gen6_wa_pipe_control_wm_depth_flush(struct ilo_3d_pipeline *p)
{
   assert(p->dev->gen == ILO_GEN(6));

   gen6_wa_pipe_control_post_sync(p, false);

   /*
    * According to intel_emit_depth_stall_flushes() of classic i965, we need
    * to emit a sequence of PIPE_CONTROLs prior to emitting depth related
    * commands.
    */
   p->gen6_PIPE_CONTROL(p->dev,
         PIPE_CONTROL_DEPTH_STALL,
         NULL, 0, false, p->cp);

   p->gen6_PIPE_CONTROL(p->dev,
         PIPE_CONTROL_DEPTH_CACHE_FLUSH,
         NULL, 0, false, p->cp);

   p->gen6_PIPE_CONTROL(p->dev,
         PIPE_CONTROL_DEPTH_STALL,
         NULL, 0, false, p->cp);
}
 
/* GEN6 workaround: stall at the pixel scoreboard before changing the
 * maximum thread count in 3DSTATE_WM.
 */
static void
gen6_wa_pipe_control_wm_max_threads_stall(struct ilo_3d_pipeline *p)
{
   assert(p->dev->gen == ILO_GEN(6));

   /* the post-sync workaround should cover this already */
   if (p->state.has_gen6_wa_pipe_control)
      return;

   /*
    * From the Sandy Bridge PRM, volume 2 part 1, page 274:
    *
    *     "A PIPE_CONTROL command, with only the Stall At Pixel Scoreboard
    *      field set (DW1 Bit 1), must be issued prior to any change to the
    *      value in this field (Maximum Number of Threads in 3DSTATE_WM)"
    */
   p->gen6_PIPE_CONTROL(p->dev,
         PIPE_CONTROL_STALL_AT_SCOREBOARD,
         NULL, 0, false, p->cp);

}
 
/* GEN6 workaround: flush after 3DSTATE_CONSTANT_VS so the command is not
 * kept buffered indefinitely by the VS fixed function.
 */
static void
gen6_wa_pipe_control_vs_const_flush(struct ilo_3d_pipeline *p)
{
   assert(p->dev->gen == ILO_GEN(6));

   gen6_wa_pipe_control_post_sync(p, false);

   /*
    * According to upload_vs_state() of classic i965, we need to emit
    * PIPE_CONTROL after 3DSTATE_CONSTANT_VS so that the command is kept being
    * buffered by VS FF, to the point that the FF dies.
    */
   p->gen6_PIPE_CONTROL(p->dev,
         PIPE_CONTROL_DEPTH_STALL |
         PIPE_CONTROL_INSTRUCTION_FLUSH |
         PIPE_CONTROL_STATE_CACHE_INVALIDATE,
         NULL, 0, false, p->cp);
}
 
#define DIRTY(state) (session->pipe_dirty & ILO_DIRTY_ ## state)
 
/* Emit PIPELINE_SELECT (3D mode) when a new hardware context is in use. */
void
gen6_pipeline_common_select(struct ilo_3d_pipeline *p,
                            const struct ilo_context *ilo,
                            struct gen6_pipeline_session *session)
{
   /* PIPELINE_SELECT */
   if (session->hw_ctx_changed) {
      if (p->dev->gen == ILO_GEN(6))
         gen6_wa_pipe_control_post_sync(p, false);

      /* 0x0 selects the 3D pipeline */
      p->gen6_PIPELINE_SELECT(p->dev, 0x0, p->cp);
   }
}
 
/* Emit STATE_SIP (zero system instruction pointer) for a new hw context. */
void
gen6_pipeline_common_sip(struct ilo_3d_pipeline *p,
                         const struct ilo_context *ilo,
                         struct gen6_pipeline_session *session)
{
   /* STATE_SIP */
   if (session->hw_ctx_changed) {
      if (p->dev->gen == ILO_GEN(6))
         gen6_wa_pipe_control_post_sync(p, false);

      p->gen6_STATE_SIP(p->dev, 0, p->cp);
   }
}
 
/* Emit STATE_BASE_ADDRESS when any base bo changed, and mark every piece
 * of indirect state dirty, since all indirect-state offsets are relative
 * to the (now different) base addresses.
 */
void
gen6_pipeline_common_base_address(struct ilo_3d_pipeline *p,
                                  const struct ilo_context *ilo,
                                  struct gen6_pipeline_session *session)
{
   /* STATE_BASE_ADDRESS */
   if (session->state_bo_changed || session->kernel_bo_changed ||
       session->batch_bo_changed) {
      if (p->dev->gen == ILO_GEN(6))
         gen6_wa_pipe_control_post_sync(p, false);

      /* general/surface/dynamic state live in the batch bo; instructions
       * (kernels) in the shader cache bo
       */
      p->gen6_STATE_BASE_ADDRESS(p->dev,
            NULL, p->cp->bo, p->cp->bo, NULL, ilo->hw3d->kernel.bo,
            0, 0, 0, 0, p->cp);

      /*
       * From the Sandy Bridge PRM, volume 1 part 1, page 28:
       *
       *     "The following commands must be reissued following any change to
       *      the base addresses:
       *
       *       * 3DSTATE_BINDING_TABLE_POINTERS
       *       * 3DSTATE_SAMPLER_STATE_POINTERS
       *       * 3DSTATE_VIEWPORT_STATE_POINTERS
       *       * 3DSTATE_CC_POINTERS
       *       * MEDIA_STATE_POINTERS"
       *
       * 3DSTATE_SCISSOR_STATE_POINTERS is not on the list, but it is
       * reasonable to also reissue the command.  Same to PCB.
       */
      session->viewport_state_changed = true;

      session->cc_state_blend_changed = true;
      session->cc_state_dsa_changed = true;
      session->cc_state_cc_changed = true;

      session->scissor_state_changed = true;

      session->binding_table_vs_changed = true;
      session->binding_table_gs_changed = true;
      session->binding_table_fs_changed = true;

      session->sampler_state_vs_changed = true;
      session->sampler_state_gs_changed = true;
      session->sampler_state_fs_changed = true;

      session->pcb_state_vs_changed = true;
      session->pcb_state_gs_changed = true;
      session->pcb_state_fs_changed = true;
   }
}
 
/* Partition the URB between VS and GS (3DSTATE_URB) based on the bound
 * shaders, and work around GS-to-VS URB handover corruption.
 */
static void
gen6_pipeline_common_urb(struct ilo_3d_pipeline *p,
                         const struct ilo_context *ilo,
                         struct gen6_pipeline_session *session)
{
   /* 3DSTATE_URB */
   if (DIRTY(VE) || DIRTY(VS) || DIRTY(GS)) {
      /* the GS unit is also active when the VS does GEN6-style SO */
      const bool gs_active = (ilo->gs || (ilo->vs &&
               ilo_shader_get_kernel_param(ilo->vs, ILO_KERNEL_VS_GEN6_SO)));
      int vs_entry_size, gs_entry_size;
      int vs_total_size, gs_total_size;

      /* entry sizes start out in units of vec4 attributes */
      vs_entry_size = (ilo->vs) ?
         ilo_shader_get_kernel_param(ilo->vs, ILO_KERNEL_OUTPUT_COUNT) : 0;

      /*
       * As indicated by 2e712e41db0c0676e9f30fc73172c0e8de8d84d4, VF and VS
       * share VUE handles.  The VUE allocation size must be large enough to
       * store either VF outputs (number of VERTEX_ELEMENTs) and VS outputs.
       *
       * I am not sure if the PRM explicitly states that VF and VS share VUE
       * handles.  But here is a citation that implies so:
       *
       * From the Sandy Bridge PRM, volume 2 part 1, page 44:
       *
       *     "Once a FF stage that spawn threads has sufficient input to
       *      initiate a thread, it must guarantee that it is safe to request
       *      the thread initiation. For all these FF stages, this check is
       *      based on :
       *
       *      - The availability of output URB entries:
       *        - VS: As the input URB entries are overwritten with the
       *          VS-generated output data, output URB availability isn't a
       *          factor."
       */
      if (vs_entry_size < ilo->ve->count)
         vs_entry_size = ilo->ve->count;

      /* a VS-only SO setup makes GS entries mirror VS entries */
      gs_entry_size = (ilo->gs) ?
         ilo_shader_get_kernel_param(ilo->gs, ILO_KERNEL_OUTPUT_COUNT) :
         (gs_active) ? vs_entry_size : 0;

      /* in bytes */
      vs_entry_size *= sizeof(float) * 4;
      gs_entry_size *= sizeof(float) * 4;
      vs_total_size = ilo->dev->urb_size;

      /* split the URB evenly when the GS is active */
      if (gs_active) {
         vs_total_size /= 2;
         gs_total_size = vs_total_size;
      }
      else {
         gs_total_size = 0;
      }

      p->gen6_3DSTATE_URB(p->dev, vs_total_size, gs_total_size,
            vs_entry_size, gs_entry_size, p->cp);

      /*
       * From the Sandy Bridge PRM, volume 2 part 1, page 27:
       *
       *     "Because of a urb corruption caused by allocating a previous
       *      gsunit's urb entry to vsunit software is required to send a
       *      "GS NULL Fence" (Send URB fence with VS URB size == 1 and GS URB
       *      size == 0) plus a dummy DRAW call before any case where VS will
       *      be taking over GS URB space."
       */
      if (p->state.gs.active && !gs_active)
         ilo_3d_pipeline_emit_flush_gen6(p);

      p->state.gs.active = gs_active;
   }
}
 
/* Emit 3DSTATE_VIEWPORT_STATE_POINTERS when any viewport state moved. */
static void
gen6_pipeline_common_pointers_1(struct ilo_3d_pipeline *p,
                                const struct ilo_context *ilo,
                                struct gen6_pipeline_session *session)
{
   /* 3DSTATE_VIEWPORT_STATE_POINTERS */
   if (session->viewport_state_changed) {
      p->gen6_3DSTATE_VIEWPORT_STATE_POINTERS(p->dev,
            p->state.CLIP_VIEWPORT,
            p->state.SF_VIEWPORT,
            p->state.CC_VIEWPORT, p->cp);
   }
}
 
/* Emit 3DSTATE_CC_STATE_POINTERS and 3DSTATE_SAMPLER_STATE_POINTERS when
 * the referenced indirect states were (re)uploaded.
 */
static void
gen6_pipeline_common_pointers_2(struct ilo_3d_pipeline *p,
                                const struct ilo_context *ilo,
                                struct gen6_pipeline_session *session)
{
   /* 3DSTATE_CC_STATE_POINTERS */
   if (session->cc_state_blend_changed ||
       session->cc_state_dsa_changed ||
       session->cc_state_cc_changed) {
      p->gen6_3DSTATE_CC_STATE_POINTERS(p->dev,
            p->state.BLEND_STATE,
            p->state.DEPTH_STENCIL_STATE,
            p->state.COLOR_CALC_STATE, p->cp);
   }

   /* 3DSTATE_SAMPLER_STATE_POINTERS */
   if (session->sampler_state_vs_changed ||
       session->sampler_state_gs_changed ||
       session->sampler_state_fs_changed) {
      /* no GS samplers: the middle pointer is 0 */
      p->gen6_3DSTATE_SAMPLER_STATE_POINTERS(p->dev,
            p->state.vs.SAMPLER_STATE,
            0,
            p->state.wm.SAMPLER_STATE, p->cp);
   }
}
 
/* Emit 3DSTATE_SCISSOR_STATE_POINTERS and 3DSTATE_BINDING_TABLE_POINTERS
 * when the referenced indirect states were (re)uploaded.
 */
static void
gen6_pipeline_common_pointers_3(struct ilo_3d_pipeline *p,
                                const struct ilo_context *ilo,
                                struct gen6_pipeline_session *session)
{
   /* 3DSTATE_SCISSOR_STATE_POINTERS */
   if (session->scissor_state_changed) {
      p->gen6_3DSTATE_SCISSOR_STATE_POINTERS(p->dev,
            p->state.SCISSOR_RECT, p->cp);
   }

   /* 3DSTATE_BINDING_TABLE_POINTERS */
   if (session->binding_table_vs_changed ||
       session->binding_table_gs_changed ||
       session->binding_table_fs_changed) {
      p->gen6_3DSTATE_BINDING_TABLE_POINTERS(p->dev,
            p->state.vs.BINDING_TABLE_STATE,
            p->state.gs.BINDING_TABLE_STATE,
            p->state.wm.BINDING_TABLE_STATE, p->cp);
   }
}
 
/* Emit vertex-fetch state: index buffer, vertex buffers, and vertex
 * elements, each gated on its own dirty flags.
 */
void
gen6_pipeline_vf(struct ilo_3d_pipeline *p,
                 const struct ilo_context *ilo,
                 struct gen6_pipeline_session *session)
{
   /* 3DSTATE_INDEX_BUFFER */
   if (DIRTY(IB) || session->primitive_restart_changed ||
       session->batch_bo_changed) {
      p->gen6_3DSTATE_INDEX_BUFFER(p->dev,
            &ilo->ib, ilo->draw->primitive_restart, p->cp);
   }

   /* 3DSTATE_VERTEX_BUFFERS */
   if (DIRTY(VB) || DIRTY(VE) || session->batch_bo_changed) {
      p->gen6_3DSTATE_VERTEX_BUFFERS(p->dev,
            ilo->vb.states, ilo->vb.enabled_mask, ilo->ve, p->cp);
   }

   /* 3DSTATE_VERTEX_ELEMENTS */
   if (DIRTY(VE) || DIRTY(VS)) {
      const struct ilo_ve_state *ve = ilo->ve;
      bool last_velement_edgeflag = false;
      bool prepend_generate_ids = false;

      if (ilo->vs) {
         if (ilo_shader_get_kernel_param(ilo->vs,
                  ILO_KERNEL_VS_INPUT_EDGEFLAG)) {
            /* we rely on the state tracker here */
            assert(ilo_shader_get_kernel_param(ilo->vs,
                     ILO_KERNEL_INPUT_COUNT) == ve->count);

            last_velement_edgeflag = true;
         }

         /* InstanceID/VertexID must be generated as an extra element */
         if (ilo_shader_get_kernel_param(ilo->vs,
                  ILO_KERNEL_VS_INPUT_INSTANCEID) ||
             ilo_shader_get_kernel_param(ilo->vs,
                  ILO_KERNEL_VS_INPUT_VERTEXID))
            prepend_generate_ids = true;
      }

      p->gen6_3DSTATE_VERTEX_ELEMENTS(p->dev, ve,
            last_velement_edgeflag, prepend_generate_ids, p->cp);
   }
}
 
/* Emit 3DSTATE_VF_STATISTICS (disabled) once per hardware context. */
void
gen6_pipeline_vf_statistics(struct ilo_3d_pipeline *p,
                            const struct ilo_context *ilo,
                            struct gen6_pipeline_session *session)
{
   /* 3DSTATE_VF_STATISTICS */
   if (session->hw_ctx_changed)
      p->gen6_3DSTATE_VF_STATISTICS(p->dev, false, p->cp);
}
 
/* Emit 3DPRIMITIVE for the current draw and re-arm the GEN6 post-sync
 * workaround for the next draw.
 */
void
gen6_pipeline_vf_draw(struct ilo_3d_pipeline *p,
                      const struct ilo_context *ilo,
                      struct gen6_pipeline_session *session)
{
   /* 3DPRIMITIVE */
   p->gen6_3DPRIMITIVE(p->dev, ilo->draw, &ilo->ib, false, p->cp);
   p->state.has_gen6_wa_pipe_control = false;
}
 
/* Emit vertex-shader state: 3DSTATE_CONSTANT_VS and 3DSTATE_VS, with the
 * GEN6 flushes that must bracket them.
 */
void
gen6_pipeline_vs(struct ilo_3d_pipeline *p,
                 const struct ilo_context *ilo,
                 struct gen6_pipeline_session *session)
{
   const bool emit_3dstate_vs = (DIRTY(VS) || DIRTY(SAMPLER_VS) ||
                                 session->kernel_bo_changed);
   const bool emit_3dstate_constant_vs = session->pcb_state_vs_changed;

   /*
    * the classic i965 does this in upload_vs_state(), citing a spec that I
    * cannot find
    */
   if (emit_3dstate_vs && p->dev->gen == ILO_GEN(6))
      gen6_wa_pipe_control_post_sync(p, false);

   /* 3DSTATE_CONSTANT_VS */
   if (emit_3dstate_constant_vs) {
      p->gen6_3DSTATE_CONSTANT_VS(p->dev,
            &p->state.vs.PUSH_CONSTANT_BUFFER,
            &p->state.vs.PUSH_CONSTANT_BUFFER_size,
            1, p->cp);
   }

   /* 3DSTATE_VS */
   if (emit_3dstate_vs) {
      const int num_samplers = ilo->sampler[PIPE_SHADER_VERTEX].count;

      p->gen6_3DSTATE_VS(p->dev, ilo->vs, num_samplers, p->cp);
   }

   /* the constant buffer change must be flushed through the VS FF */
   if (emit_3dstate_constant_vs && p->dev->gen == ILO_GEN(6))
      gen6_wa_pipe_control_vs_const_flush(p);
}
 
/* Emit geometry-shader state: 3DSTATE_CONSTANT_GS (always empty) and
 * 3DSTATE_GS.
 */
static void
gen6_pipeline_gs(struct ilo_3d_pipeline *p,
                 const struct ilo_context *ilo,
                 struct gen6_pipeline_session *session)
{
   /* 3DSTATE_CONSTANT_GS: no GS push constants are used */
   if (session->pcb_state_gs_changed)
      p->gen6_3DSTATE_CONSTANT_GS(p->dev, NULL, NULL, 0, p->cp);

   /* 3DSTATE_GS */
   if (DIRTY(GS) || DIRTY(VS) ||
       session->prim_changed || session->kernel_bo_changed) {
      const int verts_per_prim = u_vertices_per_prim(session->reduced_prim);

      p->gen6_3DSTATE_GS(p->dev, ilo->gs, ilo->vs, verts_per_prim, p->cp);
   }
}
 
/* Recompute the maximum streamed-vertex-buffer index from the bound SO
 * targets and the active shader's SO info.
 *
 * Returns true when p->state.so_max_vertices changed and the new value
 * must be programmed via 3DSTATE_GS_SVB_INDEX.
 */
bool
gen6_pipeline_update_max_svbi(struct ilo_3d_pipeline *p,
                              const struct ilo_context *ilo,
                              struct gen6_pipeline_session *session)
{
   if (DIRTY(VS) || DIRTY(GS) || DIRTY(SO)) {
      const struct pipe_stream_output_info *so_info =
         (ilo->gs) ? ilo_shader_get_kernel_so_info(ilo->gs) :
         (ilo->vs) ? ilo_shader_get_kernel_so_info(ilo->vs) : NULL;
      unsigned max_svbi = 0xffffffff;
      int i;

      /* guard so_info as gen6_pipeline_state_surfaces_so() does: with
       * neither GS nor VS bound it is NULL and must not be dereferenced
       */
      for (i = 0; so_info && i < so_info->num_outputs; i++) {
         const int output_buffer = so_info->output[i].output_buffer;
         const struct pipe_stream_output_target *so =
            ilo->so.states[output_buffer];
         /* stride and sizes are in dwords; convert to bytes */
         const int struct_size = so_info->stride[output_buffer] * 4;
         const int elem_size = so_info->output[i].num_components * 4;
         int buf_size, count;

         /* an unbound target can hold nothing */
         if (!so) {
            max_svbi = 0;
            break;
         }

         buf_size = so->buffer_size - so_info->output[i].dst_offset * 4;

         /* whole structs, plus one if the remainder still fits this element */
         count = buf_size / struct_size;
         if (buf_size % struct_size >= elem_size)
            count++;

         /* the most limiting output determines the max index */
         if (count < max_svbi)
            max_svbi = count;
      }

      if (p->state.so_max_vertices != max_svbi) {
         p->state.so_max_vertices = max_svbi;
         return true;
      }
   }

   return false;
}
 
/* Program 3DSTATE_GS_SVB_INDEX for buffer 0 when the max SVBI changed,
 * and initialize the unused buffers 1-3 for a new hardware context.
 */
static void
gen6_pipeline_gs_svbi(struct ilo_3d_pipeline *p,
                      const struct ilo_context *ilo,
                      struct gen6_pipeline_session *session)
{
   const bool emit = gen6_pipeline_update_max_svbi(p, ilo, session);

   /* 3DSTATE_GS_SVB_INDEX */
   if (emit) {
      if (p->dev->gen == ILO_GEN(6))
         gen6_wa_pipe_control_post_sync(p, false);

      p->gen6_3DSTATE_GS_SVB_INDEX(p->dev,
            0, p->state.so_num_vertices, p->state.so_max_vertices,
            false, p->cp);

      if (session->hw_ctx_changed) {
         int i;

         /*
          * From the Sandy Bridge PRM, volume 2 part 1, page 148:
          *
          *     "If a buffer is not enabled then the SVBI must be set to 0x0
          *      in order to not cause overflow in that SVBI."
          *
          *     "If a buffer is not enabled then the MaxSVBI must be set to
          *      0xFFFFFFFF in order to not cause overflow in that SVBI."
          */
         for (i = 1; i < 4; i++) {
            p->gen6_3DSTATE_GS_SVB_INDEX(p->dev,
                  i, 0, 0xffffffff, false, p->cp);
         }
      }
   }
}
 
/* Emit 3DSTATE_CLIP, enabling the guard band test only when every
 * viewport fully covers the framebuffer.
 */
void
gen6_pipeline_clip(struct ilo_3d_pipeline *p,
                   const struct ilo_context *ilo,
                   struct gen6_pipeline_session *session)
{
   /* 3DSTATE_CLIP */
   if (DIRTY(RASTERIZER) || DIRTY(FS) || DIRTY(VIEWPORT) || DIRTY(FB)) {
      bool enable_guardband = true;
      unsigned i;

      /*
       * We do not do 2D clipping yet.  Guard band test should only be enabled
       * when the viewport is larger than the framebuffer.
       */
      for (i = 0; i < ilo->viewport.count; i++) {
         const struct ilo_viewport_cso *vp = &ilo->viewport.cso[i];

         if (vp->min_x > 0.0f || vp->max_x < ilo->fb.state.width ||
             vp->min_y > 0.0f || vp->max_y < ilo->fb.state.height) {
            enable_guardband = false;
            break;
         }
      }

      p->gen6_3DSTATE_CLIP(p->dev, ilo->rasterizer,
            ilo->fs, enable_guardband, 1, p->cp);
   }
}
 
/* Emit 3DSTATE_SF whenever the rasterizer or any of the shader stages
 * feeding/consuming SF changed.
 */
static void
gen6_pipeline_sf(struct ilo_3d_pipeline *p,
                 const struct ilo_context *ilo,
                 struct gen6_pipeline_session *session)
{
   /* 3DSTATE_SF */
   if (!DIRTY(RASTERIZER) && !DIRTY(VS) && !DIRTY(GS) && !DIRTY(FS))
      return;

   /* the GS, when bound, is the last vertex stage before SF */
   p->gen6_3DSTATE_SF(p->dev, ilo->rasterizer, ilo->fs,
         (ilo->gs) ? ilo->gs : ilo->vs, p->cp);
}
 
/* Emit 3DSTATE_DRAWING_RECTANGLE covering the full framebuffer. */
void
gen6_pipeline_sf_rect(struct ilo_3d_pipeline *p,
                      const struct ilo_context *ilo,
                      struct gen6_pipeline_session *session)
{
   /* 3DSTATE_DRAWING_RECTANGLE */
   if (DIRTY(FB)) {
      if (p->dev->gen == ILO_GEN(6))
         gen6_wa_pipe_control_post_sync(p, false);

      p->gen6_3DSTATE_DRAWING_RECTANGLE(p->dev, 0, 0,
            ilo->fb.state.width, ilo->fb.state.height, p->cp);
   }
}
 
/* Emit fragment-shader state: 3DSTATE_CONSTANT_PS (always empty) and
 * 3DSTATE_WM, with the GEN6 max-threads stall where required.
 */
static void
gen6_pipeline_wm(struct ilo_3d_pipeline *p,
                 const struct ilo_context *ilo,
                 struct gen6_pipeline_session *session)
{
   /* 3DSTATE_CONSTANT_PS: no FS push constants are used */
   if (session->pcb_state_fs_changed)
      p->gen6_3DSTATE_CONSTANT_PS(p->dev, NULL, NULL, 0, p->cp);

   /* 3DSTATE_WM */
   if (DIRTY(FS) || DIRTY(SAMPLER_FS) || DIRTY(BLEND) || DIRTY(DSA) ||
       DIRTY(RASTERIZER) || session->kernel_bo_changed) {
      const int num_samplers = ilo->sampler[PIPE_SHADER_FRAGMENT].count;
      const bool dual_blend = ilo->blend->dual_blend;
      /* alpha test or alpha-to-coverage may discard pixels */
      const bool cc_may_kill = (ilo->dsa->alpha.enabled ||
                                ilo->blend->alpha_to_coverage);

      if (p->dev->gen == ILO_GEN(6) && session->hw_ctx_changed)
         gen6_wa_pipe_control_wm_max_threads_stall(p);

      p->gen6_3DSTATE_WM(p->dev, ilo->fs, num_samplers,
            ilo->rasterizer, dual_blend, cc_may_kill, p->cp);
   }
}
 
/* Emit 3DSTATE_MULTISAMPLE and 3DSTATE_SAMPLE_MASK, with the GEN6 depth
 * pipe flush that must precede 3DSTATE_MULTISAMPLE.
 */
static void
gen6_pipeline_wm_multisample(struct ilo_3d_pipeline *p,
                             const struct ilo_context *ilo,
                             struct gen6_pipeline_session *session)
{
   /* 3DSTATE_MULTISAMPLE and 3DSTATE_SAMPLE_MASK */
   if (DIRTY(SAMPLE_MASK) || DIRTY(FB)) {
      const uint32_t *packed_sample_pos;

      /* 4x positions for MSAA, 1x otherwise */
      packed_sample_pos = (ilo->fb.num_samples > 1) ?
         &p->packed_sample_position_4x : &p->packed_sample_position_1x;

      if (p->dev->gen == ILO_GEN(6)) {
         gen6_wa_pipe_control_post_sync(p, false);
         gen6_wa_pipe_control_wm_multisample_flush(p);
      }

      p->gen6_3DSTATE_MULTISAMPLE(p->dev,
            ilo->fb.num_samples, packed_sample_pos,
            ilo->rasterizer->state.half_pixel_center, p->cp);

      /* with a single sample the mask must enable exactly sample 0 */
      p->gen6_3DSTATE_SAMPLE_MASK(p->dev,
            (ilo->fb.num_samples > 1) ? ilo->sample_mask : 0x1, p->cp);
   }
}
 
/* Emit 3DSTATE_DEPTH_BUFFER (real or null surface) and
 * 3DSTATE_CLEAR_PARAMS, with the GEN6 depth flush sequence first.
 */
static void
gen6_pipeline_wm_depth(struct ilo_3d_pipeline *p,
                       const struct ilo_context *ilo,
                       struct gen6_pipeline_session *session)
{
   /* 3DSTATE_DEPTH_BUFFER and 3DSTATE_CLEAR_PARAMS */
   if (DIRTY(FB) || session->batch_bo_changed) {
      const struct ilo_zs_surface *zs;

      if (ilo->fb.state.zsbuf) {
         const struct ilo_surface_cso *surface =
            (const struct ilo_surface_cso *) ilo->fb.state.zsbuf;

         assert(!surface->is_rt);
         zs = &surface->u.zs;
      }
      else {
         /* no depth/stencil attachment: use the null zs surface */
         zs = &ilo->fb.null_zs;
      }

      if (p->dev->gen == ILO_GEN(6)) {
         gen6_wa_pipe_control_post_sync(p, false);
         gen6_wa_pipe_control_wm_depth_flush(p);
      }

      p->gen6_3DSTATE_DEPTH_BUFFER(p->dev, zs, p->cp);

      /* TODO */
      p->gen6_3DSTATE_CLEAR_PARAMS(p->dev, 0, p->cp);
   }
}
 
/* Emit rasterization helper state: polygon stipple, line stipple, and AA
 * line parameters, each only when enabled by the rasterizer state.
 */
void
gen6_pipeline_wm_raster(struct ilo_3d_pipeline *p,
                        const struct ilo_context *ilo,
                        struct gen6_pipeline_session *session)
{
   /* 3DSTATE_POLY_STIPPLE_PATTERN and 3DSTATE_POLY_STIPPLE_OFFSET */
   if ((DIRTY(RASTERIZER) || DIRTY(POLY_STIPPLE)) &&
       ilo->rasterizer->state.poly_stipple_enable) {
      if (p->dev->gen == ILO_GEN(6))
         gen6_wa_pipe_control_post_sync(p, false);

      p->gen6_3DSTATE_POLY_STIPPLE_PATTERN(p->dev,
            &ilo->poly_stipple, p->cp);

      p->gen6_3DSTATE_POLY_STIPPLE_OFFSET(p->dev, 0, 0, p->cp);
   }

   /* 3DSTATE_LINE_STIPPLE */
   if (DIRTY(RASTERIZER) && ilo->rasterizer->state.line_stipple_enable) {
      if (p->dev->gen == ILO_GEN(6))
         gen6_wa_pipe_control_post_sync(p, false);

      /* hardware repeat count is the gallium factor plus one */
      p->gen6_3DSTATE_LINE_STIPPLE(p->dev,
            ilo->rasterizer->state.line_stipple_pattern,
            ilo->rasterizer->state.line_stipple_factor + 1, p->cp);
   }

   /* 3DSTATE_AA_LINE_PARAMETERS */
   if (DIRTY(RASTERIZER) && ilo->rasterizer->state.line_smooth) {
      if (p->dev->gen == ILO_GEN(6))
         gen6_wa_pipe_control_post_sync(p, false);

      p->gen6_3DSTATE_AA_LINE_PARAMETERS(p->dev, p->cp);
   }
}
 
/* Upload viewport indirect states: SF_CLIP_VIEWPORT + CC_VIEWPORT on
 * GEN7+, or CLIP/SF/CC_VIEWPORT separately on GEN6.
 */
static void
gen6_pipeline_state_viewports(struct ilo_3d_pipeline *p,
                              const struct ilo_context *ilo,
                              struct gen6_pipeline_session *session)
{
   /* SF_CLIP_VIEWPORT and CC_VIEWPORT */
   if (p->dev->gen >= ILO_GEN(7) && DIRTY(VIEWPORT)) {
      p->state.SF_CLIP_VIEWPORT = p->gen7_SF_CLIP_VIEWPORT(p->dev,
            ilo->viewport.cso, ilo->viewport.count, p->cp);

      p->state.CC_VIEWPORT = p->gen6_CC_VIEWPORT(p->dev,
            ilo->viewport.cso, ilo->viewport.count, p->cp);

      session->viewport_state_changed = true;
   }
   /* SF_VIEWPORT, CLIP_VIEWPORT, and CC_VIEWPORT */
   else if (DIRTY(VIEWPORT)) {
      p->state.CLIP_VIEWPORT = p->gen6_CLIP_VIEWPORT(p->dev,
            ilo->viewport.cso, ilo->viewport.count, p->cp);

      p->state.SF_VIEWPORT = p->gen6_SF_VIEWPORT(p->dev,
            ilo->viewport.cso, ilo->viewport.count, p->cp);

      p->state.CC_VIEWPORT = p->gen6_CC_VIEWPORT(p->dev,
            ilo->viewport.cso, ilo->viewport.count, p->cp);

      session->viewport_state_changed = true;
   }
}
 
/* Upload color-calculator indirect states (BLEND_STATE, COLOR_CALC_STATE,
 * DEPTH_STENCIL_STATE) and flag the corresponding pointer re-emits.
 */
static void
gen6_pipeline_state_cc(struct ilo_3d_pipeline *p,
                       const struct ilo_context *ilo,
                       struct gen6_pipeline_session *session)
{
   /* BLEND_STATE */
   if (DIRTY(BLEND) || DIRTY(FB) || DIRTY(DSA)) {
      p->state.BLEND_STATE = p->gen6_BLEND_STATE(p->dev,
            ilo->blend, &ilo->fb, &ilo->dsa->alpha, p->cp);

      session->cc_state_blend_changed = true;
   }

   /* COLOR_CALC_STATE */
   if (DIRTY(DSA) || DIRTY(STENCIL_REF) || DIRTY(BLEND_COLOR)) {
      p->state.COLOR_CALC_STATE =
         p->gen6_COLOR_CALC_STATE(p->dev, &ilo->stencil_ref,
               ilo->dsa->alpha.ref_value, &ilo->blend_color, p->cp);

      session->cc_state_cc_changed = true;
   }

   /* DEPTH_STENCIL_STATE */
   if (DIRTY(DSA)) {
      p->state.DEPTH_STENCIL_STATE =
         p->gen6_DEPTH_STENCIL_STATE(p->dev, ilo->dsa, p->cp);

      session->cc_state_dsa_changed = true;
   }
}
 
/* Upload SCISSOR_RECT indirect state, one rect per viewport. */
static void
gen6_pipeline_state_scissors(struct ilo_3d_pipeline *p,
                             const struct ilo_context *ilo,
                             struct gen6_pipeline_session *session)
{
   /* SCISSOR_RECT */
   if (DIRTY(SCISSOR) || DIRTY(VIEWPORT)) {
      /* there should be as many scissors as there are viewports */
      p->state.SCISSOR_RECT = p->gen6_SCISSOR_RECT(p->dev,
            &ilo->scissor, ilo->viewport.count, p->cp);

      session->scissor_state_changed = true;
   }
}
 
/* Upload SURFACE_STATEs for the framebuffer's render targets into the WM
 * surface slots, substituting a null surface when there are no cbufs.
 */
static void
gen6_pipeline_state_surfaces_rt(struct ilo_3d_pipeline *p,
                                const struct ilo_context *ilo,
                                struct gen6_pipeline_session *session)
{
   /* SURFACE_STATEs for render targets */
   if (DIRTY(FB)) {
      const struct ilo_fb_state *fb = &ilo->fb;
      const int offset = ILO_WM_DRAW_SURFACE(0);
      uint32_t *surface_state = &p->state.wm.SURFACE_STATE[offset];
      int i;

      for (i = 0; i < fb->state.nr_cbufs; i++) {
         const struct ilo_surface_cso *surface =
            (const struct ilo_surface_cso *) fb->state.cbufs[i];

         assert(surface && surface->is_rt);
         surface_state[i] =
            p->gen6_SURFACE_STATE(p->dev, &surface->u.rt, true, p->cp);
      }

      /*
       * Upload at least one render target, as
       * brw_update_renderbuffer_surfaces() does.  I don't know why.
       */
      if (i == 0) {
         struct ilo_view_surface null_surface;

         ilo_gpe_init_view_surface_null(p->dev,
               fb->state.width, fb->state.height,
               1, 0, &null_surface);

         surface_state[i] =
            p->gen6_SURFACE_STATE(p->dev, &null_surface, true, p->cp);

         i++;
      }

      /* zero out the unused draw-surface slots */
      memset(&surface_state[i], 0, (ILO_MAX_DRAW_BUFFERS - i) * 4);

      if (i && session->num_surfaces[PIPE_SHADER_FRAGMENT] < offset + i)
         session->num_surfaces[PIPE_SHADER_FRAGMENT] = offset + i;

      session->binding_table_fs_changed = true;
   }
}
 
/* Upload SURFACE_STATEs for stream-output targets into the GS surface
 * slots.  GEN6 only: later gens use dedicated SO hardware.
 */
static void
gen6_pipeline_state_surfaces_so(struct ilo_3d_pipeline *p,
                                const struct ilo_context *ilo,
                                struct gen6_pipeline_session *session)
{
   const struct ilo_so_state *so = &ilo->so;

   if (p->dev->gen != ILO_GEN(6))
      return;

   /* SURFACE_STATEs for stream output targets */
   if (DIRTY(VS) || DIRTY(GS) || DIRTY(SO)) {
      /* SO info comes from the GS when bound, else the VS */
      const struct pipe_stream_output_info *so_info =
         (ilo->gs) ? ilo_shader_get_kernel_so_info(ilo->gs) :
         (ilo->vs) ? ilo_shader_get_kernel_so_info(ilo->vs) : NULL;
      const int offset = ILO_GS_SO_SURFACE(0);
      uint32_t *surface_state = &p->state.gs.SURFACE_STATE[offset];
      int i;

      /* so_info may be NULL; the condition guards the dereference */
      for (i = 0; so_info && i < so_info->num_outputs; i++) {
         const int target = so_info->output[i].output_buffer;
         const struct pipe_stream_output_target *so_target =
            (target < so->count) ? so->states[target] : NULL;

         if (so_target) {
            surface_state[i] = p->gen6_so_SURFACE_STATE(p->dev,
                  so_target, so_info, i, p->cp);
         }
         else {
            surface_state[i] = 0;
         }
      }

      /* zero out the unused SO slots */
      memset(&surface_state[i], 0, (ILO_MAX_SO_BINDINGS - i) * 4);

      if (i && session->num_surfaces[PIPE_SHADER_GEOMETRY] < offset + i)
         session->num_surfaces[PIPE_SHADER_GEOMETRY] = offset + i;

      session->binding_table_gs_changed = true;
   }
}
 
/* Upload SURFACE_STATEs for the sampler views of the given shader stage
 * (VS or FS; other stages are skipped) into that stage's texture slots.
 */
static void
gen6_pipeline_state_surfaces_view(struct ilo_3d_pipeline *p,
                                  const struct ilo_context *ilo,
                                  int shader_type,
                                  struct gen6_pipeline_session *session)
{
   const struct ilo_view_state *view = &ilo->view[shader_type];
   uint32_t *surface_state;
   int offset, i;
   bool skip = false;

   /* SURFACE_STATEs for sampler views: pick the per-stage slot array,
    * or skip entirely when the stage's views are not dirty
    */
   switch (shader_type) {
   case PIPE_SHADER_VERTEX:
      if (DIRTY(VIEW_VS)) {
         offset = ILO_VS_TEXTURE_SURFACE(0);
         surface_state = &p->state.vs.SURFACE_STATE[offset];

         session->binding_table_vs_changed = true;
      }
      else {
         skip = true;
      }
      break;
   case PIPE_SHADER_FRAGMENT:
      if (DIRTY(VIEW_FS)) {
         offset = ILO_WM_TEXTURE_SURFACE(0);
         surface_state = &p->state.wm.SURFACE_STATE[offset];

         session->binding_table_fs_changed = true;
      }
      else {
         skip = true;
      }
      break;
   default:
      skip = true;
      break;
   }

   if (skip)
      return;

   for (i = 0; i < view->count; i++) {
      if (view->states[i]) {
         const struct ilo_view_cso *cso =
            (const struct ilo_view_cso *) view->states[i];

         surface_state[i] =
            p->gen6_SURFACE_STATE(p->dev, &cso->surface, false, p->cp);
      }
      else {
         /* unbound view: null binding table entry */
         surface_state[i] = 0;
      }
   }

   /* zero out the unused texture slots */
   memset(&surface_state[i], 0, (ILO_MAX_SAMPLER_VIEWS - i) * 4);

   if (i && session->num_surfaces[shader_type] < offset + i)
      session->num_surfaces[shader_type] = offset + i;
}
 
/* Upload SURFACE_STATEs for the constant buffers of the given shader
 * stage (VS or FS; other stages are skipped) into its const slots.
 */
static void
gen6_pipeline_state_surfaces_const(struct ilo_3d_pipeline *p,
                                   const struct ilo_context *ilo,
                                   int shader_type,
                                   struct gen6_pipeline_session *session)
{
   const struct ilo_cbuf_state *cbuf = &ilo->cbuf[shader_type];
   uint32_t *surface_state;
   int offset, count, i;
   bool skip = false;

   /* SURFACE_STATEs for constant buffers: pick the per-stage slot array,
    * or skip entirely when constant buffers are not dirty
    */
   switch (shader_type) {
   case PIPE_SHADER_VERTEX:
      if (DIRTY(CBUF)) {
         offset = ILO_VS_CONST_SURFACE(0);
         surface_state = &p->state.vs.SURFACE_STATE[offset];

         session->binding_table_vs_changed = true;
      }
      else {
         skip = true;
      }
      break;
   case PIPE_SHADER_FRAGMENT:
      if (DIRTY(CBUF)) {
         offset = ILO_WM_CONST_SURFACE(0);
         surface_state = &p->state.wm.SURFACE_STATE[offset];

         session->binding_table_fs_changed = true;
      }
      else {
         skip = true;
      }
      break;
   default:
      skip = true;
      break;
   }

   if (skip)
      return;

   /* index of the highest enabled buffer, plus one */
   count = util_last_bit(cbuf->enabled_mask);
   for (i = 0; i < count; i++) {
      if (cbuf->cso[i].resource) {
         surface_state[i] = p->gen6_SURFACE_STATE(p->dev,
               &cbuf->cso[i].surface, false, p->cp);
      }
      else {
         surface_state[i] = 0;
      }
   }

   /* zero out the unused constant-buffer slots */
   memset(&surface_state[count], 0, (ILO_MAX_CONST_BUFFERS - count) * 4);

   if (count && session->num_surfaces[shader_type] < offset + count)
      session->num_surfaces[shader_type] = offset + count;
}
 
/* Upload the BINDING_TABLE_STATE for the given shader stage, sizing it
 * from the surfaces touched this upload plus any trailing survivors.
 */
static void
gen6_pipeline_state_binding_tables(struct ilo_3d_pipeline *p,
                                   const struct ilo_context *ilo,
                                   int shader_type,
                                   struct gen6_pipeline_session *session)
{
   uint32_t *binding_table_state, *surface_state;
   int *binding_table_state_size, size;
   bool skip = false;

   /* BINDING_TABLE_STATE: pick the per-stage arrays, or skip when the
    * stage's binding table did not change
    */
   switch (shader_type) {
   case PIPE_SHADER_VERTEX:
      surface_state = p->state.vs.SURFACE_STATE;
      binding_table_state = &p->state.vs.BINDING_TABLE_STATE;
      binding_table_state_size = &p->state.vs.BINDING_TABLE_STATE_size;

      skip = !session->binding_table_vs_changed;
      break;
   case PIPE_SHADER_GEOMETRY:
      surface_state = p->state.gs.SURFACE_STATE;
      binding_table_state = &p->state.gs.BINDING_TABLE_STATE;
      binding_table_state_size = &p->state.gs.BINDING_TABLE_STATE_size;

      skip = !session->binding_table_gs_changed;
      break;
   case PIPE_SHADER_FRAGMENT:
      surface_state = p->state.wm.SURFACE_STATE;
      binding_table_state = &p->state.wm.BINDING_TABLE_STATE;
      binding_table_state_size = &p->state.wm.BINDING_TABLE_STATE_size;

      skip = !session->binding_table_fs_changed;
      break;
   default:
      skip = true;
      break;
   }

   if (skip)
      return;

   /*
    * If we have seemingly less SURFACE_STATEs than before, it could be that
    * we did not touch those reside at the tail in this upload.  Loop over
    * them to figure out the real number of SURFACE_STATEs.
    */
   for (size = *binding_table_state_size;
         size > session->num_surfaces[shader_type]; size--) {
      if (surface_state[size - 1])
         break;
   }
   if (size < session->num_surfaces[shader_type])
      size = session->num_surfaces[shader_type];

   *binding_table_state = p->gen6_BINDING_TABLE_STATE(p->dev,
         surface_state, size, p->cp);
   *binding_table_state_size = size;
}
 
/**
 * Upload SAMPLER_STATE (and, when the samplers themselves are dirty,
 * SAMPLER_BORDER_COLOR_STATE) for a shader stage.  Only the VS and FS
 * stages are handled.
 */
static void
gen6_pipeline_state_samplers(struct ilo_3d_pipeline *p,
                             const struct ilo_context *ilo,
                             int shader_type,
                             struct gen6_pipeline_session *session)
{
   const struct ilo_sampler_cso * const *samplers =
      ilo->sampler[shader_type].cso;
   const struct pipe_sampler_view * const *views =
      (const struct pipe_sampler_view **) ilo->view[shader_type].states;
   const int num_samplers = ilo->sampler[shader_type].count;
   const int num_views = ilo->view[shader_type].count;
   uint32_t *sampler_state, *border_color_state;
   bool emit_border_color = false;
   bool skip = false;

   /* SAMPLER_BORDER_COLOR_STATE and SAMPLER_STATE */
   switch (shader_type) {
   case PIPE_SHADER_VERTEX:
      if (DIRTY(SAMPLER_VS) || DIRTY(VIEW_VS)) {
         sampler_state = &p->state.vs.SAMPLER_STATE;
         border_color_state = p->state.vs.SAMPLER_BORDER_COLOR_STATE;

         /* border colors only depend on the samplers, not the views */
         if (DIRTY(SAMPLER_VS))
            emit_border_color = true;

         session->sampler_state_vs_changed = true;
      }
      else {
         skip = true;
      }
      break;
   case PIPE_SHADER_FRAGMENT:
      if (DIRTY(SAMPLER_FS) || DIRTY(VIEW_FS)) {
         sampler_state = &p->state.wm.SAMPLER_STATE;
         border_color_state = p->state.wm.SAMPLER_BORDER_COLOR_STATE;

         if (DIRTY(SAMPLER_FS))
            emit_border_color = true;

         session->sampler_state_fs_changed = true;
      }
      else {
         skip = true;
      }
      break;
   default:
      skip = true;
      break;
   }

   if (skip)
      return;

   if (emit_border_color) {
      int i;

      /* a zero offset is stored for slots without a sampler */
      for (i = 0; i < num_samplers; i++) {
         border_color_state[i] = (samplers[i]) ?
            p->gen6_SAMPLER_BORDER_COLOR_STATE(p->dev,
                  samplers[i], p->cp) : 0;
      }
   }

   /* should we take the minimum of num_samplers and num_views? */
   *sampler_state = p->gen6_SAMPLER_STATE(p->dev,
         samplers, views,
         border_color_state,
         MIN2(num_samplers, num_views), p->cp);
}
 
/**
 * Upload the VS push constant buffer, used to hold the user clip planes
 * consumed by the VS kernel.
 */
static void
gen6_pipeline_state_pcb(struct ilo_3d_pipeline *p,
                        const struct ilo_context *ilo,
                        struct gen6_pipeline_session *session)
{
   int ucp_size;

   /* push constant buffer for VS */
   if (!DIRTY(VS) && !DIRTY(CLIP))
      return;

   ucp_size = (ilo->vs) ?
      ilo_shader_get_kernel_param(ilo->vs, ILO_KERNEL_VS_PCB_UCP_SIZE) : 0;

   if (ucp_size) {
      void *pcb;

      p->state.vs.PUSH_CONSTANT_BUFFER_size = ucp_size;
      p->state.vs.PUSH_CONSTANT_BUFFER =
         p->gen6_push_constant_buffer(p->dev,
               p->state.vs.PUSH_CONSTANT_BUFFER_size, &pcb, p->cp);

      /* copy the clip state into the freshly allocated buffer */
      memcpy(pcb, &ilo->clip, ucp_size);
   }
   else {
      p->state.vs.PUSH_CONSTANT_BUFFER_size = 0;
      p->state.vs.PUSH_CONSTANT_BUFFER = 0;
   }

   session->pcb_state_vs_changed = true;
}
 
#undef DIRTY
 
/**
 * Emit all GEN6 draw commands for this session.  The call order below is
 * the contract: workarounds and pointer updates are interleaved with the
 * fixed-function stage commands, ending with the 3DPRIMITIVE.
 */
static void
gen6_pipeline_commands(struct ilo_3d_pipeline *p,
                       const struct ilo_context *ilo,
                       struct gen6_pipeline_session *session)
{
   /*
    * We try to keep the order of the commands match, as closely as possible,
    * that of the classic i965 driver.  It allows us to compare the command
    * streams easily.
    */
   gen6_pipeline_common_select(p, ilo, session);
   gen6_pipeline_gs_svbi(p, ilo, session);
   gen6_pipeline_common_sip(p, ilo, session);
   gen6_pipeline_vf_statistics(p, ilo, session);
   gen6_pipeline_common_base_address(p, ilo, session);
   gen6_pipeline_common_pointers_1(p, ilo, session);
   gen6_pipeline_common_urb(p, ilo, session);
   gen6_pipeline_common_pointers_2(p, ilo, session);
   gen6_pipeline_wm_multisample(p, ilo, session);
   gen6_pipeline_vs(p, ilo, session);
   gen6_pipeline_gs(p, ilo, session);
   gen6_pipeline_clip(p, ilo, session);
   gen6_pipeline_sf(p, ilo, session);
   gen6_pipeline_wm(p, ilo, session);
   gen6_pipeline_common_pointers_3(p, ilo, session);
   gen6_pipeline_wm_depth(p, ilo, session);
   gen6_pipeline_wm_raster(p, ilo, session);
   gen6_pipeline_sf_rect(p, ilo, session);
   gen6_pipeline_vf(p, ilo, session);
   gen6_pipeline_vf_draw(p, ilo, session);
}
 
/**
 * Upload all indirect states needed for a draw: viewports, CC, scissors,
 * push constants, then every SURFACE_STATE, and finally the samplers and
 * binding tables (which reference the surfaces).
 */
void
gen6_pipeline_states(struct ilo_3d_pipeline *p,
                     const struct ilo_context *ilo,
                     struct gen6_pipeline_session *session)
{
   int shader_type;

   gen6_pipeline_state_viewports(p, ilo, session);
   gen6_pipeline_state_cc(p, ilo, session);
   gen6_pipeline_state_scissors(p, ilo, session);
   gen6_pipeline_state_pcb(p, ilo, session);

   /*
    * upload all SURFACE_STATEs together so that we know there are minimal
    * paddings
    */
   gen6_pipeline_state_surfaces_rt(p, ilo, session);
   gen6_pipeline_state_surfaces_so(p, ilo, session);
   for (shader_type = 0; shader_type < PIPE_SHADER_TYPES; shader_type++) {
      gen6_pipeline_state_surfaces_view(p, ilo, shader_type, session);
      gen6_pipeline_state_surfaces_const(p, ilo, shader_type, session);
   }

   for (shader_type = 0; shader_type < PIPE_SHADER_TYPES; shader_type++) {
      gen6_pipeline_state_samplers(p, ilo, shader_type, session);
      /* this must be called after all SURFACE_STATEs are uploaded */
      gen6_pipeline_state_binding_tables(p, ilo, shader_type, session);
   }
}
 
/**
 * Initialize a pipeline session from the current context and the pending
 * invalidate flags, deciding up front what must be re-emitted.
 */
void
gen6_pipeline_prepare(const struct ilo_3d_pipeline *p,
                      const struct ilo_context *ilo,
                      struct gen6_pipeline_session *session)
{
   memset(session, 0, sizeof(*session));
   session->pipe_dirty = ilo->dirty;
   session->reduced_prim = u_reduced_prim(ilo->draw->mode);

   /* remember how much batch space was available before the session */
   session->init_cp_space = ilo_cp_space(p->cp);

   session->hw_ctx_changed =
      (p->invalidate_flags & ILO_3D_PIPELINE_INVALIDATE_HW) != 0;

   if (!session->hw_ctx_changed) {
      /*
       * Any state that involves resources needs to be re-emitted when the
       * batch bo changes, because resources are not pinned and their offsets
       * (or existence) may differ between batch buffers.
       *
       * handle_invalid_batch_bo() messes around with
       * ILO_3D_PIPELINE_INVALIDATE_BATCH_BO, so test
       * ILO_3D_PIPELINE_INVALIDATE_STATE_BO here as a temporary workaround.
       */
      session->batch_bo_changed =
         (p->invalidate_flags & ILO_3D_PIPELINE_INVALIDATE_STATE_BO) != 0;

      session->state_bo_changed =
         (p->invalidate_flags & ILO_3D_PIPELINE_INVALIDATE_STATE_BO) != 0;
      session->kernel_bo_changed =
         (p->invalidate_flags & ILO_3D_PIPELINE_INVALIDATE_KERNEL_BO) != 0;
      session->prim_changed =
         (p->state.reduced_prim != session->reduced_prim);
      session->primitive_restart_changed =
         (p->state.primitive_restart != ilo->draw->primitive_restart);
   }
   else {
      /* losing the HW context means everything must be re-uploaded */
      session->batch_bo_changed = true;
      session->state_bo_changed = true;
      session->kernel_bo_changed = true;
      session->prim_changed = true;
      session->primitive_restart_changed = true;
   }
}
 
/**
 * Run the session's state and command emitters, widening the dirty set to
 * everything when the state bo or HW context was lost.
 */
void
gen6_pipeline_draw(struct ilo_3d_pipeline *p,
                   const struct ilo_context *ilo,
                   struct gen6_pipeline_session *session)
{
   /* force all states to be uploaded if the state bo changed */
   session->pipe_dirty =
      (session->state_bo_changed) ? ILO_DIRTY_ALL : ilo->dirty;

   session->emit_draw_states(p, ilo, session);

   /* force all commands to be uploaded if the HW context changed */
   session->pipe_dirty =
      (session->hw_ctx_changed) ? ILO_DIRTY_ALL : ilo->dirty;

   session->emit_draw_commands(p, ilo, session);
}
 
/**
 * Finish a session: verify the space estimate was an upper bound of what
 * was actually emitted, then latch the prim/restart state for the next
 * draw's change detection.
 */
void
gen6_pipeline_end(struct ilo_3d_pipeline *p,
                  const struct ilo_context *ilo,
                  struct gen6_pipeline_session *session)
{
   /* sanity check size estimation */
   assert(session->init_cp_space - ilo_cp_space(p->cp) <=
         ilo_3d_pipeline_estimate_size(p, ILO_3D_PIPELINE_DRAW, ilo));

   p->state.reduced_prim = session->reduced_prim;
   p->state.primitive_restart = ilo->draw->primitive_restart;
}
 
static void
ilo_3d_pipeline_emit_draw_gen6(struct ilo_3d_pipeline *p,
const struct ilo_context *ilo)
{
struct gen6_pipeline_session session;
 
gen6_pipeline_prepare(p, ilo, &session);
 
session.emit_draw_states = gen6_pipeline_states;
session.emit_draw_commands = gen6_pipeline_commands;
 
gen6_pipeline_draw(p, ilo, &session);
gen6_pipeline_end(p, ilo, &session);
}
 
/**
 * Emit a full pipeline flush: instruction, render, depth and texture
 * caches are flushed, the VF cache invalidated, with a CS stall.
 */
void
ilo_3d_pipeline_emit_flush_gen6(struct ilo_3d_pipeline *p)
{
   /* GEN6 requires a post-sync workaround before certain PIPE_CONTROLs */
   if (p->dev->gen == ILO_GEN(6))
      gen6_wa_pipe_control_post_sync(p, false);

   p->gen6_PIPE_CONTROL(p->dev,
         PIPE_CONTROL_INSTRUCTION_FLUSH |
         PIPE_CONTROL_WRITE_FLUSH |
         PIPE_CONTROL_DEPTH_CACHE_FLUSH |
         PIPE_CONTROL_VF_CACHE_INVALIDATE |
         PIPE_CONTROL_TC_FLUSH |
         PIPE_CONTROL_NO_WRITE |
         PIPE_CONTROL_CS_STALL,
         0, 0, false, p->cp);
}
 
/**
 * Emit a PIPE_CONTROL that writes the GPU timestamp into slot \p index
 * (a 64-bit slot) of \p bo.
 */
void
ilo_3d_pipeline_emit_write_timestamp_gen6(struct ilo_3d_pipeline *p,
                                          struct intel_bo *bo, int index)
{
   /* GEN6 requires a post-sync workaround before certain PIPE_CONTROLs */
   if (p->dev->gen == ILO_GEN(6))
      gen6_wa_pipe_control_post_sync(p, true);

   p->gen6_PIPE_CONTROL(p->dev,
         PIPE_CONTROL_WRITE_TIMESTAMP,
         bo, index * sizeof(uint64_t) | PIPE_CONTROL_GLOBAL_GTT_WRITE,
         true, p->cp);
}
 
/**
 * Emit a PIPE_CONTROL that writes the depth count (for occlusion queries)
 * into slot \p index (a 64-bit slot) of \p bo.
 */
void
ilo_3d_pipeline_emit_write_depth_count_gen6(struct ilo_3d_pipeline *p,
                                            struct intel_bo *bo, int index)
{
   /* GEN6 requires a post-sync workaround before certain PIPE_CONTROLs */
   if (p->dev->gen == ILO_GEN(6))
      gen6_wa_pipe_control_post_sync(p, false);

   p->gen6_PIPE_CONTROL(p->dev,
         PIPE_CONTROL_DEPTH_STALL |
         PIPE_CONTROL_WRITE_DEPTH_COUNT,
         bo, index * sizeof(uint64_t) | PIPE_CONTROL_GLOBAL_GTT_WRITE,
         true, p->cp);
}
 
/**
 * Return a worst-case size estimate for the commands of a single draw.
 * The result does not depend on the context, so it is computed once and
 * cached in a function-local static.
 */
static int
gen6_pipeline_estimate_commands(const struct ilo_3d_pipeline *p,
                                const struct ilo_gpe_gen6 *gen6,
                                const struct ilo_context *ilo)
{
   static int size;
   enum ilo_gpe_gen6_command cmd;

   /* cached result from a previous call */
   if (size)
      return size;

   for (cmd = 0; cmd < ILO_GPE_GEN6_COMMAND_COUNT; cmd++) {
      int count;

      /* per-command worst-case emit counts */
      switch (cmd) {
      case ILO_GPE_GEN6_PIPE_CONTROL:
         /* for the workaround */
         count = 2;
         /* another one after 3DSTATE_URB */
         count += 1;
         /* and another one after 3DSTATE_CONSTANT_VS */
         count += 1;
         break;
      case ILO_GPE_GEN6_3DSTATE_GS_SVB_INDEX:
         /* there are 4 SVBIs */
         count = 4;
         break;
      case ILO_GPE_GEN6_3DSTATE_VERTEX_BUFFERS:
         count = 33;
         break;
      case ILO_GPE_GEN6_3DSTATE_VERTEX_ELEMENTS:
         count = 34;
         break;
      case ILO_GPE_GEN6_MEDIA_VFE_STATE:
      case ILO_GPE_GEN6_MEDIA_CURBE_LOAD:
      case ILO_GPE_GEN6_MEDIA_INTERFACE_DESCRIPTOR_LOAD:
      case ILO_GPE_GEN6_MEDIA_GATEWAY_STATE:
      case ILO_GPE_GEN6_MEDIA_STATE_FLUSH:
      case ILO_GPE_GEN6_MEDIA_OBJECT_WALKER:
         /* media commands are never part of a draw */
         count = 0;
         break;
      default:
         count = 1;
         break;
      }

      if (count)
         size += gen6->estimate_command_size(p->dev, cmd, count);
   }

   return size;
}
 
/**
 * Return a worst-case size estimate for the indirect states of a single
 * draw.  The context-independent part is cached in a static; the rest is
 * derived from the current framebuffer, stream output, views, constant
 * buffers, samplers, and VS push constants.
 */
static int
gen6_pipeline_estimate_states(const struct ilo_3d_pipeline *p,
                              const struct ilo_gpe_gen6 *gen6,
                              const struct ilo_context *ilo)
{
   static int static_size;
   int shader_type, count, size;

   /* compute the context-independent portion only once */
   if (!static_size) {
      struct {
         enum ilo_gpe_gen6_state state;
         int count;
      } static_states[] = {
         /* viewports */
         { ILO_GPE_GEN6_SF_VIEWPORT, 1 },
         { ILO_GPE_GEN6_CLIP_VIEWPORT, 1 },
         { ILO_GPE_GEN6_CC_VIEWPORT, 1 },
         /* cc */
         { ILO_GPE_GEN6_COLOR_CALC_STATE, 1 },
         { ILO_GPE_GEN6_BLEND_STATE, ILO_MAX_DRAW_BUFFERS },
         { ILO_GPE_GEN6_DEPTH_STENCIL_STATE, 1 },
         /* scissors */
         { ILO_GPE_GEN6_SCISSOR_RECT, 1 },
         /* binding table (vs, gs, fs) */
         { ILO_GPE_GEN6_BINDING_TABLE_STATE, ILO_MAX_VS_SURFACES },
         { ILO_GPE_GEN6_BINDING_TABLE_STATE, ILO_MAX_GS_SURFACES },
         { ILO_GPE_GEN6_BINDING_TABLE_STATE, ILO_MAX_WM_SURFACES },
      };
      int i;

      for (i = 0; i < Elements(static_states); i++) {
         static_size += gen6->estimate_state_size(p->dev,
               static_states[i].state,
               static_states[i].count);
      }
   }

   size = static_size;

   /*
    * render targets (fs)
    * stream outputs (gs)
    * sampler views (vs, fs)
    * constant buffers (vs, fs)
    */
   count = ilo->fb.state.nr_cbufs;

   /* stream output surfaces come from the GS when bound, else the VS */
   if (ilo->gs) {
      const struct pipe_stream_output_info *so_info =
         ilo_shader_get_kernel_so_info(ilo->gs);

      count += so_info->num_outputs;
   }
   else if (ilo->vs) {
      const struct pipe_stream_output_info *so_info =
         ilo_shader_get_kernel_so_info(ilo->vs);

      count += so_info->num_outputs;
   }

   for (shader_type = 0; shader_type < PIPE_SHADER_TYPES; shader_type++) {
      count += ilo->view[shader_type].count;
      count += util_bitcount(ilo->cbuf[shader_type].enabled_mask);
   }

   if (count) {
      size += gen6->estimate_state_size(p->dev,
            ILO_GPE_GEN6_SURFACE_STATE, count);
   }

   /* samplers (vs, fs) */
   for (shader_type = 0; shader_type < PIPE_SHADER_TYPES; shader_type++) {
      count = ilo->sampler[shader_type].count;
      if (count) {
         size += gen6->estimate_state_size(p->dev,
               ILO_GPE_GEN6_SAMPLER_BORDER_COLOR_STATE, count);
         size += gen6->estimate_state_size(p->dev,
               ILO_GPE_GEN6_SAMPLER_STATE, count);
      }
   }

   /* pcb (vs) */
   if (ilo->vs &&
       ilo_shader_get_kernel_param(ilo->vs, ILO_KERNEL_VS_PCB_UCP_SIZE)) {
      const int pcb_size =
         ilo_shader_get_kernel_param(ilo->vs, ILO_KERNEL_VS_PCB_UCP_SIZE);

      size += gen6->estimate_state_size(p->dev,
            ILO_GPE_GEN6_PUSH_CONSTANT_BUFFER, pcb_size);
   }

   return size;
}
 
/**
 * Estimate the batch space needed for a pipeline action.
 *
 * \param arg  for ILO_3D_PIPELINE_DRAW, the ilo_context; unused otherwise.
 */
static int
ilo_3d_pipeline_estimate_size_gen6(struct ilo_3d_pipeline *p,
                                   enum ilo_3d_pipeline_action action,
                                   const void *arg)
{
   const struct ilo_gpe_gen6 *gen6 = ilo_gpe_gen6_get();
   int size;

   switch (action) {
   case ILO_3D_PIPELINE_DRAW:
      {
         const struct ilo_context *ilo = arg;

         size = gen6_pipeline_estimate_commands(p, gen6, ilo) +
                gen6_pipeline_estimate_states(p, gen6, ilo);
      }
      break;
   case ILO_3D_PIPELINE_FLUSH:
   case ILO_3D_PIPELINE_WRITE_DEPTH_COUNT:
      /* both actions emit up to three PIPE_CONTROLs */
      size = gen6->estimate_command_size(p->dev,
            ILO_GPE_GEN6_PIPE_CONTROL, 1) * 3;
      break;
   case ILO_3D_PIPELINE_WRITE_TIMESTAMP:
      size = gen6->estimate_command_size(p->dev,
            ILO_GPE_GEN6_PIPE_CONTROL, 1) * 2;
      break;
   default:
      assert(!"unknown 3D pipeline action");
      size = 0;
      break;
   }

   return size;
}
 
/**
 * Initialize the GEN6 function table of a pipeline: the estimate/emit
 * entry points and every command/state emitter, taken from the GEN6 GPE.
 */
void
ilo_3d_pipeline_init_gen6(struct ilo_3d_pipeline *p)
{
   const struct ilo_gpe_gen6 *gen6 = ilo_gpe_gen6_get();

   p->estimate_size = ilo_3d_pipeline_estimate_size_gen6;
   p->emit_draw = ilo_3d_pipeline_emit_draw_gen6;
   p->emit_flush = ilo_3d_pipeline_emit_flush_gen6;
   p->emit_write_timestamp = ilo_3d_pipeline_emit_write_timestamp_gen6;
   p->emit_write_depth_count = ilo_3d_pipeline_emit_write_depth_count_gen6;

/* wire p->gen6_<name> to the GPE's emit_<name> */
#define GEN6_USE(p, name, from) \
   p->gen6_ ## name = from->emit_ ## name
   GEN6_USE(p, STATE_BASE_ADDRESS, gen6);
   GEN6_USE(p, STATE_SIP, gen6);
   GEN6_USE(p, PIPELINE_SELECT, gen6);
   GEN6_USE(p, 3DSTATE_BINDING_TABLE_POINTERS, gen6);
   GEN6_USE(p, 3DSTATE_SAMPLER_STATE_POINTERS, gen6);
   GEN6_USE(p, 3DSTATE_URB, gen6);
   GEN6_USE(p, 3DSTATE_VERTEX_BUFFERS, gen6);
   GEN6_USE(p, 3DSTATE_VERTEX_ELEMENTS, gen6);
   GEN6_USE(p, 3DSTATE_INDEX_BUFFER, gen6);
   GEN6_USE(p, 3DSTATE_VF_STATISTICS, gen6);
   GEN6_USE(p, 3DSTATE_VIEWPORT_STATE_POINTERS, gen6);
   GEN6_USE(p, 3DSTATE_CC_STATE_POINTERS, gen6);
   GEN6_USE(p, 3DSTATE_SCISSOR_STATE_POINTERS, gen6);
   GEN6_USE(p, 3DSTATE_VS, gen6);
   GEN6_USE(p, 3DSTATE_GS, gen6);
   GEN6_USE(p, 3DSTATE_CLIP, gen6);
   GEN6_USE(p, 3DSTATE_SF, gen6);
   GEN6_USE(p, 3DSTATE_WM, gen6);
   GEN6_USE(p, 3DSTATE_CONSTANT_VS, gen6);
   GEN6_USE(p, 3DSTATE_CONSTANT_GS, gen6);
   GEN6_USE(p, 3DSTATE_CONSTANT_PS, gen6);
   GEN6_USE(p, 3DSTATE_SAMPLE_MASK, gen6);
   GEN6_USE(p, 3DSTATE_DRAWING_RECTANGLE, gen6);
   GEN6_USE(p, 3DSTATE_DEPTH_BUFFER, gen6);
   GEN6_USE(p, 3DSTATE_POLY_STIPPLE_OFFSET, gen6);
   GEN6_USE(p, 3DSTATE_POLY_STIPPLE_PATTERN, gen6);
   GEN6_USE(p, 3DSTATE_LINE_STIPPLE, gen6);
   GEN6_USE(p, 3DSTATE_AA_LINE_PARAMETERS, gen6);
   GEN6_USE(p, 3DSTATE_GS_SVB_INDEX, gen6);
   GEN6_USE(p, 3DSTATE_MULTISAMPLE, gen6);
   GEN6_USE(p, 3DSTATE_STENCIL_BUFFER, gen6);
   GEN6_USE(p, 3DSTATE_HIER_DEPTH_BUFFER, gen6);
   GEN6_USE(p, 3DSTATE_CLEAR_PARAMS, gen6);
   GEN6_USE(p, PIPE_CONTROL, gen6);
   GEN6_USE(p, 3DPRIMITIVE, gen6);
   GEN6_USE(p, INTERFACE_DESCRIPTOR_DATA, gen6);
   GEN6_USE(p, SF_VIEWPORT, gen6);
   GEN6_USE(p, CLIP_VIEWPORT, gen6);
   GEN6_USE(p, CC_VIEWPORT, gen6);
   GEN6_USE(p, COLOR_CALC_STATE, gen6);
   GEN6_USE(p, BLEND_STATE, gen6);
   GEN6_USE(p, DEPTH_STENCIL_STATE, gen6);
   GEN6_USE(p, SCISSOR_RECT, gen6);
   GEN6_USE(p, BINDING_TABLE_STATE, gen6);
   GEN6_USE(p, SURFACE_STATE, gen6);
   GEN6_USE(p, so_SURFACE_STATE, gen6);
   GEN6_USE(p, SAMPLER_STATE, gen6);
   GEN6_USE(p, SAMPLER_BORDER_COLOR_STATE, gen6);
   GEN6_USE(p, push_constant_buffer, gen6);
#undef GEN6_USE
}
/contrib/sdk/sources/Mesa/src/gallium/drivers/ilo/ilo_3d_pipeline_gen6.h
0,0 → 1,165
/*
* Mesa 3-D graphics library
*
* Copyright (C) 2012-2013 LunarG, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*
* Authors:
* Chia-I Wu <olv@lunarg.com>
*/
 
#ifndef ILO_3D_PIPELINE_GEN6_H
#define ILO_3D_PIPELINE_GEN6_H
 
#include "ilo_common.h"
 
struct ilo_3d_pipeline;
struct ilo_context;
 
/**
 * Per-draw bookkeeping shared by the GEN6 and GEN7 pipeline paths.
 * Filled in by gen6_pipeline_prepare() and updated as states are
 * uploaded, so later emitters know what changed.
 */
struct gen6_pipeline_session {
   /* effective dirty mask for the current phase (states or commands) */
   uint32_t pipe_dirty;

   /* reduced primitive type of the current draw */
   int reduced_prim;
   /* batch space available when the session started (for size checks) */
   int init_cp_space;

   /* what was invalidated since the last draw */
   bool hw_ctx_changed;
   bool batch_bo_changed;
   bool state_bo_changed;
   bool kernel_bo_changed;
   bool prim_changed;
   bool primitive_restart_changed;

   /* gen-specific hook for uploading indirect states */
   void (*emit_draw_states)(struct ilo_3d_pipeline *p,
                            const struct ilo_context *ilo,
                            struct gen6_pipeline_session *session);

   /* gen-specific hook for emitting draw commands */
   void (*emit_draw_commands)(struct ilo_3d_pipeline *p,
                              const struct ilo_context *ilo,
                              struct gen6_pipeline_session *session);

   /* indirect states: which ones were (re-)uploaded in this session */
   bool viewport_state_changed;
   bool cc_state_blend_changed;
   bool cc_state_dsa_changed;
   bool cc_state_cc_changed;
   bool scissor_state_changed;
   bool binding_table_vs_changed;
   bool binding_table_gs_changed;
   bool binding_table_fs_changed;
   bool sampler_state_vs_changed;
   bool sampler_state_gs_changed;
   bool sampler_state_fs_changed;
   bool pcb_state_vs_changed;
   bool pcb_state_gs_changed;
   bool pcb_state_fs_changed;

   /* per-stage count of SURFACE_STATEs uploaded in this session */
   int num_surfaces[PIPE_SHADER_TYPES];
};
 
/* session lifecycle */
void
gen6_pipeline_prepare(const struct ilo_3d_pipeline *p,
                      const struct ilo_context *ilo,
                      struct gen6_pipeline_session *session);

void
gen6_pipeline_draw(struct ilo_3d_pipeline *p,
                   const struct ilo_context *ilo,
                   struct gen6_pipeline_session *session);

void
gen6_pipeline_end(struct ilo_3d_pipeline *p,
                  const struct ilo_context *ilo,
                  struct gen6_pipeline_session *session);

/* command emitters shared with the GEN7 path */
void
gen6_pipeline_common_select(struct ilo_3d_pipeline *p,
                            const struct ilo_context *ilo,
                            struct gen6_pipeline_session *session);

void
gen6_pipeline_common_sip(struct ilo_3d_pipeline *p,
                         const struct ilo_context *ilo,
                         struct gen6_pipeline_session *session);

void
gen6_pipeline_common_base_address(struct ilo_3d_pipeline *p,
                                  const struct ilo_context *ilo,
                                  struct gen6_pipeline_session *session);

void
gen6_pipeline_vf(struct ilo_3d_pipeline *p,
                 const struct ilo_context *ilo,
                 struct gen6_pipeline_session *session);

void
gen6_pipeline_vf_statistics(struct ilo_3d_pipeline *p,
                            const struct ilo_context *ilo,
                            struct gen6_pipeline_session *session);

void
gen6_pipeline_vf_draw(struct ilo_3d_pipeline *p,
                      const struct ilo_context *ilo,
                      struct gen6_pipeline_session *session);

void
gen6_pipeline_vs(struct ilo_3d_pipeline *p,
                 const struct ilo_context *ilo,
                 struct gen6_pipeline_session *session);

void
gen6_pipeline_clip(struct ilo_3d_pipeline *p,
                   const struct ilo_context *ilo,
                   struct gen6_pipeline_session *session);

void
gen6_pipeline_sf_rect(struct ilo_3d_pipeline *p,
                      const struct ilo_context *ilo,
                      struct gen6_pipeline_session *session);

void
gen6_pipeline_wm_raster(struct ilo_3d_pipeline *p,
                        const struct ilo_context *ilo,
                        struct gen6_pipeline_session *session);

/* indirect state uploader shared with the GEN7 path */
void
gen6_pipeline_states(struct ilo_3d_pipeline *p,
                     const struct ilo_context *ilo,
                     struct gen6_pipeline_session *session);

bool
gen6_pipeline_update_max_svbi(struct ilo_3d_pipeline *p,
                              const struct ilo_context *ilo,
                              struct gen6_pipeline_session *session);

/* non-draw emitters */
void
ilo_3d_pipeline_emit_flush_gen6(struct ilo_3d_pipeline *p);

void
ilo_3d_pipeline_emit_write_timestamp_gen6(struct ilo_3d_pipeline *p,
                                          struct intel_bo *bo, int index);

void
ilo_3d_pipeline_emit_write_depth_count_gen6(struct ilo_3d_pipeline *p,
                                            struct intel_bo *bo, int index);

void
ilo_3d_pipeline_init_gen6(struct ilo_3d_pipeline *p);

#endif /* ILO_3D_PIPELINE_GEN6_H */
/contrib/sdk/sources/Mesa/src/gallium/drivers/ilo/ilo_3d_pipeline_gen7.c
0,0 → 1,872
/*
* Mesa 3-D graphics library
*
* Copyright (C) 2013 LunarG, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*
* Authors:
* Chia-I Wu <olv@lunarg.com>
*/
 
#include "util/u_dual_blend.h"
#include "intel_reg.h"
 
#include "ilo_common.h"
#include "ilo_context.h"
#include "ilo_cp.h"
#include "ilo_gpe_gen7.h"
#include "ilo_shader.h"
#include "ilo_state.h"
#include "ilo_3d_pipeline.h"
#include "ilo_3d_pipeline_gen6.h"
#include "ilo_3d_pipeline_gen7.h"
 
/**
 * Emit the GEN7 CS-stall workaround PIPE_CONTROL, at most once per batch
 * (tracked with the same flag the GEN6 path uses).
 */
static void
gen7_wa_pipe_control_cs_stall(struct ilo_3d_pipeline *p,
                              bool change_multisample_state,
                              bool change_depth_state)
{
   struct intel_bo *bo = NULL;
   uint32_t dw1 = PIPE_CONTROL_CS_STALL;

   assert(p->dev->gen == ILO_GEN(7));

   /* emit once */
   if (p->state.has_gen6_wa_pipe_control)
      return;
   p->state.has_gen6_wa_pipe_control = true;

   /*
    * From the Ivy Bridge PRM, volume 2 part 1, page 258:
    *
    *     "Due to an HW issue driver needs to send a pipe control with stall
    *      when ever there is state change in depth bias related state"
    *
    * From the Ivy Bridge PRM, volume 2 part 1, page 292:
    *
    *     "A PIPE_CONTOL command with the CS Stall bit set must be programmed
    *      in the ring after this instruction
    *      (3DSTATE_PUSH_CONSTANT_ALLOC_PS)."
    *
    * From the Ivy Bridge PRM, volume 2 part 1, page 304:
    *
    *     "Driver must ensure that all the caches in the depth pipe are
    *      flushed before this command (3DSTATE_MULTISAMPLE) is parsed.  This
    *      requires driver to send a PIPE_CONTROL with a CS stall along with a
    *      Depth Flush prior to this command."
    *
    * From the Ivy Bridge PRM, volume 2 part 1, page 315:
    *
    *     "Driver must send a least one PIPE_CONTROL command with CS Stall and
    *      a post sync operation prior to the group of depth
    *      commands(3DSTATE_DEPTH_BUFFER, 3DSTATE_CLEAR_PARAMS,
    *      3DSTATE_STENCIL_BUFFER, and 3DSTATE_HIER_DEPTH_BUFFER)."
    */

   if (change_multisample_state)
      dw1 |= PIPE_CONTROL_DEPTH_CACHE_FLUSH;

   /* a post-sync write serves as the required post sync operation */
   if (change_depth_state) {
      dw1 |= PIPE_CONTROL_WRITE_IMMEDIATE;
      bo = p->workaround_bo;
   }

   p->gen6_PIPE_CONTROL(p->dev, dw1, bo, 0, false, p->cp);
}
 
/**
 * Emit the depth-stall PIPE_CONTROL that must precede any VS-related
 * 3DSTATE command on GEN7.
 */
static void
gen7_wa_pipe_control_vs_depth_stall(struct ilo_3d_pipeline *p)
{
   assert(p->dev->gen == ILO_GEN(7));

   /*
    * From the Ivy Bridge PRM, volume 2 part 1, page 106:
    *
    *     "A PIPE_CONTROL with Post-Sync Operation set to 1h and a depth stall
    *      needs to be sent just prior to any 3DSTATE_VS, 3DSTATE_URB_VS,
    *      3DSTATE_CONSTANT_VS, 3DSTATE_BINDING_TABLE_POINTER_VS,
    *      3DSTATE_SAMPLER_STATE_POINTER_VS command.  Only one PIPE_CONTROL
    *      needs to be sent before any combination of VS associated 3DSTATE."
    */
   p->gen6_PIPE_CONTROL(p->dev,
         PIPE_CONTROL_DEPTH_STALL |
         PIPE_CONTROL_WRITE_IMMEDIATE,
         p->workaround_bo, 0, false, p->cp);
}
 
/**
 * Emit the depth-stall PIPE_CONTROL(s) required after WM-related state
 * changes on GEN7; a stall/flush/stall triple when the depth buffer
 * state itself is about to change.
 */
static void
gen7_wa_pipe_control_wm_depth_stall(struct ilo_3d_pipeline *p,
                                    bool change_depth_buffer)
{
   assert(p->dev->gen == ILO_GEN(7));

   /*
    * From the Ivy Bridge PRM, volume 2 part 1, page 276:
    *
    *     "The driver must make sure a PIPE_CONTROL with the Depth Stall
    *      Enable bit set after all the following states are programmed:
    *
    *       * 3DSTATE_PS
    *       * 3DSTATE_VIEWPORT_STATE_POINTERS_CC
    *       * 3DSTATE_CONSTANT_PS
    *       * 3DSTATE_BINDING_TABLE_POINTERS_PS
    *       * 3DSTATE_SAMPLER_STATE_POINTERS_PS
    *       * 3DSTATE_CC_STATE_POINTERS
    *       * 3DSTATE_BLEND_STATE_POINTERS
    *       * 3DSTATE_DEPTH_STENCIL_STATE_POINTERS"
    *
    * From the Ivy Bridge PRM, volume 2 part 1, page 315:
    *
    *     "Restriction: Prior to changing Depth/Stencil Buffer state (i.e.,
    *      any combination of 3DSTATE_DEPTH_BUFFER, 3DSTATE_CLEAR_PARAMS,
    *      3DSTATE_STENCIL_BUFFER, 3DSTATE_HIER_DEPTH_BUFFER) SW must first
    *      issue a pipelined depth stall (PIPE_CONTROL with Depth Stall bit
    *      set), followed by a pipelined depth cache flush (PIPE_CONTROL with
    *      Depth Flush Bit set, followed by another pipelined depth stall
    *      (PIPE_CONTROL with Depth Stall Bit set), unless SW can otherwise
    *      guarantee that the pipeline from WM onwards is already flushed
    *      (e.g., via a preceding MI_FLUSH)."
    */
   p->gen6_PIPE_CONTROL(p->dev,
         PIPE_CONTROL_DEPTH_STALL,
         NULL, 0, false, p->cp);

   if (!change_depth_buffer)
      return;

   p->gen6_PIPE_CONTROL(p->dev,
         PIPE_CONTROL_DEPTH_CACHE_FLUSH,
         NULL, 0, false, p->cp);

   p->gen6_PIPE_CONTROL(p->dev,
         PIPE_CONTROL_DEPTH_STALL,
         NULL, 0, false, p->cp);
}
 
/**
 * Emit the scoreboard-stall PIPE_CONTROL required when 3DSTATE_WM's
 * maximum thread count changes between 3DPRIMITIVEs on GEN7.
 */
static void
gen7_wa_pipe_control_wm_max_threads_stall(struct ilo_3d_pipeline *p)
{
   assert(p->dev->gen == ILO_GEN(7));

   /*
    * From the Ivy Bridge PRM, volume 2 part 1, page 286:
    *
    *     "If this field (Maximum Number of Threads in 3DSTATE_WM) is changed
    *      between 3DPRIMITIVE commands, a PIPE_CONTROL command with Stall at
    *      Pixel Scoreboard set is required to be issued."
    */
   p->gen6_PIPE_CONTROL(p->dev,
         PIPE_CONTROL_STALL_AT_SCOREBOARD,
         NULL, 0, false, p->cp);
}
 
#define DIRTY(state) (session->pipe_dirty & ILO_DIRTY_ ## state)
 
/**
 * Emit 3DSTATE_URB_{VS,GS,HS,DS}.  All URB space past the PCB region is
 * given to the VS; GS/HS/DS get none (they are not used by this driver
 * on GEN7 — see gen7_pipeline_hs/ds/gs below).
 */
static void
gen7_pipeline_common_urb(struct ilo_3d_pipeline *p,
                         const struct ilo_context *ilo,
                         struct gen6_pipeline_session *session)
{
   /* 3DSTATE_URB_{VS,GS,HS,DS} */
   if (DIRTY(VE) || DIRTY(VS)) {
      /* the first 16KB are reserved for VS and PS PCBs */
      const int offset = 16 * 1024;
      int vs_entry_size, vs_total_size;

      vs_entry_size = (ilo->vs) ?
         ilo_shader_get_kernel_param(ilo->vs, ILO_KERNEL_OUTPUT_COUNT) : 0;

      /*
       * From the Ivy Bridge PRM, volume 2 part 1, page 35:
       *
       *     "Programming Restriction: As the VS URB entry serves as both the
       *      per-vertex input and output of the VS shader, the VS URB
       *      Allocation Size must be sized to the maximum of the vertex input
       *      and output structures."
       */
      if (vs_entry_size < ilo->ve->count)
         vs_entry_size = ilo->ve->count;

      /* convert from attribute count to bytes (4 floats per attribute) */
      vs_entry_size *= sizeof(float) * 4;
      vs_total_size = ilo->dev->urb_size - offset;

      /* a depth stall is required before any VS-related 3DSTATE */
      gen7_wa_pipe_control_vs_depth_stall(p);

      p->gen7_3DSTATE_URB_VS(p->dev,
            offset, vs_total_size, vs_entry_size, p->cp);

      p->gen7_3DSTATE_URB_GS(p->dev, offset, 0, 0, p->cp);
      p->gen7_3DSTATE_URB_HS(p->dev, offset, 0, 0, p->cp);
      p->gen7_3DSTATE_URB_DS(p->dev, offset, 0, 0, p->cp);
   }
}
 
/**
 * Emit 3DSTATE_PUSH_CONSTANT_ALLOC_{VS,PS}, splitting the first 16KB of
 * the URB evenly between the VS and PS push constant buffers.  Only needed
 * once per HW context.
 */
static void
gen7_pipeline_common_pcb_alloc(struct ilo_3d_pipeline *p,
                               const struct ilo_context *ilo,
                               struct gen6_pipeline_session *session)
{
   /* 3DSTATE_PUSH_CONSTANT_ALLOC_{VS,PS} */
   if (session->hw_ctx_changed) {
      /*
       * push constant buffers are only allowed to take up at most the first
       * 16KB of the URB
       */
      p->gen7_3DSTATE_PUSH_CONSTANT_ALLOC_VS(p->dev,
            0, 8192, p->cp);

      p->gen7_3DSTATE_PUSH_CONSTANT_ALLOC_PS(p->dev,
            8192, 8192, p->cp);

      /* a CS stall is required after 3DSTATE_PUSH_CONSTANT_ALLOC_PS */
      gen7_wa_pipe_control_cs_stall(p, true, true);
   }
}
 
/**
 * Emit 3DSTATE_VIEWPORT_STATE_POINTERS_{CC,SF_CLIP} when the viewport
 * state was re-uploaded in this session.
 */
static void
gen7_pipeline_common_pointers_1(struct ilo_3d_pipeline *p,
                                const struct ilo_context *ilo,
                                struct gen6_pipeline_session *session)
{
   /* 3DSTATE_VIEWPORT_STATE_POINTERS_{CC,SF_CLIP} */
   if (session->viewport_state_changed) {
      p->gen7_3DSTATE_VIEWPORT_STATE_POINTERS_CC(p->dev,
            p->state.CC_VIEWPORT, p->cp);

      p->gen7_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP(p->dev,
            p->state.SF_CLIP_VIEWPORT, p->cp);
   }
}
 
/**
 * Emit the blend/CC/depth-stencil state pointer commands for whichever of
 * those states was re-uploaded in this session.
 */
static void
gen7_pipeline_common_pointers_2(struct ilo_3d_pipeline *p,
                                const struct ilo_context *ilo,
                                struct gen6_pipeline_session *session)
{
   /* 3DSTATE_BLEND_STATE_POINTERS */
   if (session->cc_state_blend_changed) {
      p->gen7_3DSTATE_BLEND_STATE_POINTERS(p->dev,
            p->state.BLEND_STATE, p->cp);
   }

   /* 3DSTATE_CC_STATE_POINTERS */
   if (session->cc_state_cc_changed) {
      p->gen7_3DSTATE_CC_STATE_POINTERS(p->dev,
            p->state.COLOR_CALC_STATE, p->cp);
   }

   /* 3DSTATE_DEPTH_STENCIL_STATE_POINTERS */
   if (session->cc_state_dsa_changed) {
      p->gen7_3DSTATE_DEPTH_STENCIL_STATE_POINTERS(p->dev,
            p->state.DEPTH_STENCIL_STATE, p->cp);
   }
}
 
/**
 * Emit the GEN7 VS stage commands: the required depth-stall workaround,
 * the VS binding table and sampler pointers, then the shared GEN6 VS path
 * (3DSTATE_CONSTANT_VS and 3DSTATE_VS).
 */
static void
gen7_pipeline_vs(struct ilo_3d_pipeline *p,
                 const struct ilo_context *ilo,
                 struct gen6_pipeline_session *session)
{
   const bool emit_3dstate_binding_table = session->binding_table_vs_changed;
   const bool emit_3dstate_sampler_state = session->sampler_state_vs_changed;
   /* see gen6_pipeline_vs() */
   const bool emit_3dstate_constant_vs = session->pcb_state_vs_changed;
   const bool emit_3dstate_vs = (DIRTY(VS) || DIRTY(SAMPLER_VS));

   /* emit depth stall before any of the VS commands */
   if (emit_3dstate_binding_table || emit_3dstate_sampler_state ||
       emit_3dstate_constant_vs || emit_3dstate_vs)
      gen7_wa_pipe_control_vs_depth_stall(p);

   /* 3DSTATE_BINDING_TABLE_POINTERS_VS */
   if (emit_3dstate_binding_table) {
      p->gen7_3DSTATE_BINDING_TABLE_POINTERS_VS(p->dev,
            p->state.vs.BINDING_TABLE_STATE, p->cp);
   }

   /* 3DSTATE_SAMPLER_STATE_POINTERS_VS */
   if (emit_3dstate_sampler_state) {
      p->gen7_3DSTATE_SAMPLER_STATE_POINTERS_VS(p->dev,
            p->state.vs.SAMPLER_STATE, p->cp);
   }

   gen6_pipeline_vs(p, ilo, session);
}
 
/**
 * Emit the HS stage commands.  The HS is never enabled by this driver, so
 * the disabling state only needs to be emitted once per HW context.
 */
static void
gen7_pipeline_hs(struct ilo_3d_pipeline *p,
                 const struct ilo_context *ilo,
                 struct gen6_pipeline_session *session)
{
   if (session->hw_ctx_changed) {
      /* 3DSTATE_CONSTANT_HS and 3DSTATE_HS */
      p->gen7_3DSTATE_CONSTANT_HS(p->dev, 0, 0, 0, p->cp);
      p->gen7_3DSTATE_HS(p->dev, NULL, 0, p->cp);

      /* 3DSTATE_BINDING_TABLE_POINTERS_HS */
      p->gen7_3DSTATE_BINDING_TABLE_POINTERS_HS(p->dev, 0, p->cp);
   }
}
 
/**
 * Emit 3DSTATE_TE (tessellation is never enabled by this driver), once
 * per HW context.
 */
static void
gen7_pipeline_te(struct ilo_3d_pipeline *p,
                 const struct ilo_context *ilo,
                 struct gen6_pipeline_session *session)
{
   /* 3DSTATE_TE */
   if (session->hw_ctx_changed)
      p->gen7_3DSTATE_TE(p->dev, p->cp);
}
 
/**
 * Emit the DS stage commands.  The DS is never enabled by this driver, so
 * the disabling state only needs to be emitted once per HW context.
 */
static void
gen7_pipeline_ds(struct ilo_3d_pipeline *p,
                 const struct ilo_context *ilo,
                 struct gen6_pipeline_session *session)
{
   if (session->hw_ctx_changed) {
      /* 3DSTATE_CONSTANT_DS and 3DSTATE_DS */
      p->gen7_3DSTATE_CONSTANT_DS(p->dev, 0, 0, 0, p->cp);
      p->gen7_3DSTATE_DS(p->dev, NULL, 0, p->cp);

      /* 3DSTATE_BINDING_TABLE_POINTERS_DS */
      p->gen7_3DSTATE_BINDING_TABLE_POINTERS_DS(p->dev, 0, p->cp);
   }
}
 
/**
 * Emit the GS stage commands.  The GS is kept disabled (the constant
 * command reuses the GEN6 emitter); only the binding table pointer is
 * refreshed when it changes.
 */
static void
gen7_pipeline_gs(struct ilo_3d_pipeline *p,
                 const struct ilo_context *ilo,
                 struct gen6_pipeline_session *session)
{
   /* 3DSTATE_CONSTANT_GS and 3DSTATE_GS */
   if (session->hw_ctx_changed) {
      p->gen6_3DSTATE_CONSTANT_GS(p->dev, 0, 0, 0, p->cp);
      p->gen7_3DSTATE_GS(p->dev, NULL, 0, p->cp);
   }

   /* 3DSTATE_BINDING_TABLE_POINTERS_GS */
   if (session->binding_table_gs_changed) {
      p->gen7_3DSTATE_BINDING_TABLE_POINTERS_GS(p->dev,
            p->state.gs.BINDING_TABLE_STATE, p->cp);
   }
}
 
/*
 * Emit the stream-output (SOL) commands: 3DSTATE_SO_BUFFER,
 * 3DSTATE_SO_DECL_LIST, and 3DSTATE_STREAMOUT.
 *
 * The stream-output declarations come from the last geometry-processing
 * shader: the GS when bound, otherwise the VS.
 */
static void
gen7_pipeline_sol(struct ilo_3d_pipeline *p,
                  const struct ilo_context *ilo,
                  struct gen6_pipeline_session *session)
{
   const struct pipe_stream_output_info *so_info;
   const struct ilo_shader_state *shader;
   bool dirty_sh = false;

   /* SO state follows the GS if present, else the VS */
   if (ilo->gs) {
      shader = ilo->gs;
      dirty_sh = DIRTY(GS);
   }
   else {
      shader = ilo->vs;
      dirty_sh = DIRTY(VS);
   }

   so_info = ilo_shader_get_kernel_so_info(shader);

   gen6_pipeline_update_max_svbi(p, ilo, session);

   /* 3DSTATE_SO_BUFFER */
   if ((DIRTY(SO) || dirty_sh || session->batch_bo_changed) &&
       ilo->so.enabled) {
      int i;

      for (i = 0; i < ilo->so.count; i++) {
         const int stride = so_info->stride[i] * 4; /* in bytes */
         int base = 0;

         /* reset HW write offsets and offset buffer base */
         if (!p->cp->render_ctx) {
            /* SOL_RESET zeroes the HW write offsets on the next execbuf,
             * so account for the vertices already written by offsetting
             * the buffer base instead */
            ilo_cp_set_one_off_flags(p->cp, INTEL_EXEC_GEN7_SOL_RESET);
            base += p->state.so_num_vertices * stride;
         }

         p->gen7_3DSTATE_SO_BUFFER(p->dev, i, base, stride,
               ilo->so.states[i], p->cp);
      }

      /* disable the remaining SO buffer slots (HW has four) */
      for (; i < 4; i++)
         p->gen7_3DSTATE_SO_BUFFER(p->dev, i, 0, 0, NULL, p->cp);
   }

   /* 3DSTATE_SO_DECL_LIST */
   if (dirty_sh && ilo->so.enabled)
      p->gen7_3DSTATE_SO_DECL_LIST(p->dev, so_info, p->cp);

   /* 3DSTATE_STREAMOUT */
   if (DIRTY(SO) || DIRTY(RASTERIZER) || dirty_sh) {
      const unsigned buffer_mask = (1 << ilo->so.count) - 1;
      const int output_count = ilo_shader_get_kernel_param(shader,
            ILO_KERNEL_OUTPUT_COUNT);

      p->gen7_3DSTATE_STREAMOUT(p->dev, buffer_mask, output_count,
            ilo->rasterizer->state.rasterizer_discard, p->cp);
   }
}
 
/*
 * Emit the SF stage commands: 3DSTATE_SBE (attribute setup, which depends
 * on the last pre-rasterizer shader) and 3DSTATE_SF.
 */
static void
gen7_pipeline_sf(struct ilo_3d_pipeline *p,
                 const struct ilo_context *ilo,
                 struct gen6_pipeline_session *session)
{
   const bool emit_sbe =
      (DIRTY(RASTERIZER) || DIRTY(VS) || DIRTY(GS) || DIRTY(FS));
   const bool emit_sf = (DIRTY(RASTERIZER) || DIRTY(FB));

   /* 3DSTATE_SBE */
   if (emit_sbe) {
      p->gen7_3DSTATE_SBE(p->dev, ilo->rasterizer, ilo->fs,
            (ilo->gs) ? ilo->gs : ilo->vs, ilo->cp);
   }

   /* 3DSTATE_SF, preceded by the required CS stall workaround */
   if (emit_sf) {
      gen7_wa_pipe_control_cs_stall(p, true, true);
      p->gen7_3DSTATE_SF(p->dev, ilo->rasterizer, ilo->fb.state.zsbuf, p->cp);
   }
}
 
/*
 * Emit the WM/PS stage commands: 3DSTATE_WM, the PS binding table,
 * sampler and constant pointers, 3DSTATE_PS, scissor pointers, and the
 * depth/stencil buffer state, with the required depth-stall workarounds
 * interleaved in the proper order.
 */
static void
gen7_pipeline_wm(struct ilo_3d_pipeline *p,
                 const struct ilo_context *ilo,
                 struct gen6_pipeline_session *session)
{
   /* 3DSTATE_WM */
   if (DIRTY(FS) || DIRTY(BLEND) || DIRTY(DSA) || DIRTY(RASTERIZER)) {
      /* pixel may be discarded by alpha test or alpha-to-coverage */
      const bool cc_may_kill = (ilo->dsa->alpha.enabled ||
                                ilo->blend->alpha_to_coverage);

      /* GEN7-only stall before changing the WM max-thread count */
      if (p->dev->gen == ILO_GEN(7) && session->hw_ctx_changed)
         gen7_wa_pipe_control_wm_max_threads_stall(p);

      p->gen7_3DSTATE_WM(p->dev, ilo->fs,
            ilo->rasterizer, cc_may_kill, p->cp);
   }

   /* 3DSTATE_BINDING_TABLE_POINTERS_PS */
   if (session->binding_table_fs_changed) {
      p->gen7_3DSTATE_BINDING_TABLE_POINTERS_PS(p->dev,
            p->state.wm.BINDING_TABLE_STATE, p->cp);
   }

   /* 3DSTATE_SAMPLER_STATE_POINTERS_PS */
   if (session->sampler_state_fs_changed) {
      p->gen7_3DSTATE_SAMPLER_STATE_POINTERS_PS(p->dev,
            p->state.wm.SAMPLER_STATE, p->cp);
   }

   /* 3DSTATE_CONSTANT_PS: no push constants are used for the PS here */
   if (session->pcb_state_fs_changed)
      p->gen6_3DSTATE_CONSTANT_PS(p->dev, NULL, NULL, 0, p->cp);

   /* 3DSTATE_PS */
   if (DIRTY(FS) || DIRTY(SAMPLER_FS) || DIRTY(BLEND) ||
       session->kernel_bo_changed) {
      const int num_samplers = ilo->sampler[PIPE_SHADER_FRAGMENT].count;
      const bool dual_blend = ilo->blend->dual_blend;

      p->gen7_3DSTATE_PS(p->dev, ilo->fs, num_samplers, dual_blend, p->cp);
   }

   /* 3DSTATE_SCISSOR_STATE_POINTERS */
   if (session->scissor_state_changed) {
      p->gen6_3DSTATE_SCISSOR_STATE_POINTERS(p->dev,
            p->state.SCISSOR_RECT, p->cp);
   }

   /* XXX what is the best way to know if this workaround is needed? */
   {
      /* conservatively issue the depth stall whenever any PS-related or
       * depth-buffer-related state was (or may have been) emitted above */
      const bool emit_3dstate_ps =
         (DIRTY(FS) || DIRTY(SAMPLER_FS) || DIRTY(BLEND));
      const bool emit_3dstate_depth_buffer =
         (DIRTY(FB) || DIRTY(DSA) || session->state_bo_changed);

      if (emit_3dstate_ps ||
          emit_3dstate_depth_buffer ||
          session->pcb_state_fs_changed ||
          session->viewport_state_changed ||
          session->binding_table_fs_changed ||
          session->sampler_state_fs_changed ||
          session->cc_state_cc_changed ||
          session->cc_state_blend_changed ||
          session->cc_state_dsa_changed)
         gen7_wa_pipe_control_wm_depth_stall(p, emit_3dstate_depth_buffer);
   }

   /* 3DSTATE_DEPTH_BUFFER and 3DSTATE_CLEAR_PARAMS */
   if (DIRTY(FB) || session->batch_bo_changed) {
      const struct ilo_zs_surface *zs;

      if (ilo->fb.state.zsbuf) {
         const struct ilo_surface_cso *surface =
            (const struct ilo_surface_cso *) ilo->fb.state.zsbuf;

         assert(!surface->is_rt);
         zs = &surface->u.zs;
      }
      else {
         /* a null depth/stencil surface when none is bound */
         zs = &ilo->fb.null_zs;
      }

      p->gen7_3DSTATE_DEPTH_BUFFER(p->dev, zs, p->cp);
      p->gen6_3DSTATE_HIER_DEPTH_BUFFER(p->dev, zs, p->cp);
      p->gen6_3DSTATE_STENCIL_BUFFER(p->dev, zs, p->cp);

      /* TODO */
      p->gen6_3DSTATE_CLEAR_PARAMS(p->dev, 0, p->cp);
   }
}
 
/*
 * Emit the multisample state: 3DSTATE_MULTISAMPLE and 3DSTATE_SAMPLE_MASK.
 */
static void
gen7_pipeline_wm_multisample(struct ilo_3d_pipeline *p,
                             const struct ilo_context *ilo,
                             struct gen6_pipeline_session *session)
{
   /* 3DSTATE_MULTISAMPLE and 3DSTATE_SAMPLE_MASK */
   if (DIRTY(SAMPLE_MASK) || DIRTY(FB)) {
      const uint32_t *packed_sample_pos;

      /* CS stall required before changing multisample state */
      gen7_wa_pipe_control_cs_stall(p, true, true);

      /* pick packed sample positions for 8x, 4x, or 1x; the 8x member is
       * referenced without '&' while 4x/1x take '&' -- presumably 8x is
       * stored as an array and the others as scalars (TODO confirm) */
      packed_sample_pos =
         (ilo->fb.num_samples > 4) ? p->packed_sample_position_8x :
         (ilo->fb.num_samples > 1) ? &p->packed_sample_position_4x :
         &p->packed_sample_position_1x;

      p->gen6_3DSTATE_MULTISAMPLE(p->dev,
            ilo->fb.num_samples, packed_sample_pos,
            ilo->rasterizer->state.half_pixel_center, p->cp);

      /* with a single sample, force sample mask to 0x1 */
      p->gen7_3DSTATE_SAMPLE_MASK(p->dev,
            (ilo->fb.num_samples > 1) ? ilo->sample_mask : 0x1,
            ilo->fb.num_samples, p->cp);
   }
}
 
/*
 * Emit all draw commands for the GEN7 3D pipeline, in a fixed order.
 * The order here is load-bearing: several emitters rely on preceding
 * workaround PIPE_CONTROLs and state-pointer commands.
 */
static void
gen7_pipeline_commands(struct ilo_3d_pipeline *p,
                       const struct ilo_context *ilo,
                       struct gen6_pipeline_session *session)
{
   /*
    * We try to keep the order of the commands match, as closely as possible,
    * that of the classic i965 driver.  It allows us to compare the command
    * streams easily.
    */
   gen6_pipeline_common_select(p, ilo, session);
   gen6_pipeline_common_sip(p, ilo, session);
   gen6_pipeline_vf_statistics(p, ilo, session);
   gen7_pipeline_common_pcb_alloc(p, ilo, session);
   gen6_pipeline_common_base_address(p, ilo, session);
   gen7_pipeline_common_pointers_1(p, ilo, session);
   gen7_pipeline_common_urb(p, ilo, session);
   gen7_pipeline_common_pointers_2(p, ilo, session);
   gen7_pipeline_wm_multisample(p, ilo, session);
   gen7_pipeline_gs(p, ilo, session);
   gen7_pipeline_hs(p, ilo, session);
   gen7_pipeline_te(p, ilo, session);
   gen7_pipeline_ds(p, ilo, session);
   gen7_pipeline_vs(p, ilo, session);
   gen7_pipeline_sol(p, ilo, session);
   gen6_pipeline_clip(p, ilo, session);
   gen7_pipeline_sf(p, ilo, session);
   gen7_pipeline_wm(p, ilo, session);
   gen6_pipeline_wm_raster(p, ilo, session);
   gen6_pipeline_sf_rect(p, ilo, session);
   gen6_pipeline_vf(p, ilo, session);
   gen6_pipeline_vf_draw(p, ilo, session);
}
 
static void
ilo_3d_pipeline_emit_draw_gen7(struct ilo_3d_pipeline *p,
const struct ilo_context *ilo)
{
struct gen6_pipeline_session session;
 
gen6_pipeline_prepare(p, ilo, &session);
 
session.emit_draw_states = gen6_pipeline_states;
session.emit_draw_commands = gen7_pipeline_commands;
 
gen6_pipeline_draw(p, ilo, &session);
gen6_pipeline_end(p, ilo, &session);
}
 
/*
 * Return a worst-case estimate, in dwords, of all commands a GEN7 draw may
 * emit.  The estimate is context-independent, so it is computed once and
 * cached in a function-local static.
 */
static int
gen7_pipeline_estimate_commands(const struct ilo_3d_pipeline *p,
                                const struct ilo_gpe_gen7 *gen7,
                                const struct ilo_context *ilo)
{
   static int size;
   enum ilo_gpe_gen7_command cmd;

   if (!size) {
      for (cmd = 0; cmd < ILO_GPE_GEN7_COMMAND_COUNT; cmd++) {
         int count;

         switch (cmd) {
         case ILO_GPE_GEN7_PIPE_CONTROL:
            /* two for workarounds, plus one after 3DSTATE_URB and one
             * after 3DSTATE_CONSTANT_VS */
            count = 4;
            break;
         case ILO_GPE_GEN7_3DSTATE_VERTEX_BUFFERS:
            count = 33;
            break;
         case ILO_GPE_GEN7_3DSTATE_VERTEX_ELEMENTS:
            count = 34;
            break;
         case ILO_GPE_GEN7_MEDIA_VFE_STATE:
         case ILO_GPE_GEN7_MEDIA_CURBE_LOAD:
         case ILO_GPE_GEN7_MEDIA_INTERFACE_DESCRIPTOR_LOAD:
         case ILO_GPE_GEN7_MEDIA_STATE_FLUSH:
         case ILO_GPE_GEN7_GPGPU_WALKER:
            /* media commands never appear in a draw */
            count = 0;
            break;
         default:
            count = 1;
            break;
         }

         if (count)
            size += gen7->estimate_command_size(p->dev, cmd, count);
      }
   }

   return size;
}
 
/*
 * Return a worst-case estimate, in dwords, of all dynamic/surface states a
 * GEN7 draw may emit.  The context-independent portion is computed once
 * and cached; the rest depends on the bound resources of \p ilo.
 */
static int
gen7_pipeline_estimate_states(const struct ilo_3d_pipeline *p,
                              const struct ilo_gpe_gen7 *gen7,
                              const struct ilo_context *ilo)
{
   static int static_size;
   int shader_type, count, size;

   /* fixed-count states: computed only on the first call */
   if (!static_size) {
      struct {
         enum ilo_gpe_gen7_state state;
         int count;
      } static_states[] = {
         /* viewports */
         { ILO_GPE_GEN7_SF_CLIP_VIEWPORT, 1 },
         { ILO_GPE_GEN7_CC_VIEWPORT, 1 },
         /* cc */
         { ILO_GPE_GEN7_COLOR_CALC_STATE, 1 },
         { ILO_GPE_GEN7_BLEND_STATE, ILO_MAX_DRAW_BUFFERS },
         { ILO_GPE_GEN7_DEPTH_STENCIL_STATE, 1 },
         /* scissors */
         { ILO_GPE_GEN7_SCISSOR_RECT, 1 },
         /* binding table (vs, gs, fs) */
         { ILO_GPE_GEN7_BINDING_TABLE_STATE, ILO_MAX_VS_SURFACES },
         { ILO_GPE_GEN7_BINDING_TABLE_STATE, ILO_MAX_GS_SURFACES },
         { ILO_GPE_GEN7_BINDING_TABLE_STATE, ILO_MAX_WM_SURFACES },
      };
      int i;

      for (i = 0; i < Elements(static_states); i++) {
         static_size += gen7->estimate_state_size(p->dev,
               static_states[i].state,
               static_states[i].count);
      }
   }

   size = static_size;

   /*
    * render targets (fs)
    * sampler views (vs, fs)
    * constant buffers (vs, fs)
    */
   count = ilo->fb.state.nr_cbufs;
   for (shader_type = 0; shader_type < PIPE_SHADER_TYPES; shader_type++) {
      count += ilo->view[shader_type].count;
      count += util_bitcount(ilo->cbuf[shader_type].enabled_mask);
   }

   if (count) {
      size += gen7->estimate_state_size(p->dev,
            ILO_GPE_GEN7_SURFACE_STATE, count);
   }

   /* samplers (vs, fs): each needs a sampler and a border-color state */
   for (shader_type = 0; shader_type < PIPE_SHADER_TYPES; shader_type++) {
      count = ilo->sampler[shader_type].count;
      if (count) {
         size += gen7->estimate_state_size(p->dev,
               ILO_GPE_GEN7_SAMPLER_BORDER_COLOR_STATE, count);
         size += gen7->estimate_state_size(p->dev,
               ILO_GPE_GEN7_SAMPLER_STATE, count);
      }
   }

   /* pcb (vs): push constant buffer for user clip planes, if any */
   if (ilo->vs &&
       ilo_shader_get_kernel_param(ilo->vs, ILO_KERNEL_VS_PCB_UCP_SIZE)) {
      const int pcb_size =
         ilo_shader_get_kernel_param(ilo->vs, ILO_KERNEL_VS_PCB_UCP_SIZE);

      size += gen7->estimate_state_size(p->dev,
            ILO_GPE_GEN7_PUSH_CONSTANT_BUFFER, pcb_size);
   }

   return size;
}
 
/*
 * Estimate the batch-buffer space, in dwords, needed by the given pipeline
 * action.  \p arg is the ilo_context for ILO_3D_PIPELINE_DRAW and is
 * unused otherwise.  Returns 0 for unknown actions.
 */
static int
ilo_3d_pipeline_estimate_size_gen7(struct ilo_3d_pipeline *p,
                                   enum ilo_3d_pipeline_action action,
                                   const void *arg)
{
   const struct ilo_gpe_gen7 *gen7 = ilo_gpe_gen7_get();
   int size = 0;

   switch (action) {
   case ILO_3D_PIPELINE_DRAW:
      {
         const struct ilo_context *ilo = arg;

         size = gen7_pipeline_estimate_commands(p, gen7, ilo);
         size += gen7_pipeline_estimate_states(p, gen7, ilo);
      }
      break;
   case ILO_3D_PIPELINE_FLUSH:
   case ILO_3D_PIPELINE_WRITE_TIMESTAMP:
   case ILO_3D_PIPELINE_WRITE_DEPTH_COUNT:
      /* each of these is implemented with a single PIPE_CONTROL */
      size = gen7->estimate_command_size(p->dev,
            ILO_GPE_GEN7_PIPE_CONTROL, 1);
      break;
   default:
      assert(!"unknown 3D pipeline action");
      break;
   }

   return size;
}
 
/*
 * Initialize a GEN7 3D pipeline: install the GEN7 vtable entry points and
 * bind every command/state emitter to the GEN7 GPE implementation.  The
 * gen6_* function pointers are reused names for emitters whose layout is
 * shared with GEN6; all of them are still sourced from the GEN7 GPE here.
 */
void
ilo_3d_pipeline_init_gen7(struct ilo_3d_pipeline *p)
{
   const struct ilo_gpe_gen7 *gen7 = ilo_gpe_gen7_get();

   p->estimate_size = ilo_3d_pipeline_estimate_size_gen7;
   p->emit_draw = ilo_3d_pipeline_emit_draw_gen7;
   /* flush/timestamp/depth-count emission is identical to GEN6 */
   p->emit_flush = ilo_3d_pipeline_emit_flush_gen6;
   p->emit_write_timestamp = ilo_3d_pipeline_emit_write_timestamp_gen6;
   p->emit_write_depth_count = ilo_3d_pipeline_emit_write_depth_count_gen6;

/* bind p->gen6_<name> to the GEN7 GPE's emit_<name> */
#define GEN6_USE(p, name, from) \
   p->gen6_ ## name = from->emit_ ## name
   GEN6_USE(p, STATE_BASE_ADDRESS, gen7);
   GEN6_USE(p, STATE_SIP, gen7);
   GEN6_USE(p, PIPELINE_SELECT, gen7);
   GEN6_USE(p, 3DSTATE_VERTEX_BUFFERS, gen7);
   GEN6_USE(p, 3DSTATE_VERTEX_ELEMENTS, gen7);
   GEN6_USE(p, 3DSTATE_INDEX_BUFFER, gen7);
   GEN6_USE(p, 3DSTATE_VF_STATISTICS, gen7);
   GEN6_USE(p, 3DSTATE_SCISSOR_STATE_POINTERS, gen7);
   GEN6_USE(p, 3DSTATE_VS, gen7);
   GEN6_USE(p, 3DSTATE_CLIP, gen7);
   GEN6_USE(p, 3DSTATE_CONSTANT_VS, gen7);
   GEN6_USE(p, 3DSTATE_CONSTANT_GS, gen7);
   GEN6_USE(p, 3DSTATE_CONSTANT_PS, gen7);
   GEN6_USE(p, 3DSTATE_DRAWING_RECTANGLE, gen7);
   GEN6_USE(p, 3DSTATE_POLY_STIPPLE_OFFSET, gen7);
   GEN6_USE(p, 3DSTATE_POLY_STIPPLE_PATTERN, gen7);
   GEN6_USE(p, 3DSTATE_LINE_STIPPLE, gen7);
   GEN6_USE(p, 3DSTATE_AA_LINE_PARAMETERS, gen7);
   GEN6_USE(p, 3DSTATE_MULTISAMPLE, gen7);
   GEN6_USE(p, 3DSTATE_STENCIL_BUFFER, gen7);
   GEN6_USE(p, 3DSTATE_HIER_DEPTH_BUFFER, gen7);
   GEN6_USE(p, 3DSTATE_CLEAR_PARAMS, gen7);
   GEN6_USE(p, PIPE_CONTROL, gen7);
   GEN6_USE(p, 3DPRIMITIVE, gen7);
   GEN6_USE(p, INTERFACE_DESCRIPTOR_DATA, gen7);
   GEN6_USE(p, CC_VIEWPORT, gen7);
   GEN6_USE(p, COLOR_CALC_STATE, gen7);
   GEN6_USE(p, BLEND_STATE, gen7);
   GEN6_USE(p, DEPTH_STENCIL_STATE, gen7);
   GEN6_USE(p, SCISSOR_RECT, gen7);
   GEN6_USE(p, BINDING_TABLE_STATE, gen7);
   GEN6_USE(p, SURFACE_STATE, gen7);
   GEN6_USE(p, SAMPLER_STATE, gen7);
   GEN6_USE(p, SAMPLER_BORDER_COLOR_STATE, gen7);
   GEN6_USE(p, push_constant_buffer, gen7);
#undef GEN6_USE

/* bind p->gen7_<name> to the GEN7 GPE's emit_<name> */
#define GEN7_USE(p, name, from) \
   p->gen7_ ## name = from->emit_ ## name
   GEN7_USE(p, 3DSTATE_DEPTH_BUFFER, gen7);
   GEN7_USE(p, 3DSTATE_CC_STATE_POINTERS, gen7);
   GEN7_USE(p, 3DSTATE_GS, gen7);
   GEN7_USE(p, 3DSTATE_SF, gen7);
   GEN7_USE(p, 3DSTATE_WM, gen7);
   GEN7_USE(p, 3DSTATE_SAMPLE_MASK, gen7);
   GEN7_USE(p, 3DSTATE_CONSTANT_HS, gen7);
   GEN7_USE(p, 3DSTATE_CONSTANT_DS, gen7);
   GEN7_USE(p, 3DSTATE_HS, gen7);
   GEN7_USE(p, 3DSTATE_TE, gen7);
   GEN7_USE(p, 3DSTATE_DS, gen7);
   GEN7_USE(p, 3DSTATE_STREAMOUT, gen7);
   GEN7_USE(p, 3DSTATE_SBE, gen7);
   GEN7_USE(p, 3DSTATE_PS, gen7);
   GEN7_USE(p, 3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP, gen7);
   GEN7_USE(p, 3DSTATE_VIEWPORT_STATE_POINTERS_CC, gen7);
   GEN7_USE(p, 3DSTATE_BLEND_STATE_POINTERS, gen7);
   GEN7_USE(p, 3DSTATE_DEPTH_STENCIL_STATE_POINTERS, gen7);
   GEN7_USE(p, 3DSTATE_BINDING_TABLE_POINTERS_VS, gen7);
   GEN7_USE(p, 3DSTATE_BINDING_TABLE_POINTERS_HS, gen7);
   GEN7_USE(p, 3DSTATE_BINDING_TABLE_POINTERS_DS, gen7);
   GEN7_USE(p, 3DSTATE_BINDING_TABLE_POINTERS_GS, gen7);
   GEN7_USE(p, 3DSTATE_BINDING_TABLE_POINTERS_PS, gen7);
   GEN7_USE(p, 3DSTATE_SAMPLER_STATE_POINTERS_VS, gen7);
   GEN7_USE(p, 3DSTATE_SAMPLER_STATE_POINTERS_HS, gen7);
   GEN7_USE(p, 3DSTATE_SAMPLER_STATE_POINTERS_DS, gen7);
   GEN7_USE(p, 3DSTATE_SAMPLER_STATE_POINTERS_GS, gen7);
   GEN7_USE(p, 3DSTATE_SAMPLER_STATE_POINTERS_PS, gen7);
   GEN7_USE(p, 3DSTATE_URB_VS, gen7);
   GEN7_USE(p, 3DSTATE_URB_HS, gen7);
   GEN7_USE(p, 3DSTATE_URB_DS, gen7);
   GEN7_USE(p, 3DSTATE_URB_GS, gen7);
   GEN7_USE(p, 3DSTATE_PUSH_CONSTANT_ALLOC_VS, gen7);
   GEN7_USE(p, 3DSTATE_PUSH_CONSTANT_ALLOC_HS, gen7);
   GEN7_USE(p, 3DSTATE_PUSH_CONSTANT_ALLOC_DS, gen7);
   GEN7_USE(p, 3DSTATE_PUSH_CONSTANT_ALLOC_GS, gen7);
   GEN7_USE(p, 3DSTATE_PUSH_CONSTANT_ALLOC_PS, gen7);
   GEN7_USE(p, 3DSTATE_SO_DECL_LIST, gen7);
   GEN7_USE(p, 3DSTATE_SO_BUFFER, gen7);
   GEN7_USE(p, SF_CLIP_VIEWPORT, gen7);
#undef GEN7_USE
}
/contrib/sdk/sources/Mesa/src/gallium/drivers/ilo/ilo_3d_pipeline_gen7.h
0,0 → 1,38
/*
* Mesa 3-D graphics library
*
* Copyright (C) 2013 LunarG, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*
* Authors:
* Chia-I Wu <olv@lunarg.com>
*/
 
#ifndef ILO_3D_PIPELINE_GEN7_H
#define ILO_3D_PIPELINE_GEN7_H
 
#include "ilo_common.h"
 
struct ilo_3d_pipeline;
 
void
ilo_3d_pipeline_init_gen7(struct ilo_3d_pipeline *p);
 
#endif /* ILO_3D_PIPELINE_GEN7_H */
/contrib/sdk/sources/Mesa/src/gallium/drivers/ilo/ilo_blit.c
0,0 → 1,143
/*
* Mesa 3-D graphics library
*
* Copyright (C) 2012-2013 LunarG, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*
* Authors:
* Chia-I Wu <olv@lunarg.com>
*/
 
#include "util/u_surface.h"
 
#include "ilo_blitter.h"
#include "ilo_context.h"
#include "ilo_blit.h"
 
/*
 * pipe_context::resource_copy_region entry point.
 *
 * Try the BLT engine first, then the pipe-based blitter, and finally fall
 * back to a mapped software copy.
 */
static void
ilo_resource_copy_region(struct pipe_context *pipe,
                         struct pipe_resource *dst,
                         unsigned dst_level,
                         unsigned dstx, unsigned dsty, unsigned dstz,
                         struct pipe_resource *src,
                         unsigned src_level,
                         const struct pipe_box *src_box)
{
   struct ilo_context *ilo = ilo_context(pipe);

   /* short-circuit: the pipe blitter runs only when the BLT path fails */
   if (!ilo_blitter_blt_copy_resource(ilo->blitter,
            dst, dst_level, dstx, dsty, dstz,
            src, src_level, src_box) &&
       !ilo_blitter_pipe_copy_resource(ilo->blitter,
            dst, dst_level, dstx, dsty, dstz,
            src, src_level, src_box)) {
      util_resource_copy_region(&ilo->base, dst, dst_level,
            dstx, dsty, dstz, src, src_level, src_box);
   }
}
 
static void
ilo_clear(struct pipe_context *pipe,
unsigned buffers,
const union pipe_color_union *color,
double depth,
unsigned stencil)
{
struct ilo_context *ilo = ilo_context(pipe);
 
ilo_blitter_pipe_clear_fb(ilo->blitter, buffers, color, depth, stencil);
}
 
/*
 * pipe_context::clear_render_target entry point.
 *
 * The rectangle is clamped to the surface bounds; empty or fully
 * out-of-bounds rectangles are ignored.  Tries the BLT engine first and
 * falls back to the pipe-based blitter.
 */
static void
ilo_clear_render_target(struct pipe_context *pipe,
                        struct pipe_surface *dst,
                        const union pipe_color_union *color,
                        unsigned dstx, unsigned dsty,
                        unsigned width, unsigned height)
{
   struct ilo_context *ilo = ilo_context(pipe);

   if (!width || !height || dstx >= dst->width || dsty >= dst->height)
      return;

   /* clamp using subtraction: (dstx + width) could wrap around for huge
    * unsigned widths and skip the clamp; dstx < dst->width holds here */
   if (width > dst->width - dstx)
      width = dst->width - dstx;
   if (height > dst->height - dsty)
      height = dst->height - dsty;

   /* prefer the BLT engine when it can handle the surface */
   if (ilo_blitter_blt_clear_rt(ilo->blitter,
            dst, color, dstx, dsty, width, height))
      return;

   ilo_blitter_pipe_clear_rt(ilo->blitter,
         dst, color, dstx, dsty, width, height);
}
 
/*
 * pipe_context::clear_depth_stencil entry point.
 *
 * The rectangle is clamped to the surface bounds; empty or fully
 * out-of-bounds rectangles are ignored.  Tries the BLT engine first and
 * falls back to the pipe-based blitter.
 */
static void
ilo_clear_depth_stencil(struct pipe_context *pipe,
                        struct pipe_surface *dst,
                        unsigned clear_flags,
                        double depth,
                        unsigned stencil,
                        unsigned dstx, unsigned dsty,
                        unsigned width, unsigned height)
{
   struct ilo_context *ilo = ilo_context(pipe);

   if (!width || !height || dstx >= dst->width || dsty >= dst->height)
      return;

   /* clamp using subtraction: (dstx + width) could wrap around for huge
    * unsigned widths and skip the clamp; dstx < dst->width holds here */
   if (width > dst->width - dstx)
      width = dst->width - dstx;
   if (height > dst->height - dsty)
      height = dst->height - dsty;

   /* prefer the BLT engine when it can handle the surface */
   if (ilo_blitter_blt_clear_zs(ilo->blitter,
            dst, clear_flags, depth, stencil, dstx, dsty, width, height))
      return;

   ilo_blitter_pipe_clear_zs(ilo->blitter,
         dst, clear_flags, depth, stencil, dstx, dsty, width, height);
}
 
static void
ilo_blit(struct pipe_context *pipe, const struct pipe_blit_info *info)
{
struct ilo_context *ilo = ilo_context(pipe);
 
ilo_blitter_pipe_blit(ilo->blitter, info);
}
 
/**
 * Initialize blit-related functions.
 *
 * Installs the resource-copy, blit, and clear entry points into the
 * context's pipe_context vtable.
 */
void
ilo_init_blit_functions(struct ilo_context *ilo)
{
   ilo->base.resource_copy_region = ilo_resource_copy_region;
   ilo->base.blit = ilo_blit;

   ilo->base.clear = ilo_clear;
   ilo->base.clear_render_target = ilo_clear_render_target;
   ilo->base.clear_depth_stencil = ilo_clear_depth_stencil;
}
/contrib/sdk/sources/Mesa/src/gallium/drivers/ilo/ilo_blit.h
0,0 → 1,38
/*
* Mesa 3-D graphics library
*
* Copyright (C) 2012-2013 LunarG, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*
* Authors:
* Chia-I Wu <olv@lunarg.com>
*/
 
#ifndef ILO_BLIT_H
#define ILO_BLIT_H
 
#include "ilo_common.h"
 
struct ilo_context;
 
void
ilo_init_blit_functions(struct ilo_context *ilo);
 
#endif /* ILO_BLIT_H */
/contrib/sdk/sources/Mesa/src/gallium/drivers/ilo/ilo_blitter.c
0,0 → 1,74
/*
* Mesa 3-D graphics library
*
* Copyright (C) 2013 LunarG, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*
* Authors:
* Chia-I Wu <olv@lunarg.com>
*/
 
#include "util/u_blitter.h"
 
#include "ilo_context.h"
#include "ilo_blitter.h"
 
/*
 * Lazily create the util blitter.  Returns true when it exists (already
 * created, or created successfully here).
 */
static bool
ilo_blitter_pipe_create(struct ilo_blitter *blitter)
{
   if (!blitter->pipe_blitter)
      blitter->pipe_blitter = util_blitter_create(&blitter->ilo->base);

   return (blitter->pipe_blitter != NULL);
}
 
/**
 * Create a blitter.  Because of the use of util_blitter, this must be
 * called after the context is initialized.  Returns NULL on allocation or
 * util-blitter creation failure.
 */
struct ilo_blitter *
ilo_blitter_create(struct ilo_context *ilo)
{
   struct ilo_blitter *blitter = CALLOC_STRUCT(ilo_blitter);

   if (blitter) {
      blitter->ilo = ilo;

      /* the util blitter is mandatory; bail out if it cannot be created */
      if (!ilo_blitter_pipe_create(blitter)) {
         FREE(blitter);
         blitter = NULL;
      }
   }

   return blitter;
}
 
/**
 * Destroy the blitter, releasing the underlying util blitter if one was
 * created.
 */
void
ilo_blitter_destroy(struct ilo_blitter *blitter)
{
   /* pipe_blitter is created lazily and may still be NULL */
   if (blitter->pipe_blitter)
      util_blitter_destroy(blitter->pipe_blitter);

   FREE(blitter);
}
/contrib/sdk/sources/Mesa/src/gallium/drivers/ilo/ilo_blitter.h
0,0 → 1,102
/*
* Mesa 3-D graphics library
*
* Copyright (C) 2013 LunarG, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*
* Authors:
* Chia-I Wu <olv@lunarg.com>
*/
 
#ifndef ILO_BLITTER_H
#define ILO_BLITTER_H
 
#include "ilo_common.h"
 
struct ilo_context;
struct blitter_context;
 
struct ilo_blitter {
struct ilo_context *ilo;
 
struct blitter_context *pipe_blitter;
};
 
struct ilo_blitter *
ilo_blitter_create(struct ilo_context *ilo);
 
void
ilo_blitter_destroy(struct ilo_blitter *blitter);
 
bool
ilo_blitter_pipe_blit(struct ilo_blitter *blitter,
const struct pipe_blit_info *info);
 
bool
ilo_blitter_pipe_copy_resource(struct ilo_blitter *blitter,
struct pipe_resource *dst, unsigned dst_level,
unsigned dst_x, unsigned dst_y, unsigned dst_z,
struct pipe_resource *src, unsigned src_level,
const struct pipe_box *src_box);
 
bool
ilo_blitter_pipe_clear_rt(struct ilo_blitter *blitter,
struct pipe_surface *rt,
const union pipe_color_union *color,
unsigned x, unsigned y,
unsigned width, unsigned height);
 
bool
ilo_blitter_pipe_clear_zs(struct ilo_blitter *blitter,
struct pipe_surface *zs,
unsigned clear_flags,
double depth, unsigned stencil,
unsigned x, unsigned y,
unsigned width, unsigned height);
 
bool
ilo_blitter_pipe_clear_fb(struct ilo_blitter *blitter,
unsigned buffers,
const union pipe_color_union *color,
double depth, unsigned stencil);
 
bool
ilo_blitter_blt_copy_resource(struct ilo_blitter *blitter,
struct pipe_resource *dst, unsigned dst_level,
unsigned dst_x, unsigned dst_y, unsigned dst_z,
struct pipe_resource *src, unsigned src_level,
const struct pipe_box *src_box);
 
bool
ilo_blitter_blt_clear_rt(struct ilo_blitter *blitter,
struct pipe_surface *rt,
const union pipe_color_union *color,
unsigned x, unsigned y,
unsigned width, unsigned height);
 
bool
ilo_blitter_blt_clear_zs(struct ilo_blitter *blitter,
struct pipe_surface *zs,
unsigned clear_flags,
double depth, unsigned stencil,
unsigned x, unsigned y,
unsigned width, unsigned height);
 
#endif /* ILO_BLITTER_H */
/contrib/sdk/sources/Mesa/src/gallium/drivers/ilo/ilo_blitter_blt.c
0,0 → 1,812
/*
* Mesa 3-D graphics library
*
* Copyright (C) 2013 LunarG, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*
* Authors:
* Chia-I Wu <olv@lunarg.com>
*/
 
#include "util/u_pack_color.h"
#include "intel_reg.h"
 
#include "ilo_3d.h"
#include "ilo_context.h"
#include "ilo_cp.h"
#include "ilo_resource.h"
#include "ilo_blitter.h"
 
#ifndef COLOR_BLT_CMD
#define COLOR_BLT_CMD (CMD_2D | (0x40 << 22))
#endif
 
#ifndef SRC_COPY_BLT_CMD
#define SRC_COPY_BLT_CMD (CMD_2D | (0x43 << 22))
#endif
 
/* Color depth / channel-write selectors used to build BLT command dwords
 * (see the gen6_translate_blt_* helpers below). */
enum gen6_blt_mask {
   GEN6_BLT_MASK_8,      /* 8-bit values (1 byte per pixel) */
   GEN6_BLT_MASK_16,     /* 16-bit 565 values (2 bytes per pixel) */
   GEN6_BLT_MASK_32,     /* 32-bit values, write RGB and alpha */
   GEN6_BLT_MASK_32_LO,  /* 32-bit values, write RGB only */
   GEN6_BLT_MASK_32_HI,  /* 32-bit values, write alpha only */
};
 
/*
* From the Sandy Bridge PRM, volume 1 part 5, page 7:
*
* "The BLT engine is capable of transferring very large quantities of
* graphics data. Any graphics data read from and written to the
* destination is permitted to represent a number of pixels that occupies
* up to 65,536 scan lines and up to 32,768 bytes per scan line at the
* destination. The maximum number of pixels that may be represented per
* scan line's worth of graphics data depends on the color depth."
*/
static const int gen6_max_bytes_per_scanline = 32768;
static const int gen6_max_scanlines = 65536;
 
/*
 * Emit MI_FLUSH_DW with no post-sync operation (all operand dwords zero).
 */
static void
gen6_emit_MI_FLUSH_DW(struct ilo_dev_info *dev, struct ilo_cp *cp)
{
   const uint8_t cmd_len = 4;
   int i;

   ilo_cp_begin(cp, cmd_len);
   ilo_cp_write(cp, MI_FLUSH_DW | (cmd_len - 2));
   /* address, and the two immediate-data dwords, all unused */
   for (i = 1; i < cmd_len; i++)
      ilo_cp_write(cp, 0);
   ilo_cp_end(cp);
}
 
/*
 * Emit MI_LOAD_REGISTER_IMM: write the immediate \p val into the MMIO
 * register at offset \p reg.
 */
static void
gen6_emit_MI_LOAD_REGISTER_IMM(struct ilo_dev_info *dev,
                               uint32_t reg, uint32_t val,
                               struct ilo_cp *cp)
{
   const uint8_t cmd_len = 3;

   ilo_cp_begin(cp, cmd_len);
   ilo_cp_write(cp, MI_LOAD_REGISTER_IMM | (cmd_len - 2));
   ilo_cp_write(cp, reg);
   ilo_cp_write(cp, val);
   ilo_cp_end(cp);
}
 
/* Map a color-depth mask to the BR13 depth field of a BLT command. */
static uint32_t
gen6_translate_blt_value_mask(enum gen6_blt_mask value_mask)
{
   if (value_mask == GEN6_BLT_MASK_8)
      return BR13_8;
   if (value_mask == GEN6_BLT_MASK_16)
      return BR13_565;

   /* all 32-bit variants use the 8888 depth */
   return BR13_8888;
}
 
/* Map a write mask to the RGB/alpha channel-enable bits of a BLT command.
 * 8- and 16-bit masks have no per-channel enables and map to 0. */
static uint32_t
gen6_translate_blt_write_mask(enum gen6_blt_mask write_mask)
{
   if (write_mask == GEN6_BLT_MASK_32)
      return XY_BLT_WRITE_RGB | XY_BLT_WRITE_ALPHA;
   if (write_mask == GEN6_BLT_MASK_32_LO)
      return XY_BLT_WRITE_RGB;
   if (write_mask == GEN6_BLT_MASK_32_HI)
      return XY_BLT_WRITE_ALPHA;

   return 0;
}
 
/* Return the pixel size in bytes implied by a BLT mask. */
static uint32_t
gen6_translate_blt_cpp(enum gen6_blt_mask mask)
{
   if (mask == GEN6_BLT_MASK_8)
      return 1;
   if (mask == GEN6_BLT_MASK_16)
      return 2;

   /* all 32-bit variants */
   return 4;
}
 
/*
 * Emit COLOR_BLT: a linear (non-tiled, non-XY) solid fill of the
 * destination buffer with \p pattern.
 *
 * \param dst_pitch   destination pitch in bytes, dword-aligned
 * \param dst_offset  byte offset into dst_bo, aligned to the pixel size
 * \param width       scanline length -- checked against the HW
 *                    bytes-per-scanline limit, so presumably in bytes
 *                    (TODO confirm against callers)
 * \param height      number of scanlines
 * \param rop         raster operation code
 */
static void
gen6_emit_COLOR_BLT(struct ilo_dev_info *dev,
                    struct intel_bo *dst_bo,
                    int16_t dst_pitch, uint32_t dst_offset,
                    uint16_t width, uint16_t height,
                    uint32_t pattern, uint8_t rop,
                    enum gen6_blt_mask value_mask,
                    enum gen6_blt_mask write_mask,
                    struct ilo_cp *cp)
{
   const uint8_t cmd_len = 5;
   const int cpp = gen6_translate_blt_cpp(value_mask);
   uint32_t dw0, dw1;

   dw0 = COLOR_BLT_CMD |
         gen6_translate_blt_write_mask(write_mask) |
         (cmd_len - 2);

   /* stay within the BLT engine's documented transfer limits */
   assert(width < gen6_max_bytes_per_scanline);
   assert(height < gen6_max_scanlines);
   /* offsets are naturally aligned and pitches are dword-aligned */
   assert(dst_offset % cpp == 0 && dst_pitch % 4 == 0);

   /* BR13: raster op, color depth, and pitch */
   dw1 = rop << 16 |
         gen6_translate_blt_value_mask(value_mask) |
         dst_pitch;

   ilo_cp_begin(cp, cmd_len);
   ilo_cp_write(cp, dw0);
   ilo_cp_write(cp, dw1);
   ilo_cp_write(cp, height << 16 | width);
   ilo_cp_write_bo(cp, dst_offset, dst_bo, INTEL_DOMAIN_RENDER,
         INTEL_DOMAIN_RENDER);
   ilo_cp_write(cp, pattern);
   ilo_cp_end(cp);
}
 
/*
 * Emit XY_COLOR_BLT: a solid fill of the rectangle (x1, y1)-(x2, y2) in
 * the destination surface with \p pattern.  Supports tiled destinations.
 *
 * \param dst_pitch   destination pitch in bytes; programmed in dwords when
 *                    the destination is tiled
 * \param dst_offset  byte offset into dst_bo; must be tile-aligned for
 *                    tiled surfaces (128B for Y tiling, 512B for X)
 * \param rop         raster operation code
 */
static void
gen6_emit_XY_COLOR_BLT(struct ilo_dev_info *dev,
                       struct intel_bo *dst_bo,
                       enum intel_tiling_mode dst_tiling,
                       int16_t dst_pitch, uint32_t dst_offset,
                       int16_t x1, int16_t y1, int16_t x2, int16_t y2,
                       uint32_t pattern, uint8_t rop,
                       enum gen6_blt_mask value_mask,
                       enum gen6_blt_mask write_mask,
                       struct ilo_cp *cp)
{
   const uint8_t cmd_len = 6;
   const int cpp = gen6_translate_blt_cpp(value_mask);
   int dst_align, dst_pitch_shift;
   uint32_t dw0, dw1;

   dw0 = XY_COLOR_BLT_CMD |
         gen6_translate_blt_write_mask(write_mask) |
         (cmd_len - 2);

   if (dst_tiling == INTEL_TILING_NONE) {
      dst_align = 4;
      dst_pitch_shift = 0;
   }
   else {
      dw0 |= XY_DST_TILED;

      /* tile-size alignment: 128 bytes for Y tiling, 512 for X */
      dst_align = (dst_tiling == INTEL_TILING_Y) ? 128 : 512;
      /* in dwords when tiled */
      dst_pitch_shift = 2;
   }

   /* stay within the BLT engine's documented transfer limits */
   assert((x2 - x1) * cpp < gen6_max_bytes_per_scanline);
   assert(y2 - y1 < gen6_max_scanlines);
   assert(dst_offset % dst_align == 0 && dst_pitch % dst_align == 0);

   /* BR13: raster op, color depth, and (possibly dword) pitch */
   dw1 = rop << 16 |
         gen6_translate_blt_value_mask(value_mask) |
         dst_pitch >> dst_pitch_shift;

   ilo_cp_begin(cp, cmd_len);
   ilo_cp_write(cp, dw0);
   ilo_cp_write(cp, dw1);
   ilo_cp_write(cp, y1 << 16 | x1);
   ilo_cp_write(cp, y2 << 16 | x2);
   ilo_cp_write_bo(cp, dst_offset, dst_bo,
         INTEL_DOMAIN_RENDER, INTEL_DOMAIN_RENDER);
   ilo_cp_write(cp, pattern);
   ilo_cp_end(cp);
}
 
static void
gen6_emit_SRC_COPY_BLT(struct ilo_dev_info *dev,
                       struct intel_bo *dst_bo,
                       int16_t dst_pitch, uint32_t dst_offset,
                       uint16_t width, uint16_t height,
                       struct intel_bo *src_bo,
                       int16_t src_pitch, uint32_t src_offset,
                       bool dir_rtl, uint8_t rop,
                       enum gen6_blt_mask value_mask,
                       enum gen6_blt_mask write_mask,
                       struct ilo_cp *cp)
{
   /* emit a SRC_COPY_BLT: linear copy of width x height from src to dst */
   const uint8_t cmd_len = 6;
   const int cpp = gen6_translate_blt_cpp(value_mask);
   /* bit 30 selects right-to-left copy direction */
   const uint32_t dir_bit = dir_rtl ? (1 << 30) : 0;
   uint32_t cmd, br13;

   cmd = SRC_COPY_BLT_CMD |
         gen6_translate_blt_write_mask(write_mask) |
         (cmd_len - 2);

   assert(width < gen6_max_bytes_per_scanline);
   assert(height < gen6_max_scanlines);
   /* offsets are naturally aligned and pitches are dword-aligned */
   assert(dst_offset % cpp == 0 && dst_pitch % 4 == 0);
   assert(src_offset % cpp == 0 && src_pitch % 4 == 0);

   br13 = dir_bit |
          rop << 16 |
          gen6_translate_blt_value_mask(value_mask) |
          dst_pitch;

   ilo_cp_begin(cp, cmd_len);
   ilo_cp_write(cp, cmd);
   ilo_cp_write(cp, br13);
   ilo_cp_write(cp, height << 16 | width);
   ilo_cp_write_bo(cp, dst_offset, dst_bo, INTEL_DOMAIN_RENDER,
                   INTEL_DOMAIN_RENDER);
   ilo_cp_write(cp, src_pitch);
   ilo_cp_write_bo(cp, src_offset, src_bo, INTEL_DOMAIN_RENDER, 0);
   ilo_cp_end(cp);
}
 
static void
gen6_emit_XY_SRC_COPY_BLT(struct ilo_dev_info *dev,
                          struct intel_bo *dst_bo,
                          enum intel_tiling_mode dst_tiling,
                          int16_t dst_pitch, uint32_t dst_offset,
                          int16_t x1, int16_t y1, int16_t x2, int16_t y2,
                          struct intel_bo *src_bo,
                          enum intel_tiling_mode src_tiling,
                          int16_t src_pitch, uint32_t src_offset,
                          int16_t src_x, int16_t src_y, uint8_t rop,
                          enum gen6_blt_mask value_mask,
                          enum gen6_blt_mask write_mask,
                          struct ilo_cp *cp)
{
   /*
    * Emit an XY_SRC_COPY_BLT command: copy the rectangle rooted at
    * (src_x, src_y) in src_bo to (x1, y1)-(x2, y2) in dst_bo.
    */
   const uint8_t cmd_len = 8;
   const int cpp = gen6_translate_blt_cpp(value_mask);
   int dst_align, dst_pitch_shift;
   int src_align, src_pitch_shift;
   uint32_t dw0, dw1;

   dw0 = XY_SRC_COPY_BLT_CMD |
         gen6_translate_blt_write_mask(write_mask) |
         (cmd_len - 2);

   if (dst_tiling == INTEL_TILING_NONE) {
      dst_align = 4;
      dst_pitch_shift = 0;
   }
   else {
      dw0 |= XY_DST_TILED;

      /* Y-tile rows are 128 bytes wide, X-tile rows 512 bytes */
      dst_align = (dst_tiling == INTEL_TILING_Y) ? 128 : 512;
      /* in dwords when tiled */
      dst_pitch_shift = 2;
   }

   if (src_tiling == INTEL_TILING_NONE) {
      src_align = 4;
      src_pitch_shift = 0;
   }
   else {
      dw0 |= XY_SRC_TILED;

      src_align = (src_tiling == INTEL_TILING_Y) ? 128 : 512;
      /* in dwords when tiled */
      src_pitch_shift = 2;
   }

   /* blitter limits: scanline length, scanline count, and alignment */
   assert((x2 - x1) * cpp < gen6_max_bytes_per_scanline);
   assert(y2 - y1 < gen6_max_scanlines);
   assert(dst_offset % dst_align == 0 && dst_pitch % dst_align == 0);
   assert(src_offset % src_align == 0 && src_pitch % src_align == 0);

   dw1 = rop << 16 |
         gen6_translate_blt_value_mask(value_mask) |
         dst_pitch >> dst_pitch_shift;

   ilo_cp_begin(cp, cmd_len);
   ilo_cp_write(cp, dw0);
   ilo_cp_write(cp, dw1);
   ilo_cp_write(cp, y1 << 16 | x1);
   ilo_cp_write(cp, y2 << 16 | x2);
   ilo_cp_write_bo(cp, dst_offset, dst_bo, INTEL_DOMAIN_RENDER,
                   INTEL_DOMAIN_RENDER);
   ilo_cp_write(cp, src_y << 16 | src_x);
   ilo_cp_write(cp, src_pitch >> src_pitch_shift);
   ilo_cp_write_bo(cp, src_offset, src_bo, INTEL_DOMAIN_RENDER, 0);
   ilo_cp_end(cp);
}
 
static uint32_t
ilo_blitter_blt_begin(struct ilo_blitter *blitter, int max_cmd_size,
                      struct intel_bo *dst, enum intel_tiling_mode dst_tiling,
                      struct intel_bo *src, enum intel_tiling_mode src_tiling)
{
   /*
    * Prepare the command parser for BLT commands of up to max_cmd_size
    * dwords: switch to the BLT ring, reserve aperture space, and program
    * BCS_SWCTRL when Y-tiled surfaces are involved.  src may be NULL for
    * fill-only operations.
    *
    * Returns the BCS_SWCTRL value that ilo_blitter_blt_end() must write
    * back (0 when nothing needs restoring).
    */
   struct ilo_context *ilo = blitter->ilo;
   struct intel_bo *aper_check[3];
   int count;
   uint32_t swctrl;

   /* change ring */
   ilo_cp_set_ring(ilo->cp, ILO_CP_RING_BLT);
   ilo_cp_set_owner(ilo->cp, NULL, 0);

   /* check aperture space */
   aper_check[0] = ilo->cp->bo;
   aper_check[1] = dst;
   count = 2;

   if (src) {
      aper_check[2] = src;
      count++;
   }

   /* flush now if all the bos cannot fit in the aperture together */
   if (intel_winsys_check_aperture_space(ilo->winsys, aper_check, count))
      ilo_cp_flush(ilo->cp);

   /* set BCS_SWCTRL */
   swctrl = 0x0;

   /* the high half presumably acts as a per-bit write-enable mask -- TODO
    * confirm against the PRM's BCS_SWCTRL description */
   if (dst_tiling == INTEL_TILING_Y) {
      swctrl |= BCS_SWCTRL_DST_Y << 16 |
                BCS_SWCTRL_DST_Y;
   }

   if (src && src_tiling == INTEL_TILING_Y) {
      swctrl |= BCS_SWCTRL_SRC_Y << 16 |
                BCS_SWCTRL_SRC_Y;
   }

   if (swctrl) {
      /*
       * Most clients expect BLT engine to be stateless.  If we have to set
       * BCS_SWCTRL to a non-default value, we have to set it back in the same
       * batch buffer.
       */
      if (ilo_cp_space(ilo->cp) < (4 + 3) * 2 + max_cmd_size)
         ilo_cp_flush(ilo->cp);

      /* a flush here would lose the register write we are about to emit */
      ilo_cp_assert_no_implicit_flush(ilo->cp, true);

      /*
       * From the Ivy Bridge PRM, volume 1 part 4, page 133:
       *
       *     "SW is required to flush the HW before changing the polarity of
       *      this bit (Tile Y Destination/Source)."
       */
      gen6_emit_MI_FLUSH_DW(ilo->dev, ilo->cp);
      gen6_emit_MI_LOAD_REGISTER_IMM(ilo->dev, BCS_SWCTRL, swctrl, ilo->cp);

      /* keep only the write-enable bits for restoring the default later */
      swctrl &= ~(BCS_SWCTRL_DST_Y | BCS_SWCTRL_SRC_Y);
   }

   return swctrl;
}
 
static void
ilo_blitter_blt_end(struct ilo_blitter *blitter, uint32_t swctrl)
{
   struct ilo_context *ilo = blitter->ilo;

   /* nothing to restore when BCS_SWCTRL was left at its default */
   if (!swctrl)
      return;

   /* write BCS_SWCTRL back within the same batch */
   gen6_emit_MI_FLUSH_DW(ilo->dev, ilo->cp);
   gen6_emit_MI_LOAD_REGISTER_IMM(ilo->dev, BCS_SWCTRL, swctrl, ilo->cp);

   ilo_cp_assert_no_implicit_flush(ilo->cp, false);
}
 
static bool
buf_clear_region(struct ilo_blitter *blitter,
                 struct ilo_buffer *dst,
                 unsigned dst_offset, unsigned dst_size,
                 uint32_t val,
                 enum gen6_blt_mask value_mask,
                 enum gen6_blt_mask write_mask)
{
   /*
    * Fill dst_size bytes of dst, starting at dst_offset, with val using
    * COLOR_BLT.  Returns false when the region cannot be handled (offset
    * or size not aligned to the clear-value size).
    */
   const uint8_t rop = 0xf0; /* PATCOPY */
   const int cpp = gen6_translate_blt_cpp(value_mask);
   struct ilo_context *ilo = blitter->ilo;
   unsigned offset = 0;

   if (dst_offset % cpp || dst_size % cpp)
      return false;

   ilo_blitter_blt_begin(blitter, 0,
         dst->bo, INTEL_TILING_NONE, NULL, INTEL_TILING_NONE);

   /* emit one COLOR_BLT per chunk until the whole region is covered */
   while (dst_size) {
      unsigned width, height;
      int16_t pitch;

      width = dst_size;
      height = 1;
      pitch = 0;

      /* too long for one scanline: fill a 2D block of full scanlines */
      if (width > gen6_max_bytes_per_scanline) {
         /* less than INT16_MAX and dword-aligned */
         pitch = 32764;

         width = pitch;
         height = dst_size / width;
         if (height > gen6_max_scanlines)
            height = gen6_max_scanlines;
      }

      gen6_emit_COLOR_BLT(ilo->dev, dst->bo, pitch, dst_offset + offset,
            width, height, val, rop, value_mask, write_mask, ilo->cp);

      offset += pitch * height;
      dst_size -= width * height;
   }

   ilo_blitter_blt_end(blitter, 0);

   return true;
}
 
static bool
buf_copy_region(struct ilo_blitter *blitter,
                struct ilo_buffer *dst, unsigned dst_offset,
                struct ilo_buffer *src, unsigned src_offset,
                unsigned size)
{
   /* copy size bytes from src to dst with SRC_COPY_BLT, in chunks */
   const uint8_t rop = 0xcc; /* SRCCOPY */
   struct ilo_context *ilo = blitter->ilo;
   unsigned done = 0;

   ilo_blitter_blt_begin(blitter, 0,
         dst->bo, INTEL_TILING_NONE, src->bo, INTEL_TILING_NONE);

   while (size) {
      int16_t row_pitch = 0;
      unsigned w = size, h = 1;

      /* split an over-long copy into a block of full scanlines */
      if (w > gen6_max_bytes_per_scanline) {
         /* less than INT16_MAX and dword-aligned */
         row_pitch = 32764;

         w = row_pitch;
         h = size / w;
         if (h > gen6_max_scanlines)
            h = gen6_max_scanlines;
      }

      gen6_emit_SRC_COPY_BLT(ilo->dev,
            dst->bo, row_pitch, dst_offset + done,
            w, h,
            src->bo, row_pitch, src_offset + done,
            false, rop, GEN6_BLT_MASK_8, GEN6_BLT_MASK_8,
            ilo->cp);

      done += row_pitch * h;
      size -= w * h;
   }

   ilo_blitter_blt_end(blitter, 0);

   return true;
}
 
static bool
tex_clear_region(struct ilo_blitter *blitter,
                 struct ilo_texture *dst, unsigned dst_level,
                 const struct pipe_box *dst_box,
                 uint32_t val,
                 enum gen6_blt_mask value_mask,
                 enum gen6_blt_mask write_mask)
{
   /*
    * Clear dst_box of mipmap level dst_level with val, emitting one
    * XY_COLOR_BLT per slice.  Returns false when the texture cannot be
    * handled (separate stencil, stride or rect exceeding blitter limits).
    */
   const int cpp = gen6_translate_blt_cpp(value_mask);
   const unsigned max_extent = 32767; /* INT16_MAX */
   const uint8_t rop = 0xf0; /* PATCOPY */
   struct ilo_context *ilo = blitter->ilo;
   uint32_t swctrl;
   int slice;

   /* no W-tiling support */
   if (dst->separate_s8)
      return false;

   if (dst->bo_stride > max_extent)
      return false;

   /* each XY_COLOR_BLT is 6 dwords long */
   swctrl = ilo_blitter_blt_begin(blitter, dst_box->depth * 6,
         dst->bo, dst->tiling, NULL, INTEL_TILING_NONE);

   for (slice = 0; slice < dst_box->depth; slice++) {
      const struct ilo_texture_slice *dst_slice =
         &dst->slice_offsets[dst_level][dst_box->z + slice];
      unsigned x1, y1, x2, y2;

      /* translate box coordinates to within-bo coordinates */
      x1 = dst_slice->x + dst_box->x;
      y1 = dst_slice->y + dst_box->y;
      x2 = x1 + dst_box->width;
      y2 = y1 + dst_box->height;

      /* bail out (and report failure below) when a blitter limit is hit */
      if (x2 > max_extent || y2 > max_extent ||
          (x2 - x1) * cpp > gen6_max_bytes_per_scanline)
         break;

      gen6_emit_XY_COLOR_BLT(ilo->dev,
            dst->bo, dst->tiling, dst->bo_stride, 0,
            x1, y1, x2, y2, val, rop, value_mask, write_mask,
            ilo->cp);
   }

   ilo_blitter_blt_end(blitter, swctrl);

   /* true only when every slice was cleared */
   return (slice == dst_box->depth);
}
 
static bool
tex_copy_region(struct ilo_blitter *blitter,
                struct ilo_texture *dst,
                unsigned dst_level,
                unsigned dst_x, unsigned dst_y, unsigned dst_z,
                struct ilo_texture *src,
                unsigned src_level,
                const struct pipe_box *src_box)
{
   /*
    * Copy src_box of src_level from src to (dst_x, dst_y, dst_z) of
    * dst_level in dst, one XY_SRC_COPY_BLT per slice.  Returns false when
    * the formats or layouts cannot be handled by the BLT engine.
    */
   const struct util_format_description *desc =
      util_format_description(dst->bo_format);
   const unsigned max_extent = 32767; /* INT16_MAX */
   const uint8_t rop = 0xcc; /* SRCCOPY */
   struct ilo_context *ilo = blitter->ilo;
   enum gen6_blt_mask mask;
   uint32_t swctrl;
   int cpp, xscale, slice;

   /* no W-tiling support */
   if (dst->separate_s8 || src->separate_s8)
      return false;

   if (dst->bo_stride > max_extent || src->bo_stride > max_extent)
      return false;

   cpp = desc->block.bits / 8;
   xscale = 1;

   /*
    * accommodate for larger cpp: blit with a smaller cpp and scale the
    * x-coordinates/widths by xscale to compensate
    */
   if (cpp > 4) {
      if (cpp % 2 == 1)
         return false;

      cpp = (cpp % 4 == 0) ? 4 : 2;
      xscale = (desc->block.bits / 8) / cpp;
   }

   /* the BLT engine writes 8-, 16-, or 32-bit pixels */
   switch (cpp) {
   case 1:
      mask = GEN6_BLT_MASK_8;
      break;
   case 2:
      mask = GEN6_BLT_MASK_16;
      break;
   case 4:
      mask = GEN6_BLT_MASK_32;
      break;
   default:
      return false;
   }

   /* each XY_SRC_COPY_BLT is 8 dwords long */
   swctrl = ilo_blitter_blt_begin(blitter, src_box->depth * 8,
         dst->bo, dst->tiling, src->bo, src->tiling);

   for (slice = 0; slice < src_box->depth; slice++) {
      const struct ilo_texture_slice *dst_slice =
         &dst->slice_offsets[dst_level][dst_z + slice];
      const struct ilo_texture_slice *src_slice =
         &src->slice_offsets[src_level][src_box->z + slice];
      unsigned x1, y1, x2, y2, src_x, src_y;

      x1 = (dst_slice->x + dst_x) * xscale;
      y1 = dst_slice->y + dst_y;
      /*
       * x1 is already in scaled units; only the width needs scaling here.
       * Scaling the sum, as was done before, double-scaled x1 whenever
       * xscale > 1 (formats with cpp > 4).
       */
      x2 = x1 + src_box->width * xscale;
      y2 = y1 + src_box->height;
      src_x = (src_slice->x + src_box->x) * xscale;
      src_y = src_slice->y + src_box->y;

      /* in blocks */
      x1 /= desc->block.width;
      y1 /= desc->block.height;
      x2 = (x2 + desc->block.width - 1) / desc->block.width;
      y2 = (y2 + desc->block.height - 1) / desc->block.height;
      src_x /= desc->block.width;
      src_y /= desc->block.height;

      /* bail out (and report failure below) when a blitter limit is hit */
      if (x2 > max_extent || y2 > max_extent ||
          src_x > max_extent || src_y > max_extent ||
          (x2 - x1) * cpp > gen6_max_bytes_per_scanline)
         break;

      gen6_emit_XY_SRC_COPY_BLT(ilo->dev,
            dst->bo, dst->tiling, dst->bo_stride, 0,
            x1, y1, x2, y2,
            src->bo, src->tiling, src->bo_stride, 0,
            src_x, src_y, rop, mask, mask,
            ilo->cp);
   }

   ilo_blitter_blt_end(blitter, swctrl);

   /* true only when every slice was copied */
   return (slice == src_box->depth);
}
 
bool
ilo_blitter_blt_copy_resource(struct ilo_blitter *blitter,
                              struct pipe_resource *dst, unsigned dst_level,
                              unsigned dst_x, unsigned dst_y, unsigned dst_z,
                              struct pipe_resource *src, unsigned src_level,
                              const struct pipe_box *src_box)
{
   /* copy a resource region with the BLT engine; false means "fall back" */
   const bool dst_is_buf = (dst->target == PIPE_BUFFER);
   const bool src_is_buf = (src->target == PIPE_BUFFER);

   /* mixed buffer/texture copies are not handled here */
   if (dst_is_buf != src_is_buf)
      return false;

   if (dst_is_buf) {
      /* buffers are 1D: only x and width are meaningful */
      assert(dst_level == 0 && dst_y == 0 && dst_z == 0);
      assert(src_level == 0 &&
             src_box->y == 0 &&
             src_box->z == 0 &&
             src_box->height == 1 &&
             src_box->depth == 1);

      return buf_copy_region(blitter,
            ilo_buffer(dst), dst_x, ilo_buffer(src), src_box->x,
            src_box->width);
   }

   return tex_copy_region(blitter,
         ilo_texture(dst), dst_level, dst_x, dst_y, dst_z,
         ilo_texture(src), src_level, src_box);
}
 
bool
ilo_blitter_blt_clear_rt(struct ilo_blitter *blitter,
                         struct pipe_surface *rt,
                         const union pipe_color_union *color,
                         unsigned x, unsigned y,
                         unsigned width, unsigned height)
{
   /*
    * Clear a region of a render target with the BLT engine.  Returns false
    * when the format or layout cannot be handled, so the caller can fall
    * back to another clear path.  Also returns true (doing nothing) when
    * the render condition says to skip the clear.
    */
   const int cpp = util_format_get_blocksize(rt->format);
   enum gen6_blt_mask mask;
   union util_color packed;
   bool success;

   if (!ilo_3d_pass_render_condition(blitter->ilo))
      return true;

   /* the BLT engine writes 8-, 16-, or 32-bit pixels */
   switch (cpp) {
   case 1:
      mask = GEN6_BLT_MASK_8;
      break;
   case 2:
      mask = GEN6_BLT_MASK_16;
      break;
   case 4:
      mask = GEN6_BLT_MASK_32;
      break;
   default:
      return false;
   }

   /* these formats cannot be packed into a single dword below */
   if (util_format_is_pure_integer(rt->format) ||
       util_format_is_compressed(rt->format))
      return false;

   util_pack_color(color->f, rt->format, &packed);

   if (rt->texture->target == PIPE_BUFFER) {
      unsigned offset, end, size;

      /* a buffer surface is a single scanline */
      assert(y == 0 && height == 1);

      offset = (rt->u.buf.first_element + x) * cpp;
      end = (rt->u.buf.last_element + 1) * cpp;

      /* clamp the clear to the end of the surface */
      size = width * cpp;
      if (offset + size > end)
         size = end - offset;

      success = buf_clear_region(blitter, ilo_buffer(rt->texture),
            offset, size, packed.ui, mask, mask);
   }
   else {
      struct pipe_box box;

      u_box_3d(x, y, rt->u.tex.first_layer, width, height,
            rt->u.tex.last_layer - rt->u.tex.first_layer + 1, &box);

      success = tex_clear_region(blitter, ilo_texture(rt->texture),
            rt->u.tex.level, &box, packed.ui, mask, mask);
   }

   return success;
}
 
bool
ilo_blitter_blt_clear_zs(struct ilo_blitter *blitter,
                         struct pipe_surface *zs,
                         unsigned clear_flags,
                         double depth, unsigned stencil,
                         unsigned x, unsigned y,
                         unsigned width, unsigned height)
{
   /*
    * Clear a region of a depth/stencil surface with the BLT engine.
    * Returns false when the format is not handled; returns true without
    * doing anything when clear_flags does not select any aspect present in
    * the format, or when the render condition says to skip the clear.
    */
   enum gen6_blt_mask value_mask, write_mask;
   struct pipe_box box;
   uint32_t val;

   if (!ilo_3d_pass_render_condition(blitter->ilo))
      return true;

   switch (zs->format) {
   case PIPE_FORMAT_Z16_UNORM:
      if (!(clear_flags & PIPE_CLEAR_DEPTH))
         return true;

      value_mask = GEN6_BLT_MASK_16;
      write_mask = GEN6_BLT_MASK_16;
      break;
   case PIPE_FORMAT_Z32_FLOAT:
      if (!(clear_flags & PIPE_CLEAR_DEPTH))
         return true;

      value_mask = GEN6_BLT_MASK_32;
      write_mask = GEN6_BLT_MASK_32;
      break;
   case PIPE_FORMAT_Z24X8_UNORM:
      if (!(clear_flags & PIPE_CLEAR_DEPTH))
         return true;

      value_mask = GEN6_BLT_MASK_32;
      write_mask = GEN6_BLT_MASK_32_LO;
      break;
   case PIPE_FORMAT_Z24_UNORM_S8_UINT:
      if (!(clear_flags & PIPE_CLEAR_DEPTHSTENCIL))
         return true;

      value_mask = GEN6_BLT_MASK_32;

      /* write only the aspects selected by clear_flags */
      if ((clear_flags & PIPE_CLEAR_DEPTHSTENCIL) == PIPE_CLEAR_DEPTHSTENCIL)
         write_mask = GEN6_BLT_MASK_32;
      else if (clear_flags & PIPE_CLEAR_DEPTH)
         write_mask = GEN6_BLT_MASK_32_LO;
      else
         write_mask = GEN6_BLT_MASK_32_HI;
      break;
   default:
      return false;
   }

   val = util_pack_z_stencil(zs->format, depth, stencil);

   u_box_3d(x, y, zs->u.tex.first_layer, width, height,
         zs->u.tex.last_layer - zs->u.tex.first_layer + 1, &box);

   /* depth/stencil surfaces are never buffers */
   assert(zs->texture->target != PIPE_BUFFER);

   return tex_clear_region(blitter, ilo_texture(zs->texture),
         zs->u.tex.level, &box, val, value_mask, write_mask);
}
/contrib/sdk/sources/Mesa/src/gallium/drivers/ilo/ilo_blitter_pipe.c
0,0 → 1,229
/*
* Mesa 3-D graphics library
*
* Copyright (C) 2013 LunarG, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*
* Authors:
* Chia-I Wu <olv@lunarg.com>
*/
 
#include "util/u_blitter.h"
#include "util/u_surface.h"
 
#include "ilo_3d.h"
#include "ilo_context.h"
#include "ilo_blitter.h"
 
/* the kind of u_blitter operation, used to decide which states to save */
enum ilo_blitter_pipe_op {
   ILO_BLITTER_PIPE_BLIT,
   ILO_BLITTER_PIPE_COPY,
   ILO_BLITTER_PIPE_CLEAR,
   ILO_BLITTER_PIPE_CLEAR_FB,
};
 
static void
ilo_blitter_pipe_begin(struct ilo_blitter *blitter,
                       enum ilo_blitter_pipe_op op,
                       bool scissor_enable)
{
   /*
    * Save all context states that u_blitter may clobber so they can be
    * restored afterwards.  Which states must be saved depends on the
    * util_blitter_*() entry point that is about to be called (op).
    */
   struct blitter_context *b = blitter->pipe_blitter;
   struct ilo_context *ilo = blitter->ilo;

   /* vertex states */
   util_blitter_save_vertex_buffer_slot(b, ilo->vb.states);
   util_blitter_save_vertex_elements(b, (void *) ilo->ve);
   util_blitter_save_vertex_shader(b, ilo->vs);
   util_blitter_save_geometry_shader(b, ilo->gs);
   util_blitter_save_so_targets(b, ilo->so.count, ilo->so.states);
   util_blitter_save_rasterizer(b, (void *) ilo->rasterizer);

   /* fragment states */
   util_blitter_save_fragment_shader(b, ilo->fs);
   util_blitter_save_depth_stencil_alpha(b, (void *) ilo->dsa);
   util_blitter_save_blend(b, (void *) ilo->blend);
   util_blitter_save_sample_mask(b, ilo->sample_mask);
   util_blitter_save_stencil_ref(b, &ilo->stencil_ref);
   util_blitter_save_viewport(b, &ilo->viewport.viewport0);

   /* the scissor is only relevant when the caller enables it */
   if (scissor_enable)
      util_blitter_save_scissor(b, &ilo->scissor.scissor0);

   switch (op) {
   case ILO_BLITTER_PIPE_BLIT:
   case ILO_BLITTER_PIPE_COPY:
      /*
       * we are about to call util_blitter_blit() or
       * util_blitter_copy_texture()
       */
      util_blitter_save_fragment_sampler_states(b,
            ilo->sampler[PIPE_SHADER_FRAGMENT].count,
            (void **) ilo->sampler[PIPE_SHADER_FRAGMENT].cso);

      util_blitter_save_fragment_sampler_views(b,
            ilo->view[PIPE_SHADER_FRAGMENT].count,
            ilo->view[PIPE_SHADER_FRAGMENT].states);

      util_blitter_save_framebuffer(b, &ilo->fb.state);

      /* resource_copy_region() or blit() does not honor render condition */
      util_blitter_save_render_condition(b,
            ilo->hw3d->render_condition.query,
            ilo->hw3d->render_condition.cond,
            ilo->hw3d->render_condition.mode);
      break;
   case ILO_BLITTER_PIPE_CLEAR:
      /*
       * we are about to call util_blitter_clear_render_target() or
       * util_blitter_clear_depth_stencil()
       */
      util_blitter_save_framebuffer(b, &ilo->fb.state);
      break;
   case ILO_BLITTER_PIPE_CLEAR_FB:
      /* we are about to call util_blitter_clear() */
      break;
   default:
      break;
   }
}
 
static void
ilo_blitter_pipe_end(struct ilo_blitter *blitter)
{
   /* intentionally empty: u_blitter restores the saved states itself */
}
 
bool
ilo_blitter_pipe_blit(struct ilo_blitter *blitter,
const struct pipe_blit_info *info)
{
struct blitter_context *b = blitter->pipe_blitter;
struct pipe_blit_info skip_stencil;
 
if (util_try_blit_via_copy_region(&blitter->ilo->base, info))
return true;
 
if (!util_blitter_is_blit_supported(b, info)) {
/* try without stencil */
if (info->mask & PIPE_MASK_S) {
skip_stencil = *info;
skip_stencil.mask = info->mask & ~PIPE_MASK_S;
 
if (util_blitter_is_blit_supported(blitter->pipe_blitter,
&skip_stencil)) {
ilo_warn("ignore stencil buffer blitting\n");
info = &skip_stencil;
}
}
 
if (info != &skip_stencil) {
ilo_warn("failed to blit with pipe blitter\n");
return false;
}
}
 
ilo_blitter_pipe_begin(blitter, ILO_BLITTER_PIPE_BLIT,
info->scissor_enable);
util_blitter_blit(b, info);
ilo_blitter_pipe_end(blitter);
 
return true;
}
 
bool
ilo_blitter_pipe_copy_resource(struct ilo_blitter *blitter,
                               struct pipe_resource *dst, unsigned dst_level,
                               unsigned dst_x, unsigned dst_y, unsigned dst_z,
                               struct pipe_resource *src, unsigned src_level,
                               const struct pipe_box *src_box)
{
   /* copy a resource region through u_blitter; false means "fall back" */
   const unsigned mask = PIPE_MASK_RGBAZS;
   const bool copy_all_samples = true;
   struct blitter_context *b = blitter->pipe_blitter;

   /* not until we allow rendertargets to be buffers */
   if (dst->target == PIPE_BUFFER || src->target == PIPE_BUFFER)
      return false;

   if (!util_blitter_is_copy_supported(b, dst, src, mask))
      return false;

   ilo_blitter_pipe_begin(blitter, ILO_BLITTER_PIPE_COPY, false);

   util_blitter_copy_texture(b,
         dst, dst_level, dst_x, dst_y, dst_z,
         src, src_level, src_box,
         mask, copy_all_samples);

   ilo_blitter_pipe_end(blitter);

   return true;
}
 
bool
ilo_blitter_pipe_clear_rt(struct ilo_blitter *blitter,
                          struct pipe_surface *rt,
                          const union pipe_color_union *color,
                          unsigned x, unsigned y,
                          unsigned width, unsigned height)
{
   /* clear a render-target region through u_blitter */
   struct blitter_context *b = blitter->pipe_blitter;

   ilo_blitter_pipe_begin(blitter, ILO_BLITTER_PIPE_CLEAR, false);
   util_blitter_clear_render_target(b, rt, color, x, y, width, height);
   ilo_blitter_pipe_end(blitter);

   return true;
}
 
bool
ilo_blitter_pipe_clear_zs(struct ilo_blitter *blitter,
                          struct pipe_surface *zs,
                          unsigned clear_flags,
                          double depth, unsigned stencil,
                          unsigned x, unsigned y,
                          unsigned width, unsigned height)
{
   /* clear a depth/stencil region through u_blitter */
   struct blitter_context *b = blitter->pipe_blitter;

   ilo_blitter_pipe_begin(blitter, ILO_BLITTER_PIPE_CLEAR, false);
   util_blitter_clear_depth_stencil(b, zs, clear_flags, depth, stencil,
         x, y, width, height);
   ilo_blitter_pipe_end(blitter);

   return true;
}
 
bool
ilo_blitter_pipe_clear_fb(struct ilo_blitter *blitter,
unsigned buffers,
const union pipe_color_union *color,
double depth, unsigned stencil)
{
/* TODO we should pause/resume some queries */
ilo_blitter_pipe_begin(blitter, ILO_BLITTER_PIPE_CLEAR_FB, false);
 
util_blitter_clear(blitter->pipe_blitter,
blitter->ilo->fb.state.width, blitter->ilo->fb.state.height,
buffers, color, depth, stencil);
 
ilo_blitter_pipe_end(blitter);
 
return true;
}
/contrib/sdk/sources/Mesa/src/gallium/drivers/ilo/ilo_common.h
0,0 → 1,113
/*
* Mesa 3-D graphics library
*
* Copyright (C) 2012-2013 LunarG, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*
* Authors:
* Chia-I Wu <olv@lunarg.com>
*/
 
#ifndef ILO_COMMON_H
#define ILO_COMMON_H
 
#include "pipe/p_compiler.h"
#include "pipe/p_defines.h"
#include "pipe/p_format.h"
 
#include "util/u_debug.h"
#include "util/u_double_list.h"
#include "util/u_format.h"
#include "util/u_inlines.h"
#include "util/u_math.h"
#include "util/u_memory.h"
#include "util/u_pointer.h"
 
/*
 * Encode a GEN as an int, e.g., ILO_GEN(7.5) gives 750.  The arguments are
 * parenthesized so that expression arguments expand correctly, e.g.,
 * ILO_GEN(7 + 0.5) is 750 rather than 7 + 0.5 * 100.
 */
#define ILO_GEN(gen) ((int) ((gen) * 100))
#define ILO_GEN_GET_MAJOR(gen) ((gen) / 100)
 
/*
 * Debug flags combined into the global `ilo_debug` bitmask.  The exact
 * effect of each flag is defined at the places that test it.
 */
enum ilo_debug {
   ILO_DEBUG_3D = 1 << 0,        /* 3D pipeline debugging */
   ILO_DEBUG_VS = 1 << 1,        /* vertex shader debugging */
   ILO_DEBUG_GS = 1 << 2,        /* geometry shader debugging */
   ILO_DEBUG_FS = 1 << 3,        /* fragment shader debugging */
   ILO_DEBUG_CS = 1 << 4,        /* compute shader debugging */

   ILO_DEBUG_NOHW = 1 << 8,      /* presumably skips hardware submission */
   ILO_DEBUG_NOCACHE = 1 << 9,   /* presumably disables a cache -- see users */
};
 
/* static description of the device, filled from the winsys at screen
 * creation and shared read-only by all contexts */
struct ilo_dev_info {
   /* these mirror intel_winsys_info */
   int devid;
   bool has_llc;
   bool has_gen7_sol_reset;
   bool has_address_swizzling;

   int gen;       /* GEN encoded with ILO_GEN(), e.g., ILO_GEN(7) == 700 */
   int gt;
   int urb_size;
};
 
extern int ilo_debug;
 
/**
 * Print a message, for dumping or debugging.  Enabled in all build
 * configurations; forwards to the gallium debug printer.
 */
static inline void _util_printf_format(1, 2)
ilo_printf(const char *format, ...)
{
   va_list ap;

   va_start(ap, format);
   _debug_vprintf(format, ap);
   va_end(ap);
}
 
/**
 * Print a critical error.  Unlike ilo_warn(), this is enabled in all build
 * configurations; it forwards to the gallium debug printer.
 */
static inline void _util_printf_format(1, 2)
ilo_err(const char *format, ...)
{
   va_list ap;

   va_start(ap, format);
   _debug_vprintf(format, ap);
   va_end(ap);
}
 
/**
 * Print a warning, silenced for release builds.
 */
static inline void _util_printf_format(1, 2)
ilo_warn(const char *format, ...)
{
#ifdef DEBUG
   va_list ap;

   va_start(ap, format);
   _debug_vprintf(format, ap);
   va_end(ap);
#else
   /* silence the unused-parameter warning in release builds */
   (void) format;
#endif
}
 
#endif /* ILO_COMMON_H */
/contrib/sdk/sources/Mesa/src/gallium/drivers/ilo/ilo_context.c
0,0 → 1,190
/*
* Mesa 3-D graphics library
*
* Copyright (C) 2012-2013 LunarG, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*
* Authors:
* Chia-I Wu <olv@lunarg.com>
*/
 
#include "util/u_upload_mgr.h"
#include "intel_chipset.h"
 
#include "ilo_3d.h"
#include "ilo_blit.h"
#include "ilo_blitter.h"
#include "ilo_cp.h"
#include "ilo_gpgpu.h"
#include "ilo_query.h"
#include "ilo_resource.h"
#include "ilo_screen.h"
#include "ilo_shader.h"
#include "ilo_state.h"
#include "ilo_transfer.h"
#include "ilo_video.h"
#include "ilo_context.h"
 
static void
ilo_context_cp_flushed(struct ilo_cp *cp, void *data)
{
   /* flush callback: cache the bo just flushed so that fences can wait on it */
   struct ilo_context *ctx = ilo_context(data);

   /* drop the reference to the previously flushed bo */
   if (ctx->last_cp_bo)
      intel_bo_unreference(ctx->last_cp_bo);

   ctx->last_cp_bo = cp->bo;
   intel_bo_reference(ctx->last_cp_bo);

   ilo_3d_cp_flushed(ctx->hw3d);
}
 
static void
ilo_flush(struct pipe_context *pipe,
          struct pipe_fence_handle **f,
          unsigned flags)
{
   /* pipe_context::flush: optionally create a fence, then flush the cp */
   struct ilo_context *ctx = ilo_context(pipe);

   if (f) {
      struct ilo_fence *fence = CALLOC_STRUCT(ilo_fence);

      if (fence) {
         pipe_reference_init(&fence->reference, 1);

         /* reference the batch bo that we want to wait on */
         fence->bo = (ilo_cp_empty(ctx->cp)) ?
            ctx->last_cp_bo : ctx->cp->bo;
         if (fence->bo)
            intel_bo_reference(fence->bo);
      }

      *f = (struct pipe_fence_handle *) fence;
   }

   ilo_cp_flush(ctx->cp);
}
 
static void
ilo_context_destroy(struct pipe_context *pipe)
{
   /*
    * Destroy the context.  Also called on partially constructed contexts
    * from ilo_context_create(), hence every member is checked for NULL
    * before being freed.
    */
   struct ilo_context *ilo = ilo_context(pipe);

   ilo_cleanup_states(ilo);

   if (ilo->last_cp_bo)
      intel_bo_unreference(ilo->last_cp_bo);

   if (ilo->uploader)
      u_upload_destroy(ilo->uploader);

   /* roughly the reverse order of creation */
   if (ilo->blitter)
      ilo_blitter_destroy(ilo->blitter);
   if (ilo->hw3d)
      ilo_3d_destroy(ilo->hw3d);
   if (ilo->shader_cache)
      ilo_shader_cache_destroy(ilo->shader_cache);
   if (ilo->cp)
      ilo_cp_destroy(ilo->cp);

   util_slab_destroy(&ilo->transfer_mempool);

   FREE(ilo);
}
 
static struct pipe_context *
ilo_context_create(struct pipe_screen *screen, void *priv)
{
   /*
    * Create and initialize an ilo_context.  Returns NULL on failure;
    * partially constructed state is torn down with ilo_context_destroy().
    */
   struct ilo_screen *is = ilo_screen(screen);
   struct ilo_context *ilo;

   ilo = CALLOC_STRUCT(ilo_context);
   if (!ilo)
      return NULL;

   ilo->winsys = is->winsys;
   ilo->dev = &is->dev;

   /*
    * initialize first, otherwise it may not be safe to call
    * ilo_context_destroy() on errors
    */
   util_slab_create(&ilo->transfer_mempool,
         sizeof(struct ilo_transfer), 64, UTIL_SLAB_SINGLETHREADED);

   /* the 3D pipeline needs the command parser */
   ilo->cp = ilo_cp_create(ilo->winsys, is->dev.has_llc);
   ilo->shader_cache = ilo_shader_cache_create();
   if (ilo->cp)
      ilo->hw3d = ilo_3d_create(ilo->cp, ilo->dev);

   if (!ilo->cp || !ilo->shader_cache || !ilo->hw3d) {
      ilo_context_destroy(&ilo->base);
      return NULL;
   }

   /* 1 MiB upload buffer for constants and indices */
   ilo->uploader = u_upload_create(&ilo->base, 1024 * 1024, 16,
         PIPE_BIND_CONSTANT_BUFFER | PIPE_BIND_INDEX_BUFFER);
   if (!ilo->uploader) {
      ilo_context_destroy(&ilo->base);
      return NULL;
   }

   ilo_cp_set_flush_callback(ilo->cp,
         ilo_context_cp_flushed, (void *) ilo);

   ilo->base.screen = screen;
   ilo->base.priv = priv;

   ilo->base.destroy = ilo_context_destroy;
   ilo->base.flush = ilo_flush;

   /* install the remaining pipe_context entry points */
   ilo_init_3d_functions(ilo);
   ilo_init_query_functions(ilo);
   ilo_init_state_functions(ilo);
   ilo_init_blit_functions(ilo);
   ilo_init_transfer_functions(ilo);
   ilo_init_video_functions(ilo);
   ilo_init_gpgpu_functions(ilo);

   ilo_init_states(ilo);

   /* this must be called last as u_blitter is a client of the pipe context */
   ilo->blitter = ilo_blitter_create(ilo);
   if (!ilo->blitter) {
      ilo_context_destroy(&ilo->base);
      return NULL;
   }

   return &ilo->base;
}
 
/**
 * Initialize context-related functions.
 *
 * Installs ilo_context_create() as the screen's context factory.
 */
void
ilo_init_context_functions(struct ilo_screen *is)
{
   is->base.context_create = ilo_context_create;
}
/contrib/sdk/sources/Mesa/src/gallium/drivers/ilo/ilo_context.h
0,0 → 1,113
/*
* Mesa 3-D graphics library
*
* Copyright (C) 2012-2013 LunarG, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*
* Authors:
* Chia-I Wu <olv@lunarg.com>
*/
 
#ifndef ILO_CONTEXT_H
#define ILO_CONTEXT_H
 
#include "pipe/p_context.h"
#include "util/u_slab.h"
 
#include "ilo_gpe.h"
#include "ilo_common.h"
 
struct pipe_draw_info;
struct u_upload_mgr;
struct intel_winsys;
struct intel_bo;
struct ilo_3d;
struct ilo_blitter;
struct ilo_cp;
struct ilo_screen;
struct ilo_shader_state;
 
/* per-context state of the ilo driver */
struct ilo_context {
   struct pipe_context base;

   struct intel_winsys *winsys;
   struct ilo_dev_info *dev;

   /* allocator for struct ilo_transfer */
   struct util_slab_mempool transfer_mempool;

   struct ilo_cp *cp;                /* the command parser */
   struct intel_bo *last_cp_bo;      /* most recently flushed batch bo */

   struct ilo_shader_cache *shader_cache;
   struct ilo_3d *hw3d;
   struct ilo_blitter *blitter;

   /* for uploading constants and indices */
   struct u_upload_mgr *uploader;

   /* the draw info being processed -- presumably valid only during a draw
    * call; verify against the draw entry points */
   const struct pipe_draw_info *draw;
   uint32_t dirty;                   /* dirty-state bits */

   /* vertex fetch state */
   struct ilo_vb_state vb;
   const struct ilo_ve_state *ve;
   struct ilo_ib_state ib;

   /* geometry stages */
   struct ilo_shader_state *vs;
   struct ilo_shader_state *gs;

   struct ilo_so_state so;

   struct pipe_clip_state clip;
   struct ilo_viewport_state viewport;
   struct ilo_scissor_state scissor;

   const struct ilo_rasterizer_state *rasterizer;
   struct pipe_poly_stipple poly_stipple;
   unsigned sample_mask;

   /* fragment stage and output merger */
   struct ilo_shader_state *fs;

   const struct ilo_dsa_state *dsa;
   struct pipe_stencil_ref stencil_ref;
   const struct ilo_blend_state *blend;
   struct pipe_blend_color blend_color;
   struct ilo_fb_state fb;

   /* shader resources */
   struct ilo_sampler_state sampler[PIPE_SHADER_TYPES];
   struct ilo_view_state view[PIPE_SHADER_TYPES];
   struct ilo_cbuf_state cbuf[PIPE_SHADER_TYPES];
   struct ilo_resource_state resource;

   /* GPGPU */
   struct ilo_shader_state *cs;
   struct ilo_resource_state cs_resource;
   struct ilo_global_binding global_binding;
};
 
/* cast a pipe_context to an ilo_context; valid only for contexts created by
 * this driver */
static inline struct ilo_context *
ilo_context(struct pipe_context *pipe)
{
   return (struct ilo_context *) pipe;
}
 
void
ilo_init_context_functions(struct ilo_screen *is);
 
#endif /* ILO_CONTEXT_H */
/contrib/sdk/sources/Mesa/src/gallium/drivers/ilo/ilo_cp.c
0,0 → 1,313
/*
* Mesa 3-D graphics library
*
* Copyright (C) 2012-2013 LunarG, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*
* Authors:
* Chia-I Wu <olv@lunarg.com>
*/
 
#include "intel_reg.h" /* for MI_xxx */
#include "intel_winsys.h"
 
#include "ilo_cp.h"
 
/* the size of the private space, in dwords: room kept at the tail of the
 * parser buffer for MI_BATCH_BUFFER_END plus an optional MI_NOOP pad
 * (see ilo_cp_end_buffer()) */
static const int ilo_cp_private = 2;
 
/**
* Dump the contents of the parser bo. This can only be called in the flush
* callback.
*/
void
ilo_cp_dump(struct ilo_cp *cp)
{
   ilo_printf("dumping %d bytes\n", cp->used * 4);
   /* cp->used counts dwords; decode only when something has been written */
   if (cp->used)
      intel_winsys_decode_commands(cp->winsys, cp->bo, cp->used * 4);
}
 
/**
* Save the command parser state for rewind.
*
 * Note that this cannot rewind a flush, and the caller must make sure
 * that does not happen.
*/
void
ilo_cp_setjmp(struct ilo_cp *cp, struct ilo_cp_jmp_buf *jmp)
{
   /* record the identity of the current bo so that a longjmp against a
    * reallocated buffer can be detected and rejected */
   jmp->id = pointer_to_intptr(cp->bo);

   jmp->size = cp->size;
   jmp->used = cp->used;
   jmp->stolen = cp->stolen;
   /* save reloc count to rewind ilo_cp_write_bo() */
   jmp->reloc_count = intel_bo_get_reloc_count(cp->bo);
}
 
/**
* Rewind to the saved state.
*/
void
ilo_cp_longjmp(struct ilo_cp *cp, const struct ilo_cp_jmp_buf *jmp)
{
   /* a flush reallocates cp->bo; if the id no longer matches, the saved
    * state belongs to a different buffer and cannot be restored */
   if (jmp->id != pointer_to_intptr(cp->bo)) {
      assert(!"invalid use of CP longjmp");
      return;
   }

   cp->size = jmp->size;
   cp->used = jmp->used;
   cp->stolen = jmp->stolen;
   /* drop the relocations emitted since the matching ilo_cp_setjmp() */
   intel_bo_clear_relocs(cp->bo, jmp->reloc_count);
}
 
/**
* Clear the parser buffer.
*/
static void
ilo_cp_clear_buffer(struct ilo_cp *cp)
{
   /* no command or stolen-data window is open */
   cp->cmd_cur = 0;
   cp->cmd_end = 0;

   cp->used = 0;
   cp->stolen = 0;

   /*
    * Recalculate cp->size.  This is needed not only because cp->stolen is
    * reset above, but also that ilo_cp_private are added to cp->size in
    * ilo_cp_end_buffer().
    */
   cp->size = cp->bo_size - ilo_cp_private;
}
 
/**
* Add MI_BATCH_BUFFER_END to the private space of the parser buffer.
*/
static void
ilo_cp_end_buffer(struct ilo_cp *cp)
{
   /* make the private space available */
   cp->size += ilo_cp_private;

   /* ilo_cp_private (2 dwords) is exactly the room the writes below need */
   assert(cp->used + 2 <= cp->size);

   cp->ptr[cp->used++] = MI_BATCH_BUFFER_END;

   /*
    * From the Sandy Bridge PRM, volume 1 part 1, page 107:
    *
    *     "The batch buffer must be QWord aligned and a multiple of QWords in
    *      length."
    */
   if (cp->used & 1)
      cp->ptr[cp->used++] = MI_NOOP;
}
 
/**
* Upload the parser buffer to the bo.
*/
static int
ilo_cp_upload_buffer(struct ilo_cp *cp)
{
   int err;

   /* direct-map mode: commands were written straight into the mapped bo,
    * so there is nothing to upload -- just unmap it */
   if (!cp->sys) {
      intel_bo_unmap(cp->bo);
      return 0;
   }

   /* copy the commands at the bottom, then the stolen data at the top */
   err = intel_bo_pwrite(cp->bo, 0, cp->used * 4, cp->ptr);
   if (likely(!err && cp->stolen)) {
      /* the stolen region grows downward from the end of the buffer */
      const int offset = cp->bo_size - cp->stolen;

      err = intel_bo_pwrite(cp->bo, offset * 4,
            cp->stolen * 4, &cp->ptr[offset]);
   }

   return err;
}
 
/**
* Reallocate the parser bo.
*/
static void
ilo_cp_realloc_bo(struct ilo_cp *cp)
{
   struct intel_bo *bo;

   /*
    * allocate the new bo before unreferencing the old one so that they
    * won't point at the same address, which is needed for jmpbuf
    */
   bo = intel_winsys_alloc_buffer(cp->winsys,
         "batch buffer", cp->bo_size * 4, 0);
   if (unlikely(!bo)) {
      /* reuse the old one */
      bo = cp->bo;
      intel_bo_reference(bo);
   }

   if (cp->bo)
      intel_bo_unreference(cp->bo);
   cp->bo = bo;

   /* direct-map mode: write commands straight into the mapped bo */
   if (!cp->sys) {
      /* NOTE(review): the intel_bo_map() result is not checked; cp->ptr
       * would be invalid if mapping failed -- confirm the winsys cannot
       * fail here */
      intel_bo_map(cp->bo, true);
      cp->ptr = intel_bo_get_virtual(cp->bo);
   }
}
 
/**
* Execute the parser bo.
*/
static int
ilo_cp_exec_bo(struct ilo_cp *cp)
{
   const bool do_exec = !(ilo_debug & ILO_DEBUG_NOHW);
   struct intel_context *ctx;
   unsigned long flags;
   int err;

   /* pick the hardware context and execution flags for the target ring;
    * only the render ring uses the persistent context */
   switch (cp->ring) {
   case ILO_CP_RING_RENDER:
      ctx = cp->render_ctx;
      flags = INTEL_EXEC_RENDER;
      break;
   case ILO_CP_RING_BLT:
      ctx = NULL;
      flags = INTEL_EXEC_BLT;
      break;
   default:
      ctx = NULL;
      flags = 0;
      break;
   }

   flags |= cp->one_off_flags;

   /* with ILO_DEBUG_NOHW set, skip execution and report success */
   if (likely(do_exec))
      err = intel_bo_exec(cp->bo, cp->used * 4, ctx, flags);
   else
      err = 0;

   /* one-off flags apply to a single execution only */
   cp->one_off_flags = 0;

   return err;
}
 
/**
* Flush the command parser and execute the commands. When the parser buffer
* is empty, the callback is not invoked.
*/
void
ilo_cp_flush(struct ilo_cp *cp)
{
   int err;

   /* release the current owner and reclaim its reserved space */
   ilo_cp_set_owner(cp, NULL, 0);

   /* sanity check */
   assert(cp->bo_size == cp->size + cp->stolen + ilo_cp_private);

   if (!cp->used) {
      /* return the space stolen and etc. */
      ilo_cp_clear_buffer(cp);

      return;
   }

   /* terminate the batch with MI_BATCH_BUFFER_END (plus pad if needed) */
   ilo_cp_end_buffer(cp);

   /* upload and execute */
   err = ilo_cp_upload_buffer(cp);
   if (likely(!err))
      err = ilo_cp_exec_bo(cp);

   /* the callback is invoked only after a successful execution */
   if (likely(!err && cp->flush_callback))
      cp->flush_callback(cp, cp->flush_callback_data);

   ilo_cp_clear_buffer(cp);
   /* switch to a fresh bo for the next batch */
   ilo_cp_realloc_bo(cp);
}
 
/**
* Destroy the command parser.
*/
void
ilo_cp_destroy(struct ilo_cp *cp)
{
   if (cp->bo) {
      /* in direct-map mode the bo stays mapped; unmap before releasing */
      if (!cp->sys)
         intel_bo_unmap(cp->bo);

      intel_bo_unreference(cp->bo);
   }

   if (cp->render_ctx)
      intel_winsys_destroy_context(cp->winsys, cp->render_ctx);

   /* FREE(NULL) is a no-op, so cp->sys may be NULL (direct-map mode) */
   FREE(cp->sys);
   FREE(cp);
}
 
/**
* Create a command parser.
*/
struct ilo_cp *
ilo_cp_create(struct intel_winsys *winsys, bool direct_map)
{
   struct ilo_cp *cp;

   cp = CALLOC_STRUCT(ilo_cp);
   if (!cp)
      return NULL;

   cp->winsys = winsys;
   /* NOTE(review): failure is not checked here; ilo_cp_exec_bo() would then
    * pass a NULL context for the render ring -- confirm the winsys
    * tolerates that */
   cp->render_ctx = intel_winsys_create_context(winsys);

   cp->ring = ILO_CP_RING_RENDER;
   cp->no_implicit_flush = false;

   /* parser buffer size in dwords (8192 dwords == 32 KiB) */
   cp->bo_size = 8192;

   if (!direct_map) {
      /* stage commands in a malloc'ed buffer; uploaded at flush time */
      cp->sys = MALLOC(cp->bo_size * 4);
      if (!cp->sys) {
         FREE(cp);
         return NULL;
      }

      cp->ptr = cp->sys;
   }

   /* cp->bo is NULL, so this performs the initial allocation */
   ilo_cp_realloc_bo(cp);
   if (!cp->bo) {
      FREE(cp->sys);
      FREE(cp);
      return NULL;
   }

   ilo_cp_clear_buffer(cp);

   return cp;
}
/contrib/sdk/sources/Mesa/src/gallium/drivers/ilo/ilo_cp.h
0,0 → 1,363
/*
* Mesa 3-D graphics library
*
* Copyright (C) 2012-2013 LunarG, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*
* Authors:
* Chia-I Wu <olv@lunarg.com>
*/
 
#ifndef ILO_CP_H
#define ILO_CP_H
 
#include "intel_winsys.h"
 
#include "ilo_common.h"
 
struct ilo_cp;
 
enum ilo_cp_ring {
   ILO_CP_RING_RENDER,   /* 3D render ring; executes with the persistent
                            hardware context (see ilo_cp_exec_bo()) */
   ILO_CP_RING_BLT,      /* blitter ring; executes without a context */

   ILO_CP_RING_COUNT,
};
 
typedef void (*ilo_cp_callback)(struct ilo_cp *cp, void *data);
 
struct ilo_cp_owner {
   /* invoked with release_data when another owner takes over the parser
    * (see ilo_cp_set_owner()); it must not flush the parser */
   ilo_cp_callback release_callback;
   void *release_data;
};
 
/**
* Command parser.
*/
struct ilo_cp {
   struct intel_winsys *winsys;
   struct intel_context *render_ctx;   /* hw context for the render ring */

   /* invoked after a successful execution, before the bo is reallocated */
   ilo_cp_callback flush_callback;
   void *flush_callback_data;

   /* current owner and the number of dwords reserved for it */
   const struct ilo_cp_owner *owner;
   int owner_reserve;

   enum ilo_cp_ring ring;     /* ring the next execution targets */
   bool no_implicit_flush;    /* assert and discard on implicit flushes */
   unsigned one_off_flags;    /* extra exec flags, cleared after one exec */

   int bo_size;               /* total buffer size in dwords */
   struct intel_bo *bo;       /* the batch buffer object */
   uint32_t *sys;             /* malloc'ed staging buffer, or NULL when the
                                 bo is mapped directly */

   uint32_t *ptr;             /* write pointer: cp->sys or the mapped bo */
   int size, used, stolen;    /* usable/written/top-stolen dwords */

   int cmd_cur, cmd_end;      /* bounds of the command currently open */
};
 
/**
* Jump buffer to save command parser state for rewind.
*/
struct ilo_cp_jmp_buf {
   intptr_t id;               /* identity of cp->bo when the state was saved */
   int size, used, stolen;
   int reloc_count;           /* relocs emitted so far, restored through
                                 intel_bo_clear_relocs() */
};
 
struct ilo_cp *
ilo_cp_create(struct intel_winsys *winsys, bool direct_map);
 
void
ilo_cp_destroy(struct ilo_cp *cp);
 
void
ilo_cp_flush(struct ilo_cp *cp);
 
void
ilo_cp_dump(struct ilo_cp *cp);
 
void
ilo_cp_setjmp(struct ilo_cp *cp, struct ilo_cp_jmp_buf *jmp);
 
void
ilo_cp_longjmp(struct ilo_cp *cp, const struct ilo_cp_jmp_buf *jmp);
 
/**
* Return true if the parser buffer is empty.
*/
/**
 * Return true when no command has been written to the parser buffer.
 */
static inline bool
ilo_cp_empty(struct ilo_cp *cp)
{
   return (cp->used == 0);
}
 
/**
* Return the remaining space (in dwords) in the parser buffer.
*/
/**
 * Return the remaining space (in dwords) in the parser buffer.
 */
static inline int
ilo_cp_space(struct ilo_cp *cp)
{
   const int remaining = cp->size - cp->used;

   return remaining;
}
 
/**
* Internal function called by functions that flush implicitly.
*/
static inline void
ilo_cp_implicit_flush(struct ilo_cp *cp)
{
   if (cp->no_implicit_flush) {
      assert(!"unexpected command parser flush");
      /* discard the commands */
      cp->used = 0;
   }

   /* the flush still happens; above only emptied the buffer first */
   ilo_cp_flush(cp);
}
 
/**
* Set the ring buffer.
*/
/**
 * Select the ring the parser submits to, flushing pending commands to the
 * previous ring first.
 */
static inline void
ilo_cp_set_ring(struct ilo_cp *cp, enum ilo_cp_ring ring)
{
   if (cp->ring == ring)
      return;

   /* submit what has been queued for the old ring before switching */
   ilo_cp_implicit_flush(cp);
   cp->ring = ring;
}
 
/**
* Assert that no function should flush implicitly.
*/
static inline void
ilo_cp_assert_no_implicit_flush(struct ilo_cp *cp, bool enable)
{
   /* only sets a flag; ilo_cp_implicit_flush() performs the actual assert */
   cp->no_implicit_flush = enable;
}
 
/**
* Set one-off flags. They will be cleared after flushing.
*/
static inline void
ilo_cp_set_one_off_flags(struct ilo_cp *cp, unsigned flags)
{
   /* OR'ed into the exec flags of the next submission, then cleared by
    * ilo_cp_exec_bo() */
   cp->one_off_flags |= flags;
}
 
/**
* Set flush callback. The callback is invoked after the bo has been
* successfully executed, and before the bo is reallocated.
*/
static inline void
ilo_cp_set_flush_callback(struct ilo_cp *cp, ilo_cp_callback callback,
                          void *data)
{
   /* replaces any previously installed callback */
   cp->flush_callback = callback;
   cp->flush_callback_data = data;
}
 
/**
* Set the parser owner. If this is a new owner, the previous owner is
* notified and the space it reserved is reclaimed.
*
* \return true if this is a new owner
*/
static inline bool
ilo_cp_set_owner(struct ilo_cp *cp, const struct ilo_cp_owner *owner,
                 int reserve)
{
   const bool new_owner = (cp->owner != owner);

   /* release current owner */
   if (new_owner && cp->owner) {
      const bool no_implicit_flush = cp->no_implicit_flush;

      /* reclaim the reserved space */
      cp->size += cp->owner_reserve;
      cp->owner_reserve = 0;

      /* invoke the release callback; it is not allowed to flush */
      cp->no_implicit_flush = true;
      cp->owner->release_callback(cp, cp->owner->release_data);
      cp->no_implicit_flush = no_implicit_flush;

      cp->owner = NULL;
   }

   /* adjust the reservation to the requested amount */
   if (cp->owner_reserve != reserve) {
      const int extra = reserve - cp->owner_reserve;

      /* flush when growing the reservation would overrun the buffer */
      if (cp->used > cp->size - extra) {
         ilo_cp_implicit_flush(cp);
         assert(cp->used <= cp->size - reserve);

         /* the flush reset owner_reserve to 0; take the full amount */
         cp->size -= reserve;
         cp->owner_reserve = reserve;
      }
      else {
         cp->size -= extra;
         cp->owner_reserve += extra;
      }
   }

   /* set owner last because of the possible flush above */
   cp->owner = owner;

   return new_owner;
}
 
/**
* Begin writing a command.
*/
/**
 * Open a command window of \p cmd_size dwords, flushing first when the
 * buffer cannot hold it.
 */
static inline void
ilo_cp_begin(struct ilo_cp *cp, int cmd_size)
{
   const bool fits = (cp->used + cmd_size <= cp->size);

   if (!fits) {
      ilo_cp_implicit_flush(cp);
      assert(cp->used + cmd_size <= cp->size);
   }

   /* no other command window may be open */
   assert(cp->cmd_cur == cp->cmd_end);
   cp->cmd_cur = cp->used;
   cp->cmd_end = cp->used + cmd_size;
   cp->used = cp->cmd_end;
}
 
/**
* Begin writing data to a space stolen from the top of the parser buffer.
*
* \param desc informative description of the data to be written
* \param data_size in dwords
* \param align in dwords
* \param bo_offset in bytes to the stolen space
*/
static inline void
ilo_cp_steal(struct ilo_cp *cp, const char *desc,
             int data_size, int align, uint32_t *bo_offset)
{
   int pad, steal;

   if (!align)
      align = 1;

   /* pad so that the start of the stolen region is aligned */
   pad = (cp->bo_size - cp->stolen - data_size) % align;
   steal = data_size + pad;

   /* flush if there is not enough space after stealing */
   if (cp->used > cp->size - steal) {
      ilo_cp_implicit_flush(cp);

      /* recompute with cp->stolen reset by the flush */
      pad = (cp->bo_size - cp->stolen - data_size) % align;
      /* bug fix: was "data_size + steal", which over-stole by the stale
       * pre-flush amount and broke the alignment of cmd_cur */
      steal = data_size + pad;

      assert(cp->used <= cp->size - steal);
   }

   cp->size -= steal;
   cp->stolen += steal;

   /* no other command window may be open */
   assert(cp->cmd_cur == cp->cmd_end);
   cp->cmd_cur = cp->bo_size - cp->stolen;
   cp->cmd_end = cp->cmd_cur + data_size;

   /* offset in cp->bo */
   if (bo_offset)
      *bo_offset = cp->cmd_cur * 4;
}
 
/**
* Write a dword to the parser buffer. This function must be enclosed by
* ilo_cp_begin()/ilo_cp_steal() and ilo_cp_end().
*/
/**
 * Append one dword to the currently open command window.  Must be enclosed
 * by ilo_cp_begin()/ilo_cp_steal() and ilo_cp_end().
 */
static inline void
ilo_cp_write(struct ilo_cp *cp, uint32_t val)
{
   assert(cp->cmd_cur < cp->cmd_end);
   cp->ptr[cp->cmd_cur] = val;
   cp->cmd_cur++;
}
 
/**
* Write multiple dwords to the parser buffer.
*/
/**
 * Append \p num_vals dwords to the currently open command window.
 */
static inline void
ilo_cp_write_multi(struct ilo_cp *cp, const void *vals, int num_vals)
{
   assert(cp->cmd_cur + num_vals <= cp->cmd_end);
   memcpy(&cp->ptr[cp->cmd_cur], vals, num_vals * 4);
   cp->cmd_cur += num_vals;
}
 
/**
* Write a bo to the parser buffer. In addition to writing the offset of the
* bo to the buffer, it also emits a relocation.
*/
/**
 * Write a bo offset to the parser buffer, emitting a relocation for it.
 * A NULL bo writes \p val verbatim with no relocation.
 */
static inline void
ilo_cp_write_bo(struct ilo_cp *cp, uint32_t val, struct intel_bo *bo,
                uint32_t read_domains, uint32_t write_domain)
{
   if (!bo) {
      ilo_cp_write(cp, val);
      return;
   }

   intel_bo_emit_reloc(cp->bo, cp->cmd_cur * 4,
                       bo, val, read_domains, write_domain);

   ilo_cp_write(cp, val + intel_bo_get_offset(bo));
}
 
/**
* End a command. Every ilo_cp_begin() or ilo_cp_steal() must have a
* matching ilo_cp_end().
*/
static inline void
ilo_cp_end(struct ilo_cp *cp)
{
   /* every dword announced to ilo_cp_begin()/ilo_cp_steal() was written */
   assert(cp->cmd_cur == cp->cmd_end);
}
 
/**
* A variant of ilo_cp_steal() where the data are written via the returned
* pointer.
*
* \return ptr pointer where the data are written to. It is valid until any
* change is made to the parser.
*/
static inline void *
ilo_cp_steal_ptr(struct ilo_cp *cp, const char *desc,
                 int data_size, int align, uint32_t *bo_offset)
{
   void *ptr;

   ilo_cp_steal(cp, desc, data_size, align, bo_offset);

   /* hand out the stolen region and mark it as fully written */
   ptr = &cp->ptr[cp->cmd_cur];
   cp->cmd_cur = cp->cmd_end;

   ilo_cp_end(cp);

   return ptr;
}
 
#endif /* ILO_CP_H */
/contrib/sdk/sources/Mesa/src/gallium/drivers/ilo/ilo_format.c
0,0 → 1,687
/*
* Mesa 3-D graphics library
*
* Copyright (C) 2012-2013 LunarG, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*
* Authors:
* Chia-I Wu <olv@lunarg.com>
*/
 
#include "vl/vl_video_buffer.h"
#include "brw_defines.h"
 
#include "ilo_screen.h"
#include "ilo_format.h"
 
/* stolen from classic i965 */
/* stolen from classic i965 */
struct surface_format_info {
   bool exists;
   /* each capability field holds the first supporting hardware generation
    * times ten (45 = Gen4.5, 50 = Gen5, 60 = Gen6, 70 = Gen7), 0 (Y) for
    * "all gens", or 999 (x) for "unsupported"; see the SF()/Y/x macros and
    * the gen mapping documented above surface_formats[] */
   int sampling;
   int filtering;
   int shadow_compare;
   int chroma_key;
   int render_target;
   int alpha_blend;
   int input_vb;
   int streamed_output_vb;
   int color_processing;
};
 
/* This macro allows us to write the table almost as it appears in the PRM,
* while restructuring it to turn it into the C code we want.
*/
#define SF(sampl, filt, shad, ck, rt, ab, vb, so, color, sf) \
[sf] = { true, sampl, filt, shad, ck, rt, ab, vb, so, color },
 
#define Y 0
#define x 999
/**
* This is the table of support for surface (texture, renderbuffer, and vertex
* buffer, but not depthbuffer) formats across the various hardware generations.
*
* The table is formatted to match the documentation, except that the docs have
* this ridiculous mapping of Y[*+~^#&] for "supported on DevWhatever". To put
* it in our table, here's the mapping:
*
* Y*: 45
* Y+: 45 (g45/gm45)
* Y~: 50 (gen5)
* Y^: 60 (gen6)
* Y#: 70 (gen7)
*
* The abbreviations in the header below are:
* smpl - Sampling Engine
* filt - Sampling Engine Filtering
* shad - Sampling Engine Shadow Map
* CK - Sampling Engine Chroma Key
* RT - Render Target
* AB - Alpha Blend Render Target
* VB - Input Vertex Buffer
 * SO - Streamed Output Vertex Buffers (transform feedback)
* color - Color Processing
*
* See page 88 of the Sandybridge PRM VOL4_Part1 PDF.
*
* As of Ivybridge, the columns are no longer in that table and the
* information can be found spread across:
*
* - VOL2_Part1 section 2.5.11 Format Conversion (vertex fetch).
* - VOL4_Part1 section 2.12.2.1.2 Sampler Output Channel Mapping.
* - VOL4_Part1 section 3.9.11 Render Target Write.
*/
/* indexed by BRW_SURFACEFORMAT_* through the designated initializer in SF();
 * gaps in the enum leave zero-initialized entries with .exists == false */
const struct surface_format_info surface_formats[] = {
/* smpl filt shad CK RT AB VB SO color */
   SF( Y, 50, x, x, Y, Y, Y, Y, x, BRW_SURFACEFORMAT_R32G32B32A32_FLOAT)
   SF( Y, x, x, x, Y, x, Y, Y, x, BRW_SURFACEFORMAT_R32G32B32A32_SINT)
   SF( Y, x, x, x, Y, x, Y, Y, x, BRW_SURFACEFORMAT_R32G32B32A32_UINT)
   SF( x, x, x, x, x, x, Y, x, x, BRW_SURFACEFORMAT_R32G32B32A32_UNORM)
   SF( x, x, x, x, x, x, Y, x, x, BRW_SURFACEFORMAT_R32G32B32A32_SNORM)
   SF( x, x, x, x, x, x, Y, x, x, BRW_SURFACEFORMAT_R64G64_FLOAT)
   SF( Y, 50, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_R32G32B32X32_FLOAT)
   SF( x, x, x, x, x, x, Y, x, x, BRW_SURFACEFORMAT_R32G32B32A32_SSCALED)
   SF( x, x, x, x, x, x, Y, x, x, BRW_SURFACEFORMAT_R32G32B32A32_USCALED)
   SF( x, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_R32G32B32A32_SFIXED)
   SF( x, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_R64G64_PASSTHRU)
   SF( Y, 50, x, x, x, x, Y, Y, x, BRW_SURFACEFORMAT_R32G32B32_FLOAT)
   SF( Y, x, x, x, x, x, Y, Y, x, BRW_SURFACEFORMAT_R32G32B32_SINT)
   SF( Y, x, x, x, x, x, Y, Y, x, BRW_SURFACEFORMAT_R32G32B32_UINT)
   SF( x, x, x, x, x, x, Y, x, x, BRW_SURFACEFORMAT_R32G32B32_UNORM)
   SF( x, x, x, x, x, x, Y, x, x, BRW_SURFACEFORMAT_R32G32B32_SNORM)
   SF( x, x, x, x, x, x, Y, x, x, BRW_SURFACEFORMAT_R32G32B32_SSCALED)
   SF( x, x, x, x, x, x, Y, x, x, BRW_SURFACEFORMAT_R32G32B32_USCALED)
   SF( x, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_R32G32B32_SFIXED)
   SF( Y, Y, x, x, Y, 45, Y, x, 60, BRW_SURFACEFORMAT_R16G16B16A16_UNORM)
   SF( Y, Y, x, x, Y, 60, Y, x, x, BRW_SURFACEFORMAT_R16G16B16A16_SNORM)
   SF( Y, x, x, x, Y, x, Y, x, x, BRW_SURFACEFORMAT_R16G16B16A16_SINT)
   SF( Y, x, x, x, Y, x, Y, x, x, BRW_SURFACEFORMAT_R16G16B16A16_UINT)
   SF( Y, Y, x, x, Y, Y, Y, x, x, BRW_SURFACEFORMAT_R16G16B16A16_FLOAT)
   SF( Y, 50, x, x, Y, Y, Y, Y, x, BRW_SURFACEFORMAT_R32G32_FLOAT)
   SF( Y, x, x, x, Y, x, Y, Y, x, BRW_SURFACEFORMAT_R32G32_SINT)
   SF( Y, x, x, x, Y, x, Y, Y, x, BRW_SURFACEFORMAT_R32G32_UINT)
   SF( Y, 50, Y, x, x, x, x, x, x, BRW_SURFACEFORMAT_R32_FLOAT_X8X24_TYPELESS)
   SF( Y, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_X32_TYPELESS_G8X24_UINT)
   SF( Y, 50, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_L32A32_FLOAT)
   SF( x, x, x, x, x, x, Y, x, x, BRW_SURFACEFORMAT_R32G32_UNORM)
   SF( x, x, x, x, x, x, Y, x, x, BRW_SURFACEFORMAT_R32G32_SNORM)
   SF( x, x, x, x, x, x, Y, x, x, BRW_SURFACEFORMAT_R64_FLOAT)
   SF( Y, Y, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_R16G16B16X16_UNORM)
   SF( Y, Y, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_R16G16B16X16_FLOAT)
   SF( Y, 50, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_A32X32_FLOAT)
   SF( Y, 50, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_L32X32_FLOAT)
   SF( Y, 50, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_I32X32_FLOAT)
   SF( x, x, x, x, x, x, Y, x, x, BRW_SURFACEFORMAT_R16G16B16A16_SSCALED)
   SF( x, x, x, x, x, x, Y, x, x, BRW_SURFACEFORMAT_R16G16B16A16_USCALED)
   SF( x, x, x, x, x, x, Y, x, x, BRW_SURFACEFORMAT_R32G32_SSCALED)
   SF( x, x, x, x, x, x, Y, x, x, BRW_SURFACEFORMAT_R32G32_USCALED)
   SF( x, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_R32G32_SFIXED)
   SF( x, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_R64_PASSTHRU)
   SF( Y, Y, x, Y, Y, Y, Y, x, 60, BRW_SURFACEFORMAT_B8G8R8A8_UNORM)
   SF( Y, Y, x, x, Y, Y, x, x, x, BRW_SURFACEFORMAT_B8G8R8A8_UNORM_SRGB)
/* smpl filt shad CK RT AB VB SO color */
   SF( Y, Y, x, x, Y, Y, Y, x, 60, BRW_SURFACEFORMAT_R10G10B10A2_UNORM)
   SF( Y, Y, x, x, x, x, x, x, 60, BRW_SURFACEFORMAT_R10G10B10A2_UNORM_SRGB)
   SF( Y, x, x, x, Y, x, Y, x, x, BRW_SURFACEFORMAT_R10G10B10A2_UINT)
   SF( Y, Y, x, x, x, Y, Y, x, x, BRW_SURFACEFORMAT_R10G10B10_SNORM_A2_UNORM)
   SF( Y, Y, x, x, Y, Y, Y, x, 60, BRW_SURFACEFORMAT_R8G8B8A8_UNORM)
   SF( Y, Y, x, x, Y, Y, x, x, 60, BRW_SURFACEFORMAT_R8G8B8A8_UNORM_SRGB)
   SF( Y, Y, x, x, Y, 60, Y, x, x, BRW_SURFACEFORMAT_R8G8B8A8_SNORM)
   SF( Y, x, x, x, Y, x, Y, x, x, BRW_SURFACEFORMAT_R8G8B8A8_SINT)
   SF( Y, x, x, x, Y, x, Y, x, x, BRW_SURFACEFORMAT_R8G8B8A8_UINT)
   SF( Y, Y, x, x, Y, 45, Y, x, x, BRW_SURFACEFORMAT_R16G16_UNORM)
   SF( Y, Y, x, x, Y, 60, Y, x, x, BRW_SURFACEFORMAT_R16G16_SNORM)
   SF( Y, x, x, x, Y, x, Y, x, x, BRW_SURFACEFORMAT_R16G16_SINT)
   SF( Y, x, x, x, Y, x, Y, x, x, BRW_SURFACEFORMAT_R16G16_UINT)
   SF( Y, Y, x, x, Y, Y, Y, x, x, BRW_SURFACEFORMAT_R16G16_FLOAT)
   SF( Y, Y, x, x, Y, Y, x, x, 60, BRW_SURFACEFORMAT_B10G10R10A2_UNORM)
   SF( Y, Y, x, x, Y, Y, x, x, 60, BRW_SURFACEFORMAT_B10G10R10A2_UNORM_SRGB)
   SF( Y, Y, x, x, Y, Y, Y, x, x, BRW_SURFACEFORMAT_R11G11B10_FLOAT)
   SF( Y, x, x, x, Y, x, Y, Y, x, BRW_SURFACEFORMAT_R32_SINT)
   SF( Y, x, x, x, Y, x, Y, Y, x, BRW_SURFACEFORMAT_R32_UINT)
   SF( Y, 50, Y, x, Y, Y, Y, Y, x, BRW_SURFACEFORMAT_R32_FLOAT)
   SF( Y, 50, Y, x, x, x, x, x, x, BRW_SURFACEFORMAT_R24_UNORM_X8_TYPELESS)
   SF( Y, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_X24_TYPELESS_G8_UINT)
   SF( Y, Y, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_L16A16_UNORM)
   SF( Y, 50, Y, x, x, x, x, x, x, BRW_SURFACEFORMAT_I24X8_UNORM)
   SF( Y, 50, Y, x, x, x, x, x, x, BRW_SURFACEFORMAT_L24X8_UNORM)
   SF( Y, 50, Y, x, x, x, x, x, x, BRW_SURFACEFORMAT_A24X8_UNORM)
   SF( Y, 50, Y, x, x, x, x, x, x, BRW_SURFACEFORMAT_I32_FLOAT)
   SF( Y, 50, Y, x, x, x, x, x, x, BRW_SURFACEFORMAT_L32_FLOAT)
   SF( Y, 50, Y, x, x, x, x, x, x, BRW_SURFACEFORMAT_A32_FLOAT)
   SF( Y, Y, x, Y, x, x, x, x, 60, BRW_SURFACEFORMAT_B8G8R8X8_UNORM)
   SF( Y, Y, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_B8G8R8X8_UNORM_SRGB)
   SF( Y, Y, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_R8G8B8X8_UNORM)
   SF( Y, Y, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_R8G8B8X8_UNORM_SRGB)
   SF( Y, Y, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_R9G9B9E5_SHAREDEXP)
   SF( Y, Y, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_B10G10R10X2_UNORM)
   SF( Y, Y, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_L16A16_FLOAT)
   SF( x, x, x, x, x, x, Y, x, x, BRW_SURFACEFORMAT_R32_UNORM)
   SF( x, x, x, x, x, x, Y, x, x, BRW_SURFACEFORMAT_R32_SNORM)
/* smpl filt shad CK RT AB VB SO color */
   SF( x, x, x, x, x, x, Y, x, x, BRW_SURFACEFORMAT_R10G10B10X2_USCALED)
   SF( x, x, x, x, x, x, Y, x, x, BRW_SURFACEFORMAT_R8G8B8A8_SSCALED)
   SF( x, x, x, x, x, x, Y, x, x, BRW_SURFACEFORMAT_R8G8B8A8_USCALED)
   SF( x, x, x, x, x, x, Y, x, x, BRW_SURFACEFORMAT_R16G16_SSCALED)
   SF( x, x, x, x, x, x, Y, x, x, BRW_SURFACEFORMAT_R16G16_USCALED)
   SF( x, x, x, x, x, x, Y, x, x, BRW_SURFACEFORMAT_R32_SSCALED)
   SF( x, x, x, x, x, x, Y, x, x, BRW_SURFACEFORMAT_R32_USCALED)
   SF( Y, Y, x, Y, Y, Y, x, x, x, BRW_SURFACEFORMAT_B5G6R5_UNORM)
   SF( Y, Y, x, x, Y, Y, x, x, x, BRW_SURFACEFORMAT_B5G6R5_UNORM_SRGB)
   SF( Y, Y, x, Y, Y, Y, x, x, x, BRW_SURFACEFORMAT_B5G5R5A1_UNORM)
   SF( Y, Y, x, x, Y, Y, x, x, x, BRW_SURFACEFORMAT_B5G5R5A1_UNORM_SRGB)
   SF( Y, Y, x, Y, Y, Y, x, x, x, BRW_SURFACEFORMAT_B4G4R4A4_UNORM)
   SF( Y, Y, x, x, Y, Y, x, x, x, BRW_SURFACEFORMAT_B4G4R4A4_UNORM_SRGB)
   SF( Y, Y, x, x, Y, Y, Y, x, x, BRW_SURFACEFORMAT_R8G8_UNORM)
   SF( Y, Y, x, Y, Y, 60, Y, x, x, BRW_SURFACEFORMAT_R8G8_SNORM)
   SF( Y, x, x, x, Y, x, Y, x, x, BRW_SURFACEFORMAT_R8G8_SINT)
   SF( Y, x, x, x, Y, x, Y, x, x, BRW_SURFACEFORMAT_R8G8_UINT)
   SF( Y, Y, Y, x, Y, 45, Y, x, 70, BRW_SURFACEFORMAT_R16_UNORM)
   SF( Y, Y, x, x, Y, 60, Y, x, x, BRW_SURFACEFORMAT_R16_SNORM)
   SF( Y, x, x, x, Y, x, Y, x, x, BRW_SURFACEFORMAT_R16_SINT)
   SF( Y, x, x, x, Y, x, Y, x, x, BRW_SURFACEFORMAT_R16_UINT)
   SF( Y, Y, x, x, Y, Y, Y, x, x, BRW_SURFACEFORMAT_R16_FLOAT)
   SF(50, 50, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_A8P8_UNORM_PALETTE0)
   SF(50, 50, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_A8P8_UNORM_PALETTE1)
   SF( Y, Y, Y, x, x, x, x, x, x, BRW_SURFACEFORMAT_I16_UNORM)
   SF( Y, Y, Y, x, x, x, x, x, x, BRW_SURFACEFORMAT_L16_UNORM)
   SF( Y, Y, Y, x, x, x, x, x, x, BRW_SURFACEFORMAT_A16_UNORM)
   SF( Y, Y, x, Y, x, x, x, x, x, BRW_SURFACEFORMAT_L8A8_UNORM)
   SF( Y, Y, Y, x, x, x, x, x, x, BRW_SURFACEFORMAT_I16_FLOAT)
   SF( Y, Y, Y, x, x, x, x, x, x, BRW_SURFACEFORMAT_L16_FLOAT)
   SF( Y, Y, Y, x, x, x, x, x, x, BRW_SURFACEFORMAT_A16_FLOAT)
   SF(45, 45, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_L8A8_UNORM_SRGB)
   SF( Y, Y, x, Y, x, x, x, x, x, BRW_SURFACEFORMAT_R5G5_SNORM_B6_UNORM)
   SF( x, x, x, x, Y, Y, x, x, x, BRW_SURFACEFORMAT_B5G5R5X1_UNORM)
   SF( x, x, x, x, Y, Y, x, x, x, BRW_SURFACEFORMAT_B5G5R5X1_UNORM_SRGB)
   SF( x, x, x, x, x, x, Y, x, x, BRW_SURFACEFORMAT_R8G8_SSCALED)
   SF( x, x, x, x, x, x, Y, x, x, BRW_SURFACEFORMAT_R8G8_USCALED)
/* smpl filt shad CK RT AB VB SO color */
   SF( x, x, x, x, x, x, Y, x, x, BRW_SURFACEFORMAT_R16_SSCALED)
   SF( x, x, x, x, x, x, Y, x, x, BRW_SURFACEFORMAT_R16_USCALED)
   SF(50, 50, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_P8A8_UNORM_PALETTE0)
   SF(50, 50, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_P8A8_UNORM_PALETTE1)
   SF( x, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_A1B5G5R5_UNORM)
   SF( x, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_A4B4G4R4_UNORM)
   SF( x, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_L8A8_UINT)
   SF( x, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_L8A8_SINT)
   SF( Y, Y, x, 45, Y, Y, Y, x, x, BRW_SURFACEFORMAT_R8_UNORM)
   SF( Y, Y, x, x, Y, 60, Y, x, x, BRW_SURFACEFORMAT_R8_SNORM)
   SF( Y, x, x, x, Y, x, Y, x, x, BRW_SURFACEFORMAT_R8_SINT)
   SF( Y, x, x, x, Y, x, Y, x, x, BRW_SURFACEFORMAT_R8_UINT)
   SF( Y, Y, x, Y, Y, Y, x, x, x, BRW_SURFACEFORMAT_A8_UNORM)
   SF( Y, Y, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_I8_UNORM)
   SF( Y, Y, x, Y, x, x, x, x, x, BRW_SURFACEFORMAT_L8_UNORM)
   SF( Y, Y, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_P4A4_UNORM)
   SF( Y, Y, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_A4P4_UNORM)
   SF( x, x, x, x, x, x, Y, x, x, BRW_SURFACEFORMAT_R8_SSCALED)
   SF( x, x, x, x, x, x, Y, x, x, BRW_SURFACEFORMAT_R8_USCALED)
   SF(45, 45, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_P8_UNORM_PALETTE0)
   SF(45, 45, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_L8_UNORM_SRGB)
   SF(45, 45, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_P8_UNORM_PALETTE1)
   SF(45, 45, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_P4A4_UNORM_PALETTE1)
   SF(45, 45, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_A4P4_UNORM_PALETTE1)
   SF( x, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_Y8_SNORM)
   SF( x, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_L8_UINT)
   SF( x, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_L8_SINT)
   SF( x, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_I8_UINT)
   SF( x, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_I8_SINT)
   SF(45, 45, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_DXT1_RGB_SRGB)
   SF( Y, Y, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_R1_UINT)
   SF( Y, Y, x, Y, Y, x, x, x, 60, BRW_SURFACEFORMAT_YCRCB_NORMAL)
   SF( Y, Y, x, Y, Y, x, x, x, 60, BRW_SURFACEFORMAT_YCRCB_SWAPUVY)
   SF(45, 45, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_P2_UNORM_PALETTE0)
   SF(45, 45, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_P2_UNORM_PALETTE1)
   SF( Y, Y, x, Y, x, x, x, x, x, BRW_SURFACEFORMAT_BC1_UNORM)
   SF( Y, Y, x, Y, x, x, x, x, x, BRW_SURFACEFORMAT_BC2_UNORM)
   SF( Y, Y, x, Y, x, x, x, x, x, BRW_SURFACEFORMAT_BC3_UNORM)
   SF( Y, Y, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_BC4_UNORM)
   SF( Y, Y, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_BC5_UNORM)
   SF( Y, Y, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_BC1_UNORM_SRGB)
   SF( Y, Y, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_BC2_UNORM_SRGB)
   SF( Y, Y, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_BC3_UNORM_SRGB)
   SF( Y, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_MONO8)
   SF( Y, Y, x, x, Y, x, x, x, 60, BRW_SURFACEFORMAT_YCRCB_SWAPUV)
   SF( Y, Y, x, x, Y, x, x, x, 60, BRW_SURFACEFORMAT_YCRCB_SWAPY)
   SF( Y, Y, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_DXT1_RGB)
/* smpl filt shad CK RT AB VB SO color */
   SF( Y, Y, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_FXT1)
   SF( x, x, x, x, x, x, Y, x, x, BRW_SURFACEFORMAT_R8G8B8_UNORM)
   SF( x, x, x, x, x, x, Y, x, x, BRW_SURFACEFORMAT_R8G8B8_SNORM)
   SF( x, x, x, x, x, x, Y, x, x, BRW_SURFACEFORMAT_R8G8B8_SSCALED)
   SF( x, x, x, x, x, x, Y, x, x, BRW_SURFACEFORMAT_R8G8B8_USCALED)
   SF( x, x, x, x, x, x, Y, x, x, BRW_SURFACEFORMAT_R64G64B64A64_FLOAT)
   SF( x, x, x, x, x, x, Y, x, x, BRW_SURFACEFORMAT_R64G64B64_FLOAT)
   SF( Y, Y, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_BC4_SNORM)
   SF( Y, Y, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_BC5_SNORM)
   SF(50, 50, x, x, x, x, 60, x, x, BRW_SURFACEFORMAT_R16G16B16_FLOAT)
   SF( x, x, x, x, x, x, Y, x, x, BRW_SURFACEFORMAT_R16G16B16_UNORM)
   SF( x, x, x, x, x, x, Y, x, x, BRW_SURFACEFORMAT_R16G16B16_SNORM)
   SF( x, x, x, x, x, x, Y, x, x, BRW_SURFACEFORMAT_R16G16B16_SSCALED)
   SF( x, x, x, x, x, x, Y, x, x, BRW_SURFACEFORMAT_R16G16B16_USCALED)
   SF(70, 70, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_BC6H_SF16)
   SF(70, 70, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_BC7_UNORM)
   SF(70, 70, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_BC7_UNORM_SRGB)
   SF(70, 70, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_BC6H_UF16)
   SF( x, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_PLANAR_420_8)
   SF( x, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_R8G8B8_UNORM_SRGB)
   SF( x, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_ETC1_RGB8)
   SF( x, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_ETC2_RGB8)
   SF( x, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_EAC_R11)
   SF( x, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_EAC_RG11)
   SF( x, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_EAC_SIGNED_R11)
   SF( x, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_EAC_SIGNED_RG11)
   SF( x, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_ETC2_SRGB8)
   SF( x, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_R16G16B16_UINT)
   SF( x, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_R16G16B16_SINT)
   SF( x, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_R32_SFIXED)
   SF( x, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_R10G10B10A2_SNORM)
   SF( x, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_R10G10B10A2_USCALED)
   SF( x, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_R10G10B10A2_SSCALED)
   SF( x, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_R10G10B10A2_SINT)
   SF( x, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_B10G10R10A2_SNORM)
   SF( x, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_B10G10R10A2_USCALED)
   SF( x, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_B10G10R10A2_SSCALED)
   SF( x, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_B10G10R10A2_UINT)
   SF( x, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_B10G10R10A2_SINT)
   SF( x, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_R64G64B64A64_PASSTHRU)
   SF( x, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_R64G64B64_PASSTHRU)
   SF( x, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_ETC2_RGB8_PTA)
   SF( x, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_ETC2_SRGB8_PTA)
   SF( x, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_ETC2_EAC_RGBA8)
   SF( x, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_ETC2_EAC_SRGB8_A8)
   SF( x, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_R8G8B8_UINT)
   SF( x, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_R8G8B8_SINT)
};
#undef x
#undef Y
 
/* Look up the support info for a pipe format.  Formats that do not map to a
 * hardware surface format (or whose table entry is absent) get a dummy entry
 * with every capability marked unsupported (999). */
static const struct surface_format_info *
lookup_surface_format_info(enum pipe_format format, unsigned bind)
{
   static const struct surface_format_info nonexist = {
      .exists = false,
      .sampling = 999,
      .filtering = 999,
      .shadow_compare = 999,
      .chroma_key = 999,
      .render_target = 999,
      .alpha_blend = 999,
      .input_vb = 999,
      .streamed_output_vb = 999,
      .color_processing = 999,
   };
   /* negative on translation failure */
   const int surfaceformat = ilo_translate_format(format, bind);

   return (surfaceformat >= 0 && surfaceformat < Elements(surface_formats) &&
           surface_formats[surfaceformat].exists) ?
      &surface_formats[surfaceformat] : &nonexist;
}
 
/**
* Translate a color (non-depth/stencil) pipe format to the matching hardware
* format. Return -1 on errors.
*/
int
ilo_translate_color_format(enum pipe_format format)
{
static const int format_mapping[PIPE_FORMAT_COUNT] = {
[PIPE_FORMAT_NONE] = 0,
[PIPE_FORMAT_B8G8R8A8_UNORM] = BRW_SURFACEFORMAT_B8G8R8A8_UNORM,
[PIPE_FORMAT_B8G8R8X8_UNORM] = BRW_SURFACEFORMAT_B8G8R8X8_UNORM,
[PIPE_FORMAT_A8R8G8B8_UNORM] = 0,
[PIPE_FORMAT_X8R8G8B8_UNORM] = 0,
[PIPE_FORMAT_B5G5R5A1_UNORM] = BRW_SURFACEFORMAT_B5G5R5A1_UNORM,
[PIPE_FORMAT_B4G4R4A4_UNORM] = BRW_SURFACEFORMAT_B4G4R4A4_UNORM,
[PIPE_FORMAT_B5G6R5_UNORM] = BRW_SURFACEFORMAT_B5G6R5_UNORM,
[PIPE_FORMAT_R10G10B10A2_UNORM] = BRW_SURFACEFORMAT_R10G10B10A2_UNORM,
[PIPE_FORMAT_L8_UNORM] = BRW_SURFACEFORMAT_L8_UNORM,
[PIPE_FORMAT_A8_UNORM] = BRW_SURFACEFORMAT_A8_UNORM,
[PIPE_FORMAT_I8_UNORM] = BRW_SURFACEFORMAT_I8_UNORM,
[PIPE_FORMAT_L8A8_UNORM] = BRW_SURFACEFORMAT_L8A8_UNORM,
[PIPE_FORMAT_L16_UNORM] = BRW_SURFACEFORMAT_L16_UNORM,
[PIPE_FORMAT_UYVY] = BRW_SURFACEFORMAT_YCRCB_SWAPUVY,
[PIPE_FORMAT_YUYV] = BRW_SURFACEFORMAT_YCRCB_NORMAL,
[PIPE_FORMAT_Z16_UNORM] = 0,
[PIPE_FORMAT_Z32_UNORM] = 0,
[PIPE_FORMAT_Z32_FLOAT] = 0,
[PIPE_FORMAT_Z24_UNORM_S8_UINT] = 0,
[PIPE_FORMAT_S8_UINT_Z24_UNORM] = 0,
[PIPE_FORMAT_Z24X8_UNORM] = 0,
[PIPE_FORMAT_X8Z24_UNORM] = 0,
[PIPE_FORMAT_S8_UINT] = 0,
[PIPE_FORMAT_R64_FLOAT] = BRW_SURFACEFORMAT_R64_FLOAT,
[PIPE_FORMAT_R64G64_FLOAT] = BRW_SURFACEFORMAT_R64G64_FLOAT,
[PIPE_FORMAT_R64G64B64_FLOAT] = BRW_SURFACEFORMAT_R64G64B64_FLOAT,
[PIPE_FORMAT_R64G64B64A64_FLOAT] = BRW_SURFACEFORMAT_R64G64B64A64_FLOAT,
[PIPE_FORMAT_R32_FLOAT] = BRW_SURFACEFORMAT_R32_FLOAT,
[PIPE_FORMAT_R32G32_FLOAT] = BRW_SURFACEFORMAT_R32G32_FLOAT,
[PIPE_FORMAT_R32G32B32_FLOAT] = BRW_SURFACEFORMAT_R32G32B32_FLOAT,
[PIPE_FORMAT_R32G32B32A32_FLOAT] = BRW_SURFACEFORMAT_R32G32B32A32_FLOAT,
[PIPE_FORMAT_R32_UNORM] = BRW_SURFACEFORMAT_R32_UNORM,
[PIPE_FORMAT_R32G32_UNORM] = BRW_SURFACEFORMAT_R32G32_UNORM,
[PIPE_FORMAT_R32G32B32_UNORM] = BRW_SURFACEFORMAT_R32G32B32_UNORM,
[PIPE_FORMAT_R32G32B32A32_UNORM] = BRW_SURFACEFORMAT_R32G32B32A32_UNORM,
[PIPE_FORMAT_R32_USCALED] = BRW_SURFACEFORMAT_R32_USCALED,
[PIPE_FORMAT_R32G32_USCALED] = BRW_SURFACEFORMAT_R32G32_USCALED,
[PIPE_FORMAT_R32G32B32_USCALED] = BRW_SURFACEFORMAT_R32G32B32_USCALED,
[PIPE_FORMAT_R32G32B32A32_USCALED] = BRW_SURFACEFORMAT_R32G32B32A32_USCALED,
[PIPE_FORMAT_R32_SNORM] = BRW_SURFACEFORMAT_R32_SNORM,
[PIPE_FORMAT_R32G32_SNORM] = BRW_SURFACEFORMAT_R32G32_SNORM,
[PIPE_FORMAT_R32G32B32_SNORM] = BRW_SURFACEFORMAT_R32G32B32_SNORM,
[PIPE_FORMAT_R32G32B32A32_SNORM] = BRW_SURFACEFORMAT_R32G32B32A32_SNORM,
[PIPE_FORMAT_R32_SSCALED] = BRW_SURFACEFORMAT_R32_SSCALED,
[PIPE_FORMAT_R32G32_SSCALED] = BRW_SURFACEFORMAT_R32G32_SSCALED,
[PIPE_FORMAT_R32G32B32_SSCALED] = BRW_SURFACEFORMAT_R32G32B32_SSCALED,
[PIPE_FORMAT_R32G32B32A32_SSCALED] = BRW_SURFACEFORMAT_R32G32B32A32_SSCALED,
[PIPE_FORMAT_R16_UNORM] = BRW_SURFACEFORMAT_R16_UNORM,
[PIPE_FORMAT_R16G16_UNORM] = BRW_SURFACEFORMAT_R16G16_UNORM,
[PIPE_FORMAT_R16G16B16_UNORM] = BRW_SURFACEFORMAT_R16G16B16_UNORM,
[PIPE_FORMAT_R16G16B16A16_UNORM] = BRW_SURFACEFORMAT_R16G16B16A16_UNORM,
[PIPE_FORMAT_R16_USCALED] = BRW_SURFACEFORMAT_R16_USCALED,
[PIPE_FORMAT_R16G16_USCALED] = BRW_SURFACEFORMAT_R16G16_USCALED,
[PIPE_FORMAT_R16G16B16_USCALED] = BRW_SURFACEFORMAT_R16G16B16_USCALED,
[PIPE_FORMAT_R16G16B16A16_USCALED] = BRW_SURFACEFORMAT_R16G16B16A16_USCALED,
[PIPE_FORMAT_R16_SNORM] = BRW_SURFACEFORMAT_R16_SNORM,
[PIPE_FORMAT_R16G16_SNORM] = BRW_SURFACEFORMAT_R16G16_SNORM,
[PIPE_FORMAT_R16G16B16_SNORM] = BRW_SURFACEFORMAT_R16G16B16_SNORM,
[PIPE_FORMAT_R16G16B16A16_SNORM] = BRW_SURFACEFORMAT_R16G16B16A16_SNORM,
[PIPE_FORMAT_R16_SSCALED] = BRW_SURFACEFORMAT_R16_SSCALED,
[PIPE_FORMAT_R16G16_SSCALED] = BRW_SURFACEFORMAT_R16G16_SSCALED,
[PIPE_FORMAT_R16G16B16_SSCALED] = BRW_SURFACEFORMAT_R16G16B16_SSCALED,
[PIPE_FORMAT_R16G16B16A16_SSCALED] = BRW_SURFACEFORMAT_R16G16B16A16_SSCALED,
[PIPE_FORMAT_R8_UNORM] = BRW_SURFACEFORMAT_R8_UNORM,
[PIPE_FORMAT_R8G8_UNORM] = BRW_SURFACEFORMAT_R8G8_UNORM,
[PIPE_FORMAT_R8G8B8_UNORM] = BRW_SURFACEFORMAT_R8G8B8_UNORM,
[PIPE_FORMAT_R8G8B8A8_UNORM] = BRW_SURFACEFORMAT_R8G8B8A8_UNORM,
[PIPE_FORMAT_X8B8G8R8_UNORM] = 0,
[PIPE_FORMAT_R8_USCALED] = BRW_SURFACEFORMAT_R8_USCALED,
[PIPE_FORMAT_R8G8_USCALED] = BRW_SURFACEFORMAT_R8G8_USCALED,
[PIPE_FORMAT_R8G8B8_USCALED] = BRW_SURFACEFORMAT_R8G8B8_USCALED,
[PIPE_FORMAT_R8G8B8A8_USCALED] = BRW_SURFACEFORMAT_R8G8B8A8_USCALED,
[PIPE_FORMAT_R8_SNORM] = BRW_SURFACEFORMAT_R8_SNORM,
[PIPE_FORMAT_R8G8_SNORM] = BRW_SURFACEFORMAT_R8G8_SNORM,
[PIPE_FORMAT_R8G8B8_SNORM] = BRW_SURFACEFORMAT_R8G8B8_SNORM,
[PIPE_FORMAT_R8G8B8A8_SNORM] = BRW_SURFACEFORMAT_R8G8B8A8_SNORM,
[PIPE_FORMAT_R8_SSCALED] = BRW_SURFACEFORMAT_R8_SSCALED,
[PIPE_FORMAT_R8G8_SSCALED] = BRW_SURFACEFORMAT_R8G8_SSCALED,
[PIPE_FORMAT_R8G8B8_SSCALED] = BRW_SURFACEFORMAT_R8G8B8_SSCALED,
[PIPE_FORMAT_R8G8B8A8_SSCALED] = BRW_SURFACEFORMAT_R8G8B8A8_SSCALED,
[PIPE_FORMAT_R32_FIXED] = BRW_SURFACEFORMAT_R32_SFIXED,
[PIPE_FORMAT_R32G32_FIXED] = BRW_SURFACEFORMAT_R32G32_SFIXED,
[PIPE_FORMAT_R32G32B32_FIXED] = BRW_SURFACEFORMAT_R32G32B32_SFIXED,
[PIPE_FORMAT_R32G32B32A32_FIXED] = BRW_SURFACEFORMAT_R32G32B32A32_SFIXED,
[PIPE_FORMAT_R16_FLOAT] = BRW_SURFACEFORMAT_R16_FLOAT,
[PIPE_FORMAT_R16G16_FLOAT] = BRW_SURFACEFORMAT_R16G16_FLOAT,
[PIPE_FORMAT_R16G16B16_FLOAT] = BRW_SURFACEFORMAT_R16G16B16_FLOAT,
[PIPE_FORMAT_R16G16B16A16_FLOAT] = BRW_SURFACEFORMAT_R16G16B16A16_FLOAT,
[PIPE_FORMAT_L8_SRGB] = BRW_SURFACEFORMAT_L8_UNORM_SRGB,
[PIPE_FORMAT_L8A8_SRGB] = BRW_SURFACEFORMAT_L8A8_UNORM_SRGB,
[PIPE_FORMAT_R8G8B8_SRGB] = BRW_SURFACEFORMAT_R8G8B8_UNORM_SRGB,
[PIPE_FORMAT_A8B8G8R8_SRGB] = 0,
[PIPE_FORMAT_X8B8G8R8_SRGB] = 0,
[PIPE_FORMAT_B8G8R8A8_SRGB] = BRW_SURFACEFORMAT_B8G8R8A8_UNORM_SRGB,
[PIPE_FORMAT_B8G8R8X8_SRGB] = 0,
[PIPE_FORMAT_A8R8G8B8_SRGB] = 0,
[PIPE_FORMAT_X8R8G8B8_SRGB] = 0,
[PIPE_FORMAT_R8G8B8A8_SRGB] = 0,
[PIPE_FORMAT_DXT1_RGB] = BRW_SURFACEFORMAT_DXT1_RGB,
[PIPE_FORMAT_DXT1_RGBA] = BRW_SURFACEFORMAT_BC1_UNORM,
[PIPE_FORMAT_DXT3_RGBA] = BRW_SURFACEFORMAT_BC2_UNORM,
[PIPE_FORMAT_DXT5_RGBA] = BRW_SURFACEFORMAT_BC3_UNORM,
[PIPE_FORMAT_DXT1_SRGB] = BRW_SURFACEFORMAT_DXT1_RGB_SRGB,
[PIPE_FORMAT_DXT1_SRGBA] = BRW_SURFACEFORMAT_BC1_UNORM_SRGB,
[PIPE_FORMAT_DXT3_SRGBA] = BRW_SURFACEFORMAT_BC2_UNORM_SRGB,
[PIPE_FORMAT_DXT5_SRGBA] = BRW_SURFACEFORMAT_BC3_UNORM_SRGB,
[PIPE_FORMAT_RGTC1_UNORM] = BRW_SURFACEFORMAT_BC4_UNORM,
[PIPE_FORMAT_RGTC1_SNORM] = BRW_SURFACEFORMAT_BC4_SNORM,
[PIPE_FORMAT_RGTC2_UNORM] = BRW_SURFACEFORMAT_BC5_UNORM,
[PIPE_FORMAT_RGTC2_SNORM] = BRW_SURFACEFORMAT_BC5_SNORM,
[PIPE_FORMAT_R8G8_B8G8_UNORM] = 0,
[PIPE_FORMAT_G8R8_G8B8_UNORM] = 0,
[PIPE_FORMAT_R8SG8SB8UX8U_NORM] = 0,
[PIPE_FORMAT_R5SG5SB6U_NORM] = 0,
[PIPE_FORMAT_A8B8G8R8_UNORM] = 0,
[PIPE_FORMAT_B5G5R5X1_UNORM] = BRW_SURFACEFORMAT_B5G5R5X1_UNORM,
[PIPE_FORMAT_R10G10B10A2_USCALED] = BRW_SURFACEFORMAT_R10G10B10A2_USCALED,
[PIPE_FORMAT_R11G11B10_FLOAT] = BRW_SURFACEFORMAT_R11G11B10_FLOAT,
[PIPE_FORMAT_R9G9B9E5_FLOAT] = BRW_SURFACEFORMAT_R9G9B9E5_SHAREDEXP,
[PIPE_FORMAT_Z32_FLOAT_S8X24_UINT] = 0,
[PIPE_FORMAT_R1_UNORM] = 0,
[PIPE_FORMAT_R10G10B10X2_USCALED] = BRW_SURFACEFORMAT_R10G10B10X2_USCALED,
[PIPE_FORMAT_R10G10B10X2_SNORM] = 0,
[PIPE_FORMAT_L4A4_UNORM] = 0,
[PIPE_FORMAT_B10G10R10A2_UNORM] = BRW_SURFACEFORMAT_B10G10R10A2_UNORM,
[PIPE_FORMAT_R10SG10SB10SA2U_NORM] = 0,
[PIPE_FORMAT_R8G8Bx_SNORM] = 0,
[PIPE_FORMAT_R8G8B8X8_UNORM] = BRW_SURFACEFORMAT_R8G8B8X8_UNORM,
[PIPE_FORMAT_B4G4R4X4_UNORM] = 0,
[PIPE_FORMAT_X24S8_UINT] = 0,
[PIPE_FORMAT_S8X24_UINT] = 0,
[PIPE_FORMAT_X32_S8X24_UINT] = 0,
[PIPE_FORMAT_B2G3R3_UNORM] = 0,
[PIPE_FORMAT_L16A16_UNORM] = BRW_SURFACEFORMAT_L16A16_UNORM,
[PIPE_FORMAT_A16_UNORM] = BRW_SURFACEFORMAT_A16_UNORM,
[PIPE_FORMAT_I16_UNORM] = BRW_SURFACEFORMAT_I16_UNORM,
[PIPE_FORMAT_LATC1_UNORM] = 0,
[PIPE_FORMAT_LATC1_SNORM] = 0,
[PIPE_FORMAT_LATC2_UNORM] = 0,
[PIPE_FORMAT_LATC2_SNORM] = 0,
[PIPE_FORMAT_A8_SNORM] = 0,
[PIPE_FORMAT_L8_SNORM] = 0,
[PIPE_FORMAT_L8A8_SNORM] = 0,
[PIPE_FORMAT_I8_SNORM] = 0,
[PIPE_FORMAT_A16_SNORM] = 0,
[PIPE_FORMAT_L16_SNORM] = 0,
[PIPE_FORMAT_L16A16_SNORM] = 0,
[PIPE_FORMAT_I16_SNORM] = 0,
[PIPE_FORMAT_A16_FLOAT] = BRW_SURFACEFORMAT_A16_FLOAT,
[PIPE_FORMAT_L16_FLOAT] = BRW_SURFACEFORMAT_L16_FLOAT,
[PIPE_FORMAT_L16A16_FLOAT] = BRW_SURFACEFORMAT_L16A16_FLOAT,
[PIPE_FORMAT_I16_FLOAT] = BRW_SURFACEFORMAT_I16_FLOAT,
[PIPE_FORMAT_A32_FLOAT] = BRW_SURFACEFORMAT_A32_FLOAT,
[PIPE_FORMAT_L32_FLOAT] = BRW_SURFACEFORMAT_L32_FLOAT,
[PIPE_FORMAT_L32A32_FLOAT] = BRW_SURFACEFORMAT_L32A32_FLOAT,
[PIPE_FORMAT_I32_FLOAT] = BRW_SURFACEFORMAT_I32_FLOAT,
[PIPE_FORMAT_YV12] = 0,
[PIPE_FORMAT_YV16] = 0,
[PIPE_FORMAT_IYUV] = 0,
[PIPE_FORMAT_NV12] = 0,
[PIPE_FORMAT_NV21] = 0,
[PIPE_FORMAT_R4A4_UNORM] = 0,
[PIPE_FORMAT_A4R4_UNORM] = 0,
[PIPE_FORMAT_R8A8_UNORM] = 0,
[PIPE_FORMAT_A8R8_UNORM] = 0,
[PIPE_FORMAT_R10G10B10A2_SSCALED] = BRW_SURFACEFORMAT_R10G10B10A2_SSCALED,
[PIPE_FORMAT_R10G10B10A2_SNORM] = BRW_SURFACEFORMAT_R10G10B10A2_SNORM,
[PIPE_FORMAT_B10G10R10A2_USCALED] = BRW_SURFACEFORMAT_B10G10R10A2_USCALED,
[PIPE_FORMAT_B10G10R10A2_SSCALED] = BRW_SURFACEFORMAT_B10G10R10A2_SSCALED,
[PIPE_FORMAT_B10G10R10A2_SNORM] = BRW_SURFACEFORMAT_B10G10R10A2_SNORM,
[PIPE_FORMAT_R8_UINT] = BRW_SURFACEFORMAT_R8_UINT,
[PIPE_FORMAT_R8G8_UINT] = BRW_SURFACEFORMAT_R8G8_UINT,
[PIPE_FORMAT_R8G8B8_UINT] = BRW_SURFACEFORMAT_R8G8B8_UINT,
[PIPE_FORMAT_R8G8B8A8_UINT] = BRW_SURFACEFORMAT_R8G8B8A8_UINT,
[PIPE_FORMAT_R8_SINT] = BRW_SURFACEFORMAT_R8_SINT,
[PIPE_FORMAT_R8G8_SINT] = BRW_SURFACEFORMAT_R8G8_SINT,
[PIPE_FORMAT_R8G8B8_SINT] = BRW_SURFACEFORMAT_R8G8B8_SINT,
[PIPE_FORMAT_R8G8B8A8_SINT] = BRW_SURFACEFORMAT_R8G8B8A8_SINT,
[PIPE_FORMAT_R16_UINT] = BRW_SURFACEFORMAT_R16_UINT,
[PIPE_FORMAT_R16G16_UINT] = BRW_SURFACEFORMAT_R16G16_UINT,
[PIPE_FORMAT_R16G16B16_UINT] = BRW_SURFACEFORMAT_R16G16B16_UINT,
[PIPE_FORMAT_R16G16B16A16_UINT] = BRW_SURFACEFORMAT_R16G16B16A16_UINT,
[PIPE_FORMAT_R16_SINT] = BRW_SURFACEFORMAT_R16_SINT,
[PIPE_FORMAT_R16G16_SINT] = BRW_SURFACEFORMAT_R16G16_SINT,
[PIPE_FORMAT_R16G16B16_SINT] = BRW_SURFACEFORMAT_R16G16B16_SINT,
[PIPE_FORMAT_R16G16B16A16_SINT] = BRW_SURFACEFORMAT_R16G16B16A16_SINT,
[PIPE_FORMAT_R32_UINT] = BRW_SURFACEFORMAT_R32_UINT,
[PIPE_FORMAT_R32G32_UINT] = BRW_SURFACEFORMAT_R32G32_UINT,
[PIPE_FORMAT_R32G32B32_UINT] = BRW_SURFACEFORMAT_R32G32B32_UINT,
[PIPE_FORMAT_R32G32B32A32_UINT] = BRW_SURFACEFORMAT_R32G32B32A32_UINT,
[PIPE_FORMAT_R32_SINT] = BRW_SURFACEFORMAT_R32_SINT,
[PIPE_FORMAT_R32G32_SINT] = BRW_SURFACEFORMAT_R32G32_SINT,
[PIPE_FORMAT_R32G32B32_SINT] = BRW_SURFACEFORMAT_R32G32B32_SINT,
[PIPE_FORMAT_R32G32B32A32_SINT] = BRW_SURFACEFORMAT_R32G32B32A32_SINT,
[PIPE_FORMAT_A8_UINT] = 0,
[PIPE_FORMAT_I8_UINT] = BRW_SURFACEFORMAT_I8_UINT,
[PIPE_FORMAT_L8_UINT] = BRW_SURFACEFORMAT_L8_UINT,
[PIPE_FORMAT_L8A8_UINT] = BRW_SURFACEFORMAT_L8A8_UINT,
[PIPE_FORMAT_A8_SINT] = 0,
[PIPE_FORMAT_I8_SINT] = BRW_SURFACEFORMAT_I8_SINT,
[PIPE_FORMAT_L8_SINT] = BRW_SURFACEFORMAT_L8_SINT,
[PIPE_FORMAT_L8A8_SINT] = BRW_SURFACEFORMAT_L8A8_SINT,
[PIPE_FORMAT_A16_UINT] = 0,
[PIPE_FORMAT_I16_UINT] = 0,
[PIPE_FORMAT_L16_UINT] = 0,
[PIPE_FORMAT_L16A16_UINT] = 0,
[PIPE_FORMAT_A16_SINT] = 0,
[PIPE_FORMAT_I16_SINT] = 0,
[PIPE_FORMAT_L16_SINT] = 0,
[PIPE_FORMAT_L16A16_SINT] = 0,
[PIPE_FORMAT_A32_UINT] = 0,
[PIPE_FORMAT_I32_UINT] = 0,
[PIPE_FORMAT_L32_UINT] = 0,
[PIPE_FORMAT_L32A32_UINT] = 0,
[PIPE_FORMAT_A32_SINT] = 0,
[PIPE_FORMAT_I32_SINT] = 0,
[PIPE_FORMAT_L32_SINT] = 0,
[PIPE_FORMAT_L32A32_SINT] = 0,
[PIPE_FORMAT_B10G10R10A2_UINT] = BRW_SURFACEFORMAT_B10G10R10A2_UINT,
[PIPE_FORMAT_ETC1_RGB8] = BRW_SURFACEFORMAT_ETC1_RGB8,
[PIPE_FORMAT_R8G8_R8B8_UNORM] = 0,
[PIPE_FORMAT_G8R8_B8R8_UNORM] = 0,
[PIPE_FORMAT_R8G8B8X8_SNORM] = 0,
[PIPE_FORMAT_R8G8B8X8_SRGB] = 0,
[PIPE_FORMAT_R8G8B8X8_UINT] = 0,
[PIPE_FORMAT_R8G8B8X8_SINT] = 0,
[PIPE_FORMAT_B10G10R10X2_UNORM] = BRW_SURFACEFORMAT_B10G10R10X2_UNORM,
[PIPE_FORMAT_R16G16B16X16_UNORM] = BRW_SURFACEFORMAT_R16G16B16X16_UNORM,
[PIPE_FORMAT_R16G16B16X16_SNORM] = 0,
[PIPE_FORMAT_R16G16B16X16_FLOAT] = BRW_SURFACEFORMAT_R16G16B16X16_FLOAT,
[PIPE_FORMAT_R16G16B16X16_UINT] = 0,
[PIPE_FORMAT_R16G16B16X16_SINT] = 0,
[PIPE_FORMAT_R32G32B32X32_FLOAT] = BRW_SURFACEFORMAT_R32G32B32X32_FLOAT,
[PIPE_FORMAT_R32G32B32X32_UINT] = 0,
[PIPE_FORMAT_R32G32B32X32_SINT] = 0,
[PIPE_FORMAT_R8A8_SNORM] = 0,
[PIPE_FORMAT_R16A16_UNORM] = 0,
[PIPE_FORMAT_R16A16_SNORM] = 0,
[PIPE_FORMAT_R16A16_FLOAT] = 0,
[PIPE_FORMAT_R32A32_FLOAT] = 0,
[PIPE_FORMAT_R8A8_UINT] = 0,
[PIPE_FORMAT_R8A8_SINT] = 0,
[PIPE_FORMAT_R16A16_UINT] = 0,
[PIPE_FORMAT_R16A16_SINT] = 0,
[PIPE_FORMAT_R32A32_UINT] = 0,
[PIPE_FORMAT_R32A32_SINT] = 0,
};
int sfmt = format_mapping[format];
 
/* BRW_SURFACEFORMAT_R32G32B32A32_FLOAT happens to be 0 */
if (!sfmt && format != PIPE_FORMAT_R32G32B32A32_FLOAT)
sfmt = -1;
 
return sfmt;
}
 
/*
 * pipe_screen::is_format_supported() hook.
 *
 * The per-gen capability values in the surface format table are stored as
 * "gen major * 10" (e.g. 70 for Gen7), so the device gen is scaled the same
 * way before comparing.
 */
static boolean
ilo_is_format_supported(struct pipe_screen *screen,
                        enum pipe_format format,
                        enum pipe_texture_target target,
                        unsigned sample_count,
                        unsigned bindings)
{
   struct ilo_screen *is = ilo_screen(screen);
   const int gen = ILO_GEN_GET_MAJOR(is->dev.gen * 10);
   const bool pure_int = util_format_is_pure_integer(format);

   if (!util_format_is_supported(format, bindings))
      return false;

   /* no MSAA support yet */
   if (sample_count > 1)
      return false;

   if (bindings & PIPE_BIND_DEPTH_STENCIL) {
      switch (format) {
      case PIPE_FORMAT_Z16_UNORM:
      case PIPE_FORMAT_Z24X8_UNORM:
      case PIPE_FORMAT_Z32_FLOAT:
      case PIPE_FORMAT_Z24_UNORM_S8_UINT:
      case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
         break;
      case PIPE_FORMAT_S8_UINT:
         /* TODO separate stencil */
      default:
         return false;
      }
   }

   if (bindings & PIPE_BIND_RENDER_TARGET) {
      const struct surface_format_info *info =
         lookup_surface_format_info(format, bindings & PIPE_BIND_RENDER_TARGET);

      /* blendability matters only for non-integer formats */
      if (gen < info->render_target ||
          (!pure_int && gen < info->alpha_blend))
         return false;
   }

   if (bindings & PIPE_BIND_SAMPLER_VIEW) {
      const struct surface_format_info *info =
         lookup_surface_format_info(format, bindings & PIPE_BIND_SAMPLER_VIEW);

      /* filterability matters only for non-integer formats */
      if (gen < info->sampling ||
          (!pure_int && gen < info->filtering))
         return false;
   }

   if (bindings & PIPE_BIND_VERTEX_BUFFER) {
      const struct surface_format_info *info =
         lookup_surface_format_info(format, bindings & PIPE_BIND_VERTEX_BUFFER);

      if (gen < info->input_vb)
         return false;
   }

   return true;
}
 
/*
 * pipe_screen::is_video_format_supported() hook; defers entirely to the
 * shared vl helper.
 */
static boolean
ilo_is_video_format_supported(struct pipe_screen *screen,
                              enum pipe_format format,
                              enum pipe_video_profile profile)
{
   const boolean supported =
      vl_video_buffer_is_format_supported(screen, format, profile);

   return supported;
}
 
/**
* Initialize format-related functions.
*/
void
ilo_init_format_functions(struct ilo_screen *is)
{
is->base.is_format_supported = ilo_is_format_supported;
is->base.is_video_format_supported = ilo_is_video_format_supported;
}
/contrib/sdk/sources/Mesa/src/gallium/drivers/ilo/ilo_format.h
0,0 → 1,142
/*
* Mesa 3-D graphics library
*
* Copyright (C) 2012-2013 LunarG, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*
* Authors:
* Chia-I Wu <olv@lunarg.com>
*/
 
#ifndef ILO_FORMAT_H
#define ILO_FORMAT_H
 
#include "brw_defines.h"
 
#include "ilo_common.h"
 
struct ilo_screen;

/* install the format-query callbacks into the screen vtable */
void
ilo_init_format_functions(struct ilo_screen *is);

/* translate a color (non-depth/stencil) pipe format; returns -1 on errors */
int
ilo_translate_color_format(enum pipe_format format);
 
/**
 * Translate a pipe format to a hardware surface format suitable for
 * the given purpose.  Return -1 on errors.
 *
 * This is an inline function not only for performance reasons.  There are
 * caveats that the callers should know about before calling this function;
 * see the per-bind comments below.
 */
static inline int
ilo_translate_format(enum pipe_format format, unsigned bind)
{
   switch (bind) {
   case PIPE_BIND_RENDER_TARGET:
      /*
       * Some RGBX formats are not supported as render target formats.  But we
       * can use their RGBA counterparts and force the destination alpha to be
       * one when blending is enabled.
       */
      switch (format) {
      case PIPE_FORMAT_B8G8R8X8_UNORM:
         return BRW_SURFACEFORMAT_B8G8R8A8_UNORM;
      default:
         return ilo_translate_color_format(format);
      }
      break;
   case PIPE_BIND_SAMPLER_VIEW:
      /*
       * For depth formats, we want the depth values to be returned as R
       * values.  But we assume in many places that the depth values are
       * returned as I values (util_make_fragment_tex_shader_writedepth() is
       * one such example).  We have to live with that at least for now.
       *
       * For ETC1 format, the texture data will be decompressed before being
       * written to the bo.  See tex_staging_sys_convert_write().
       */
      switch (format) {
      case PIPE_FORMAT_Z16_UNORM:
         return BRW_SURFACEFORMAT_I16_UNORM;
      case PIPE_FORMAT_Z32_FLOAT:
         return BRW_SURFACEFORMAT_I32_FLOAT;
      case PIPE_FORMAT_Z24X8_UNORM:
      case PIPE_FORMAT_Z24_UNORM_S8_UINT:
         return BRW_SURFACEFORMAT_I24X8_UNORM;
      case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
         return BRW_SURFACEFORMAT_I32X32_FLOAT;
      case PIPE_FORMAT_ETC1_RGB8:
         return BRW_SURFACEFORMAT_R8G8B8X8_UNORM;
      default:
         return ilo_translate_color_format(format);
      }
      break;
   case PIPE_BIND_VERTEX_BUFFER:
      /*
       * Some 3-component formats are not supported as vertex element formats.
       * But since we move between vertices using vb->stride, we should be
       * good to use their 4-component counterparts if we force the W
       * component to be one.  The only exception is that the vb boundary
       * check for the last vertex may fail.
       */
      switch (format) {
      case PIPE_FORMAT_R16G16B16_FLOAT:
         return BRW_SURFACEFORMAT_R16G16B16A16_FLOAT;
      case PIPE_FORMAT_R16G16B16_UINT:
         return BRW_SURFACEFORMAT_R16G16B16A16_UINT;
      case PIPE_FORMAT_R16G16B16_SINT:
         return BRW_SURFACEFORMAT_R16G16B16A16_SINT;
      case PIPE_FORMAT_R8G8B8_UINT:
         return BRW_SURFACEFORMAT_R8G8B8A8_UINT;
      case PIPE_FORMAT_R8G8B8_SINT:
         return BRW_SURFACEFORMAT_R8G8B8A8_SINT;
      default:
         return ilo_translate_color_format(format);
      }
      break;
   default:
      assert(!"cannot translate format");
      break;
   }

   return -1;
}
 
/* translate a pipe format for render-target use; returns -1 on errors */
static inline int
ilo_translate_render_format(enum pipe_format format)
{
   const unsigned bind = PIPE_BIND_RENDER_TARGET;

   return ilo_translate_format(format, bind);
}
 
/* translate a pipe format for sampler-view use; returns -1 on errors */
static inline int
ilo_translate_texture_format(enum pipe_format format)
{
   const unsigned bind = PIPE_BIND_SAMPLER_VIEW;

   return ilo_translate_format(format, bind);
}
 
/* translate a pipe format for vertex-buffer use; returns -1 on errors */
static inline int
ilo_translate_vertex_format(enum pipe_format format)
{
   const unsigned bind = PIPE_BIND_VERTEX_BUFFER;

   return ilo_translate_format(format, bind);
}
 
#endif /* ILO_FORMAT_H */
/contrib/sdk/sources/Mesa/src/gallium/drivers/ilo/ilo_gpe.h
0,0 → 1,528
/*
* Mesa 3-D graphics library
*
* Copyright (C) 2013 LunarG, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*
* Authors:
* Chia-I Wu <olv@lunarg.com>
*/
 
#ifndef ILO_GPE_H
#define ILO_GPE_H
 
#include "ilo_common.h"
 
/**
 * Driver-wide limits on bound state objects.
 *
 * \see brw_context.h
 */
#define ILO_MAX_DRAW_BUFFERS 8
#define ILO_MAX_CONST_BUFFERS (1 + 12)
#define ILO_MAX_SAMPLER_VIEWS 16
#define ILO_MAX_SAMPLERS 16
#define ILO_MAX_SO_BINDINGS 64
#define ILO_MAX_SO_BUFFERS 4
#define ILO_MAX_VIEWPORTS 1

/* VS binding table layout: constant buffers first, then textures */
#define ILO_MAX_VS_SURFACES (ILO_MAX_CONST_BUFFERS + ILO_MAX_SAMPLER_VIEWS)
#define ILO_VS_CONST_SURFACE(i) (i)
#define ILO_VS_TEXTURE_SURFACE(i) (ILO_MAX_CONST_BUFFERS + i)

/* GS binding table layout: stream-output bindings only */
#define ILO_MAX_GS_SURFACES (ILO_MAX_SO_BINDINGS)
#define ILO_GS_SO_SURFACE(i) (i)

/* WM binding table layout: render targets, constant buffers, then textures */
#define ILO_MAX_WM_SURFACES (ILO_MAX_DRAW_BUFFERS + ILO_MAX_CONST_BUFFERS + ILO_MAX_SAMPLER_VIEWS)
#define ILO_WM_DRAW_SURFACE(i) (i)
#define ILO_WM_CONST_SURFACE(i) (ILO_MAX_DRAW_BUFFERS + i)
#define ILO_WM_TEXTURE_SURFACE(i) (ILO_MAX_DRAW_BUFFERS + ILO_MAX_CONST_BUFFERS + i)

struct ilo_buffer;
struct ilo_texture;
struct ilo_shader_state;
 
/* vertex buffer bindings; enabled_mask has one bit set per bound slot */
struct ilo_vb_state {
   struct pipe_vertex_buffer states[PIPE_MAX_ATTRIBS];
   uint32_t enabled_mask;
};

/* index buffer binding */
struct ilo_ib_state {
   struct pipe_resource *buffer;
   const void *user_buffer;
   unsigned offset;
   unsigned index_size;

   /* these are not valid until the state is finalized */
   struct pipe_resource *hw_resource;
   unsigned hw_index_size;
   /* an offset to be added to pipe_draw_info::start */
   int64_t draw_start_offset;
};

/* pre-packed hardware words for a single vertex element */
struct ilo_ve_cso {
   /* VERTEX_ELEMENT_STATE */
   uint32_t payload[2];
};

/* vertex element layout */
struct ilo_ve_state {
   struct ilo_ve_cso cso[PIPE_MAX_ATTRIBS];
   unsigned count;

   unsigned instance_divisors[PIPE_MAX_ATTRIBS];
   /* NOTE(review): presumably maps elements to VB slots — see ilo_gpe_init_ve() */
   unsigned vb_mapping[PIPE_MAX_ATTRIBS];
   unsigned vb_count;
};
 
/* stream-output (transform feedback) targets */
struct ilo_so_state {
   struct pipe_stream_output_target *states[ILO_MAX_SO_BUFFERS];
   unsigned count;
   unsigned append_bitmask;

   bool enabled;
};

/* derived per-viewport data */
struct ilo_viewport_cso {
   /* matrix form */
   float m00, m11, m22, m30, m31, m32;

   /* guardband in NDC space */
   float min_gbx, min_gby, max_gbx, max_gby;

   /* viewport in screen space */
   float min_x, min_y, min_z;
   float max_x, max_y, max_z;
};

struct ilo_viewport_state {
   struct ilo_viewport_cso cso[ILO_MAX_VIEWPORTS];
   unsigned count;

   /* pipe-level state of viewport 0 */
   struct pipe_viewport_state viewport0;
};

struct ilo_scissor_state {
   /* SCISSOR_RECT */
   uint32_t payload[ILO_MAX_VIEWPORTS * 2];

   /* pipe-level state of scissor 0 */
   struct pipe_scissor_state scissor0;
};
 
/* pre-packed 3DSTATE_CLIP words plus a derived guardband flag */
struct ilo_rasterizer_clip {
   /* 3DSTATE_CLIP */
   uint32_t payload[3];

   uint32_t can_enable_guardband;
};

/* pre-packed 3DSTATE_SF words; dw_msaa is the MSAA-dependent dword */
struct ilo_rasterizer_sf {
   /* 3DSTATE_SF */
   uint32_t payload[6];
   uint32_t dw_msaa;
};

/* pre-packed 3DSTATE_WM words; dw_msaa_* are the MSAA-dependent dwords */
struct ilo_rasterizer_wm {
   /* 3DSTATE_WM */
   uint32_t payload[2];
   uint32_t dw_msaa_rast;
   uint32_t dw_msaa_disp;
};

/* rasterizer CSO: the pipe state plus its pre-packed hardware forms */
struct ilo_rasterizer_state {
   struct pipe_rasterizer_state state;

   struct ilo_rasterizer_clip clip;
   struct ilo_rasterizer_sf sf;
   struct ilo_rasterizer_wm wm;
};

/* depth/stencil/alpha CSO */
struct ilo_dsa_state {
   /* DEPTH_STENCIL_STATE */
   uint32_t payload[3];

   /* alpha-test state kept in pipe form */
   struct pipe_alpha_state alpha;
};
 
/* per-render-target blend CSO with alternative pre-packed dwords */
struct ilo_blend_cso {
   /* BLEND_STATE */
   uint32_t payload[2];

   uint32_t dw_blend;
   /* variant used when destination alpha must be forced to one (RGBX RTs) */
   uint32_t dw_blend_dst_alpha_forced_one;

   uint32_t dw_logicop;
   uint32_t dw_alpha_mod;
};

struct ilo_blend_state {
   struct ilo_blend_cso cso[ILO_MAX_DRAW_BUFFERS];

   bool independent_blend_enable;
   bool dual_blend;
   bool alpha_to_coverage;
};

/* sampler CSO with alternative pre-packed dwords selected at bind time */
struct ilo_sampler_cso {
   /* SAMPLER_STATE and SAMPLER_BORDER_COLOR_STATE */
   uint32_t payload[15];

   uint32_t dw_filter;
   uint32_t dw_filter_aniso;
   uint32_t dw_wrap;
   uint32_t dw_wrap_1d;
   uint32_t dw_wrap_cube;

   bool anisotropic;
   bool saturate_r;
   bool saturate_s;
   bool saturate_t;
};

struct ilo_sampler_state {
   const struct ilo_sampler_cso *cso[ILO_MAX_SAMPLERS];
   unsigned count;
};
 
/* pre-packed SURFACE_STATE plus the bo it references */
struct ilo_view_surface {
   /* SURFACE_STATE */
   uint32_t payload[8];
   struct intel_bo *bo;
};

/* sampler view CSO */
struct ilo_view_cso {
   struct pipe_sampler_view base;

   struct ilo_view_surface surface;
};

struct ilo_view_state {
   struct pipe_sampler_view *states[ILO_MAX_SAMPLER_VIEWS];
   unsigned count;
};

/* a single constant buffer binding */
struct ilo_cbuf_cso {
   struct pipe_resource *resource;
   struct ilo_view_surface surface;

   /*
    * this CSO is not so constant because user buffer needs to be uploaded in
    * finalize_constant_buffers()
    */
   const void *user_buffer;
   unsigned user_buffer_size;
};

/* constant buffer bindings; enabled_mask has one bit set per bound slot */
struct ilo_cbuf_state {
   struct ilo_cbuf_cso cso[ILO_MAX_CONST_BUFFERS];
   uint32_t enabled_mask;
};

/* shader resource (surface) bindings */
struct ilo_resource_state {
   struct pipe_surface *states[PIPE_MAX_SHADER_RESOURCES];
   unsigned count;
};
 
/*
 * pipe_surface CSO; holds either a render-target SURFACE_STATE or a
 * depth/stencil surface, selected by is_rt.
 */
struct ilo_surface_cso {
   struct pipe_surface base;

   bool is_rt;
   union {
      struct ilo_view_surface rt;
      /* pre-packed depth-buffer state and the bos it references */
      struct ilo_zs_surface {
         uint32_t payload[10];
         struct intel_bo *bo;
         struct intel_bo *hiz_bo;
         struct intel_bo *separate_s8_bo;
      } zs;
   } u;
};

/* framebuffer state; null_zs is used when no depth/stencil is bound */
struct ilo_fb_state {
   struct pipe_framebuffer_state state;

   struct ilo_zs_surface null_zs;
   unsigned num_samples;
};

struct ilo_global_binding {
   /*
    * XXX These should not be treated as real resources (and there could be
    * thousands of them).  They should be treated as regions in GLOBAL
    * resource, which is the only real resource.
    *
    * That is, a resource here should instead be
    *
    *   struct ilo_global_region {
    *      struct pipe_resource base;
    *      int offset;
    *      int size;
    *   };
    *
    * and it describes the region [offset, offset + size) in GLOBAL
    * resource.
    */
   struct pipe_resource *resources[PIPE_MAX_SHADER_RESOURCES];
   uint32_t *handles[PIPE_MAX_SHADER_RESOURCES];
   unsigned count;
};

/* pre-packed per-shader-stage hardware words */
struct ilo_shader_cso {
   uint32_t payload[5];
};
 
/*
 * State-packing helpers.  Each ilo_gpe_init_*()/ilo_gpe_set_*() derives the
 * pre-packed hardware words of the corresponding CSO from pipe state.
 * Functions with _gen6/_gen7 suffixes are per-generation variants; the
 * inline wrappers below dispatch on dev->gen.
 */
void
ilo_gpe_init_ve(const struct ilo_dev_info *dev,
                unsigned num_states,
                const struct pipe_vertex_element *states,
                struct ilo_ve_state *ve);

void
ilo_gpe_set_viewport_cso(const struct ilo_dev_info *dev,
                         const struct pipe_viewport_state *state,
                         struct ilo_viewport_cso *vp);

void
ilo_gpe_set_scissor(const struct ilo_dev_info *dev,
                    unsigned start_slot,
                    unsigned num_states,
                    const struct pipe_scissor_state *states,
                    struct ilo_scissor_state *scissor);

/* program an empty scissor rectangle */
void
ilo_gpe_set_scissor_null(const struct ilo_dev_info *dev,
                         struct ilo_scissor_state *scissor);

void
ilo_gpe_init_rasterizer_clip(const struct ilo_dev_info *dev,
                             const struct pipe_rasterizer_state *state,
                             struct ilo_rasterizer_clip *clip);

void
ilo_gpe_init_rasterizer_sf(const struct ilo_dev_info *dev,
                           const struct pipe_rasterizer_state *state,
                           struct ilo_rasterizer_sf *sf);

void
ilo_gpe_init_rasterizer_wm_gen6(const struct ilo_dev_info *dev,
                                const struct pipe_rasterizer_state *state,
                                struct ilo_rasterizer_wm *wm);

void
ilo_gpe_init_rasterizer_wm_gen7(const struct ilo_dev_info *dev,
                                const struct pipe_rasterizer_state *state,
                                struct ilo_rasterizer_wm *wm);
 
/*
 * Pack all rasterizer-derived hardware state, choosing the WM variant that
 * matches the hardware generation.
 */
static inline void
ilo_gpe_init_rasterizer(const struct ilo_dev_info *dev,
                        const struct pipe_rasterizer_state *state,
                        struct ilo_rasterizer_state *rasterizer)
{
   ilo_gpe_init_rasterizer_clip(dev, state, &rasterizer->clip);
   ilo_gpe_init_rasterizer_sf(dev, state, &rasterizer->sf);

   if (dev->gen < ILO_GEN(7))
      ilo_gpe_init_rasterizer_wm_gen6(dev, state, &rasterizer->wm);
   else
      ilo_gpe_init_rasterizer_wm_gen7(dev, state, &rasterizer->wm);
}
 
void
ilo_gpe_init_dsa(const struct ilo_dev_info *dev,
                 const struct pipe_depth_stencil_alpha_state *state,
                 struct ilo_dsa_state *dsa);

void
ilo_gpe_init_blend(const struct ilo_dev_info *dev,
                   const struct pipe_blend_state *state,
                   struct ilo_blend_state *blend);

void
ilo_gpe_init_sampler_cso(const struct ilo_dev_info *dev,
                         const struct pipe_sampler_state *state,
                         struct ilo_sampler_cso *sampler);

/*
 * SURFACE_STATE packing, in gen6 and gen7 flavors; the inline wrappers
 * below dispatch on dev->gen.
 */
void
ilo_gpe_init_view_surface_null_gen6(const struct ilo_dev_info *dev,
                                    unsigned width, unsigned height,
                                    unsigned depth, unsigned level,
                                    struct ilo_view_surface *surf);

void
ilo_gpe_init_view_surface_for_buffer_gen6(const struct ilo_dev_info *dev,
                                          const struct ilo_buffer *buf,
                                          unsigned offset, unsigned size,
                                          unsigned struct_size,
                                          enum pipe_format elem_format,
                                          bool is_rt, bool render_cache_rw,
                                          struct ilo_view_surface *surf);

void
ilo_gpe_init_view_surface_for_texture_gen6(const struct ilo_dev_info *dev,
                                           const struct ilo_texture *tex,
                                           enum pipe_format format,
                                           unsigned first_level,
                                           unsigned num_levels,
                                           unsigned first_layer,
                                           unsigned num_layers,
                                           bool is_rt, bool render_cache_rw,
                                           struct ilo_view_surface *surf);

void
ilo_gpe_init_view_surface_null_gen7(const struct ilo_dev_info *dev,
                                    unsigned width, unsigned height,
                                    unsigned depth, unsigned level,
                                    struct ilo_view_surface *surf);

void
ilo_gpe_init_view_surface_for_buffer_gen7(const struct ilo_dev_info *dev,
                                          const struct ilo_buffer *buf,
                                          unsigned offset, unsigned size,
                                          unsigned struct_size,
                                          enum pipe_format elem_format,
                                          bool is_rt, bool render_cache_rw,
                                          struct ilo_view_surface *surf);

void
ilo_gpe_init_view_surface_for_texture_gen7(const struct ilo_dev_info *dev,
                                           const struct ilo_texture *tex,
                                           enum pipe_format format,
                                           unsigned first_level,
                                           unsigned num_levels,
                                           unsigned first_layer,
                                           unsigned num_layers,
                                           bool is_rt, bool render_cache_rw,
                                           struct ilo_view_surface *surf);
 
/* pack a null SURFACE_STATE, dispatching on the hardware generation */
static inline void
ilo_gpe_init_view_surface_null(const struct ilo_dev_info *dev,
                               unsigned width, unsigned height,
                               unsigned depth, unsigned level,
                               struct ilo_view_surface *surf)
{
   if (dev->gen < ILO_GEN(7)) {
      ilo_gpe_init_view_surface_null_gen6(dev,
            width, height, depth, level, surf);
   }
   else {
      ilo_gpe_init_view_surface_null_gen7(dev,
            width, height, depth, level, surf);
   }
}
 
/* pack a buffer SURFACE_STATE, dispatching on the hardware generation */
static inline void
ilo_gpe_init_view_surface_for_buffer(const struct ilo_dev_info *dev,
                                     const struct ilo_buffer *buf,
                                     unsigned offset, unsigned size,
                                     unsigned struct_size,
                                     enum pipe_format elem_format,
                                     bool is_rt, bool render_cache_rw,
                                     struct ilo_view_surface *surf)
{
   if (dev->gen < ILO_GEN(7)) {
      ilo_gpe_init_view_surface_for_buffer_gen6(dev, buf, offset, size,
            struct_size, elem_format, is_rt, render_cache_rw, surf);
   }
   else {
      ilo_gpe_init_view_surface_for_buffer_gen7(dev, buf, offset, size,
            struct_size, elem_format, is_rt, render_cache_rw, surf);
   }
}
 
/* pack a texture SURFACE_STATE, dispatching on the hardware generation */
static inline void
ilo_gpe_init_view_surface_for_texture(const struct ilo_dev_info *dev,
                                      const struct ilo_texture *tex,
                                      enum pipe_format format,
                                      unsigned first_level,
                                      unsigned num_levels,
                                      unsigned first_layer,
                                      unsigned num_layers,
                                      bool is_rt, bool render_cache_rw,
                                      struct ilo_view_surface *surf)
{
   if (dev->gen < ILO_GEN(7)) {
      ilo_gpe_init_view_surface_for_texture_gen6(dev, tex, format,
            first_level, num_levels, first_layer, num_layers,
            is_rt, render_cache_rw, surf);
   }
   else {
      ilo_gpe_init_view_surface_for_texture_gen7(dev, tex, format,
            first_level, num_levels, first_layer, num_layers,
            is_rt, render_cache_rw, surf);
   }
}
 
/* pack the depth/stencil surface state */
void
ilo_gpe_init_zs_surface(const struct ilo_dev_info *dev,
                        const struct ilo_texture *tex,
                        enum pipe_format format,
                        unsigned level,
                        unsigned first_layer, unsigned num_layers,
                        struct ilo_zs_surface *zs);

void
ilo_gpe_init_vs_cso(const struct ilo_dev_info *dev,
                    const struct ilo_shader_state *vs,
                    struct ilo_shader_cso *cso);

/* GS CSO packing, per hardware generation; the inline wrapper dispatches */
void
ilo_gpe_init_gs_cso_gen6(const struct ilo_dev_info *dev,
                         const struct ilo_shader_state *gs,
                         struct ilo_shader_cso *cso);

void
ilo_gpe_init_gs_cso_gen7(const struct ilo_dev_info *dev,
                         const struct ilo_shader_state *gs,
                         struct ilo_shader_cso *cso);
 
/* pack the GS CSO, dispatching on the hardware generation */
static inline void
ilo_gpe_init_gs_cso(const struct ilo_dev_info *dev,
                    const struct ilo_shader_state *gs,
                    struct ilo_shader_cso *cso)
{
   if (dev->gen < ILO_GEN(7))
      ilo_gpe_init_gs_cso_gen6(dev, gs, cso);
   else
      ilo_gpe_init_gs_cso_gen7(dev, gs, cso);
}
 
/* FS CSO packing, per hardware generation; the inline wrapper dispatches */
void
ilo_gpe_init_fs_cso_gen6(const struct ilo_dev_info *dev,
                         const struct ilo_shader_state *fs,
                         struct ilo_shader_cso *cso);

void
ilo_gpe_init_fs_cso_gen7(const struct ilo_dev_info *dev,
                         const struct ilo_shader_state *fs,
                         struct ilo_shader_cso *cso);
 
/* pack the FS CSO, dispatching on the hardware generation */
static inline void
ilo_gpe_init_fs_cso(const struct ilo_dev_info *dev,
                    const struct ilo_shader_state *fs,
                    struct ilo_shader_cso *cso)
{
   if (dev->gen < ILO_GEN(7))
      ilo_gpe_init_fs_cso_gen6(dev, fs, cso);
   else
      ilo_gpe_init_fs_cso_gen7(dev, fs, cso);
}
 
#endif /* ILO_GPE_H */
/contrib/sdk/sources/Mesa/src/gallium/drivers/ilo/ilo_gpe_gen6.c
0,0 → 1,5032
/*
* Mesa 3-D graphics library
*
* Copyright (C) 2012-2013 LunarG, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*
* Authors:
* Chia-I Wu <olv@lunarg.com>
*/
 
#include "util/u_dual_blend.h"
#include "util/u_half.h"
#include "brw_defines.h"
#include "intel_reg.h"
 
#include "ilo_context.h"
#include "ilo_cp.h"
#include "ilo_format.h"
#include "ilo_resource.h"
#include "ilo_shader.h"
#include "ilo_state.h"
#include "ilo_gpe_gen6.h"
 
/**
 * Translate winsys tiling to hardware tiling bits.
 *
 * Linear (no tiling) maps to 0; unknown values assert and fall back to
 * linear.
 */
int
ilo_gpe_gen6_translate_winsys_tiling(enum intel_tiling_mode tiling)
{
   switch (tiling) {
   case INTEL_TILING_X:
      return BRW_SURFACE_TILED;
   case INTEL_TILING_Y:
      return BRW_SURFACE_TILED | BRW_SURFACE_TILED_Y;
   case INTEL_TILING_NONE:
      return 0;
   default:
      assert(!"unknown tiling");
      return 0;
   }
}
 
/**
 * Translate a pipe primitive type to the matching hardware primitive type.
 */
int
ilo_gpe_gen6_translate_pipe_prim(unsigned prim)
{
   /* designated-initializer lookup table; unmapped entries stay 0 */
   static const int prim_mapping[PIPE_PRIM_MAX] = {
      [PIPE_PRIM_POINTS] = _3DPRIM_POINTLIST,
      [PIPE_PRIM_LINES] = _3DPRIM_LINELIST,
      [PIPE_PRIM_LINE_LOOP] = _3DPRIM_LINELOOP,
      [PIPE_PRIM_LINE_STRIP] = _3DPRIM_LINESTRIP,
      [PIPE_PRIM_TRIANGLES] = _3DPRIM_TRILIST,
      [PIPE_PRIM_TRIANGLE_STRIP] = _3DPRIM_TRISTRIP,
      [PIPE_PRIM_TRIANGLE_FAN] = _3DPRIM_TRIFAN,
      [PIPE_PRIM_QUADS] = _3DPRIM_QUADLIST,
      [PIPE_PRIM_QUAD_STRIP] = _3DPRIM_QUADSTRIP,
      [PIPE_PRIM_POLYGON] = _3DPRIM_POLYGON,
      [PIPE_PRIM_LINES_ADJACENCY] = _3DPRIM_LINELIST_ADJ,
      [PIPE_PRIM_LINE_STRIP_ADJACENCY] = _3DPRIM_LINESTRIP_ADJ,
      [PIPE_PRIM_TRIANGLES_ADJACENCY] = _3DPRIM_TRILIST_ADJ,
      [PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY] = _3DPRIM_TRISTRIP_ADJ,
   };

   /* a zero entry means the prim has no mapping (assumes all mapped
    * _3DPRIM_* values are non-zero — TODO confirm against brw_defines.h) */
   assert(prim_mapping[prim]);

   return prim_mapping[prim];
}
 
/**
 * Translate a pipe texture target to the matching hardware surface type.
 *
 * 1D/2D/cube array targets share the surface type of their non-array
 * counterparts; unknown targets assert and fall back to BUFFER.
 */
int
ilo_gpe_gen6_translate_texture(enum pipe_texture_target target)
{
   switch (target) {
   case PIPE_TEXTURE_1D:
   case PIPE_TEXTURE_1D_ARRAY:
      return BRW_SURFACE_1D;
   case PIPE_TEXTURE_2D:
   case PIPE_TEXTURE_RECT:
   case PIPE_TEXTURE_2D_ARRAY:
      return BRW_SURFACE_2D;
   case PIPE_TEXTURE_3D:
      return BRW_SURFACE_3D;
   case PIPE_TEXTURE_CUBE:
   case PIPE_TEXTURE_CUBE_ARRAY:
      return BRW_SURFACE_CUBE;
   case PIPE_BUFFER:
      return BRW_SURFACE_BUFFER;
   default:
      assert(!"unknown texture target");
      return BRW_SURFACE_BUFFER;
   }
}
 
/**
 * Translate a depth/stencil pipe format to the matching hardware
 * format.  Return -1 on errors.
 */
static int
gen6_translate_depth_format(enum pipe_format format)
{
   if (format == PIPE_FORMAT_Z16_UNORM)
      return BRW_DEPTHFORMAT_D16_UNORM;
   if (format == PIPE_FORMAT_Z24X8_UNORM)
      return BRW_DEPTHFORMAT_D24_UNORM_X8_UINT;
   if (format == PIPE_FORMAT_Z24_UNORM_S8_UINT)
      return BRW_DEPTHFORMAT_D24_UNORM_S8_UINT;
   if (format == PIPE_FORMAT_Z32_FLOAT)
      return BRW_DEPTHFORMAT_D32_FLOAT;
   if (format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT)
      return BRW_DEPTHFORMAT_D32_FLOAT_S8X24_UINT;

   /* not a renderable depth format */
   return -1;
}
 
/**
 * Translate a pipe logicop to the matching hardware logicop.
 */
static int
gen6_translate_pipe_logicop(unsigned logicop)
{
   switch (logicop) {
   case PIPE_LOGICOP_CLEAR:
      return BRW_LOGICOPFUNCTION_CLEAR;
   case PIPE_LOGICOP_NOR:
      return BRW_LOGICOPFUNCTION_NOR;
   case PIPE_LOGICOP_AND_INVERTED:
      return BRW_LOGICOPFUNCTION_AND_INVERTED;
   case PIPE_LOGICOP_COPY_INVERTED:
      return BRW_LOGICOPFUNCTION_COPY_INVERTED;
   case PIPE_LOGICOP_AND_REVERSE:
      return BRW_LOGICOPFUNCTION_AND_REVERSE;
   case PIPE_LOGICOP_INVERT:
      return BRW_LOGICOPFUNCTION_INVERT;
   case PIPE_LOGICOP_XOR:
      return BRW_LOGICOPFUNCTION_XOR;
   case PIPE_LOGICOP_NAND:
      return BRW_LOGICOPFUNCTION_NAND;
   case PIPE_LOGICOP_AND:
      return BRW_LOGICOPFUNCTION_AND;
   case PIPE_LOGICOP_EQUIV:
      return BRW_LOGICOPFUNCTION_EQUIV;
   case PIPE_LOGICOP_NOOP:
      return BRW_LOGICOPFUNCTION_NOOP;
   case PIPE_LOGICOP_OR_INVERTED:
      return BRW_LOGICOPFUNCTION_OR_INVERTED;
   case PIPE_LOGICOP_COPY:
      return BRW_LOGICOPFUNCTION_COPY;
   case PIPE_LOGICOP_OR_REVERSE:
      return BRW_LOGICOPFUNCTION_OR_REVERSE;
   case PIPE_LOGICOP_OR:
      return BRW_LOGICOPFUNCTION_OR;
   case PIPE_LOGICOP_SET:
      return BRW_LOGICOPFUNCTION_SET;
   default:
      assert(!"unknown logicop function");
      return BRW_LOGICOPFUNCTION_CLEAR;
   }
}
 
/**
 * Translate a pipe blend function to the matching hardware blend function.
 *
 * Unknown values assert and fall back to ADD.
 */
static int
gen6_translate_pipe_blend(unsigned blend)
{
   switch (blend) {
   case PIPE_BLEND_ADD: return BRW_BLENDFUNCTION_ADD;
   case PIPE_BLEND_SUBTRACT: return BRW_BLENDFUNCTION_SUBTRACT;
   case PIPE_BLEND_REVERSE_SUBTRACT: return BRW_BLENDFUNCTION_REVERSE_SUBTRACT;
   case PIPE_BLEND_MIN: return BRW_BLENDFUNCTION_MIN;
   case PIPE_BLEND_MAX: return BRW_BLENDFUNCTION_MAX;
   default:
      assert(!"unknown blend function");
      return BRW_BLENDFUNCTION_ADD;
   }  /* fixed: stray ';' after the switch's closing brace removed */
}
 
/**
* Translate a pipe blend factor to the matching hardware blend factor.
*/
static int
gen6_translate_pipe_blendfactor(unsigned blendfactor)
{
switch (blendfactor) {
case PIPE_BLENDFACTOR_ONE: return BRW_BLENDFACTOR_ONE;
case PIPE_BLENDFACTOR_SRC_COLOR: return BRW_BLENDFACTOR_SRC_COLOR;
case PIPE_BLENDFACTOR_SRC_ALPHA: return BRW_BLENDFACTOR_SRC_ALPHA;
case PIPE_BLENDFACTOR_DST_ALPHA: return BRW_BLENDFACTOR_DST_ALPHA;
case PIPE_BLENDFACTOR_DST_COLOR: return BRW_BLENDFACTOR_DST_COLOR;
case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: return BRW_BLENDFACTOR_SRC_ALPHA_SATURATE;
case PIPE_BLENDFACTOR_CONST_COLOR: return BRW_BLENDFACTOR_CONST_COLOR;
case PIPE_BLENDFACTOR_CONST_ALPHA: return BRW_BLENDFACTOR_CONST_ALPHA;
case PIPE_BLENDFACTOR_SRC1_COLOR: return BRW_BLENDFACTOR_SRC1_COLOR;
case PIPE_BLENDFACTOR_SRC1_ALPHA: return BRW_BLENDFACTOR_SRC1_ALPHA;
case PIPE_BLENDFACTOR_ZERO: return BRW_BLENDFACTOR_ZERO;
case PIPE_BLENDFACTOR_INV_SRC_COLOR: return BRW_BLENDFACTOR_INV_SRC_COLOR;
case PIPE_BLENDFACTOR_INV_SRC_ALPHA: return BRW_BLENDFACTOR_INV_SRC_ALPHA;
case PIPE_BLENDFACTOR_INV_DST_ALPHA: return BRW_BLENDFACTOR_INV_DST_ALPHA;
case PIPE_BLENDFACTOR_INV_DST_COLOR: return BRW_BLENDFACTOR_INV_DST_COLOR;
case PIPE_BLENDFACTOR_INV_CONST_COLOR: return BRW_BLENDFACTOR_INV_CONST_COLOR;
case PIPE_BLENDFACTOR_INV_CONST_ALPHA: return BRW_BLENDFACTOR_INV_CONST_ALPHA;
case PIPE_BLENDFACTOR_INV_SRC1_COLOR: return BRW_BLENDFACTOR_INV_SRC1_COLOR;
case PIPE_BLENDFACTOR_INV_SRC1_ALPHA: return BRW_BLENDFACTOR_INV_SRC1_ALPHA;
default:
assert(!"unknown blend factor");
return BRW_BLENDFACTOR_ONE;
};
}
 
/**
 * Translate a pipe stencil op to the matching hardware stencil op.
 *
 * Note the naming mismatch: PIPE_STENCIL_OP_INCR/DECR saturate, so they
 * map to the hardware's *SAT ops, while the *_WRAP pipe ops map to the
 * hardware's plain (wrapping) INCR/DECR.
 */
static int
gen6_translate_pipe_stencil_op(unsigned stencil_op)
{
   switch (stencil_op) {
   case PIPE_STENCIL_OP_KEEP:
      return BRW_STENCILOP_KEEP;
   case PIPE_STENCIL_OP_ZERO:
      return BRW_STENCILOP_ZERO;
   case PIPE_STENCIL_OP_REPLACE:
      return BRW_STENCILOP_REPLACE;
   case PIPE_STENCIL_OP_INCR:
      return BRW_STENCILOP_INCRSAT;
   case PIPE_STENCIL_OP_DECR:
      return BRW_STENCILOP_DECRSAT;
   case PIPE_STENCIL_OP_INCR_WRAP:
      return BRW_STENCILOP_INCR;
   case PIPE_STENCIL_OP_DECR_WRAP:
      return BRW_STENCILOP_DECR;
   case PIPE_STENCIL_OP_INVERT:
      return BRW_STENCILOP_INVERT;
   default:
      assert(!"unknown stencil op");
      return BRW_STENCILOP_KEEP;
   }
}
 
/**
 * Translate a pipe texture mipfilter to the matching hardware mipfilter.
 */
static int
gen6_translate_tex_mipfilter(unsigned filter)
{
   if (filter == PIPE_TEX_MIPFILTER_NEAREST)
      return BRW_MIPFILTER_NEAREST;
   if (filter == PIPE_TEX_MIPFILTER_LINEAR)
      return BRW_MIPFILTER_LINEAR;

   /* unknown values assert and fall back to no mip filtering */
   assert(filter == PIPE_TEX_MIPFILTER_NONE || !"unknown mipfilter");
   return BRW_MIPFILTER_NONE;
}
 
/**
 * Translate a pipe texture filter to the matching hardware mapfilter.
 */
static int
gen6_translate_tex_filter(unsigned filter)
{
   if (filter == PIPE_TEX_FILTER_LINEAR)
      return BRW_MAPFILTER_LINEAR;

   /* unknown values assert and fall back to nearest */
   assert(filter == PIPE_TEX_FILTER_NEAREST || !"unknown sampler filter");
   return BRW_MAPFILTER_NEAREST;
}
 
/**
 * Translate a pipe texture coordinate wrapping mode to the matching hardware
 * wrapping mode.
 *
 * PIPE_TEX_WRAP_CLAMP has no direct hardware equivalent; it is resolved to
 * CLAMP_TO_EDGE or CLAMP_TO_BORDER depending on \p clamp_to_edge first.
 */
static int
gen6_translate_tex_wrap(unsigned wrap, bool clamp_to_edge)
{
   if (wrap == PIPE_TEX_WRAP_CLAMP) {
      wrap = clamp_to_edge ? PIPE_TEX_WRAP_CLAMP_TO_EDGE :
                             PIPE_TEX_WRAP_CLAMP_TO_BORDER;
   }

   switch (wrap) {
   case PIPE_TEX_WRAP_REPEAT:
      return BRW_TEXCOORDMODE_WRAP;
   case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
      return BRW_TEXCOORDMODE_CLAMP;
   case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
      return BRW_TEXCOORDMODE_CLAMP_BORDER;
   case PIPE_TEX_WRAP_MIRROR_REPEAT:
      return BRW_TEXCOORDMODE_MIRROR;
   case PIPE_TEX_WRAP_CLAMP:
   case PIPE_TEX_WRAP_MIRROR_CLAMP:
   case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
   case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
   default:
      /* the mirror-clamp family is unsupported on this hardware */
      assert(!"unknown sampler wrap mode");
      return BRW_TEXCOORDMODE_WRAP;
   }
}
 
/**
 * Translate a pipe DSA test function to the matching hardware compare
 * function.
 */
static int
gen6_translate_dsa_func(unsigned func)
{
   switch (func) {
   case PIPE_FUNC_NEVER:
      return BRW_COMPAREFUNCTION_NEVER;
   case PIPE_FUNC_LESS:
      return BRW_COMPAREFUNCTION_LESS;
   case PIPE_FUNC_EQUAL:
      return BRW_COMPAREFUNCTION_EQUAL;
   case PIPE_FUNC_LEQUAL:
      return BRW_COMPAREFUNCTION_LEQUAL;
   case PIPE_FUNC_GREATER:
      return BRW_COMPAREFUNCTION_GREATER;
   case PIPE_FUNC_NOTEQUAL:
      return BRW_COMPAREFUNCTION_NOTEQUAL;
   case PIPE_FUNC_GEQUAL:
      return BRW_COMPAREFUNCTION_GEQUAL;
   case PIPE_FUNC_ALWAYS:
      return BRW_COMPAREFUNCTION_ALWAYS;
   default:
      assert(!"unknown depth/stencil/alpha test function");
      return BRW_COMPAREFUNCTION_NEVER;
   }
}
 
/**
 * Translate a pipe shadow compare function to the matching hardware shadow
 * function.
 *
 * The mapping is intentionally NOT the identity: each PIPE_FUNC_x maps to
 * the BRW_PREFILTER_x whose result is logically inverted and whose operands
 * are swapped, for the reasons below.
 */
static int
gen6_translate_shadow_func(unsigned func)
{
   /*
    * For PIPE_FUNC_x, the reference value is on the left-hand side of the
    * comparison, and 1.0 is returned when the comparison is true.
    *
    * For BRW_PREFILTER_x, the reference value is on the right-hand side of
    * the comparison, and 0.0 is returned when the comparison is true.
    */
   switch (func) {
   case PIPE_FUNC_NEVER: return BRW_PREFILTER_ALWAYS;
   case PIPE_FUNC_LESS: return BRW_PREFILTER_LEQUAL;
   case PIPE_FUNC_EQUAL: return BRW_PREFILTER_NOTEQUAL;
   case PIPE_FUNC_LEQUAL: return BRW_PREFILTER_LESS;
   case PIPE_FUNC_GREATER: return BRW_PREFILTER_GEQUAL;
   case PIPE_FUNC_NOTEQUAL: return BRW_PREFILTER_EQUAL;
   case PIPE_FUNC_GEQUAL: return BRW_PREFILTER_GREATER;
   case PIPE_FUNC_ALWAYS: return BRW_PREFILTER_NEVER;
   default:
      assert(!"unknown shadow compare function");
      return BRW_PREFILTER_NEVER;
   }
}
 
/**
 * Translate an index size in bytes to the matching hardware index format.
 */
static int
gen6_translate_index_size(int size)
{
   switch (size) {
   case 1:
      return BRW_INDEX_BYTE;
   case 2:
      return BRW_INDEX_WORD;
   case 4:
      return BRW_INDEX_DWORD;
   default:
      assert(!"unknown index size");
      return BRW_INDEX_BYTE;
   }
}
 
/**
 * Emit STATE_BASE_ADDRESS (10 DWords on GEN6/7).
 *
 * Bit 0 of every address/bound DWord is the "modify enable" bit (the "| 1"
 * and bare "1" writes below — see the SNB PRM).  A size of 0 means the
 * caller wants no bound checking for that state class; the upper bound is
 * then written without a relocation ("skip range check" cases).
 */
static void
gen6_emit_STATE_BASE_ADDRESS(const struct ilo_dev_info *dev,
                             struct intel_bo *general_state_bo,
                             struct intel_bo *surface_state_bo,
                             struct intel_bo *dynamic_state_bo,
                             struct intel_bo *indirect_object_bo,
                             struct intel_bo *instruction_bo,
                             uint32_t general_state_size,
                             uint32_t dynamic_state_size,
                             uint32_t indirect_object_size,
                             uint32_t instruction_size,
                             struct ilo_cp *cp)
{
   const uint32_t cmd = ILO_GPE_CMD(0x0, 0x1, 0x01);
   const uint8_t cmd_len = 10;

   ILO_GPE_VALID_GEN(dev, 6, 7);

   /* 4K-page aligned */
   assert(((general_state_size | dynamic_state_size |
            indirect_object_size | instruction_size) & 0xfff) == 0);

   ilo_cp_begin(cp, cmd_len);
   ilo_cp_write(cp, cmd | (cmd_len - 2));

   /* DW1-DW5: the five base addresses, each relocated against its BO */
   ilo_cp_write_bo(cp, 1, general_state_bo,
                   INTEL_DOMAIN_RENDER,
                   0);
   ilo_cp_write_bo(cp, 1, surface_state_bo,
                   INTEL_DOMAIN_SAMPLER,
                   0);
   ilo_cp_write_bo(cp, 1, dynamic_state_bo,
                   INTEL_DOMAIN_RENDER | INTEL_DOMAIN_INSTRUCTION,
                   0);
   ilo_cp_write_bo(cp, 1, indirect_object_bo,
                   0,
                   0);
   ilo_cp_write_bo(cp, 1, instruction_bo,
                   INTEL_DOMAIN_INSTRUCTION,
                   0);

   /* DW6-DW9: access upper bounds (base + size, with modify enable) */
   if (general_state_size) {
      ilo_cp_write_bo(cp, general_state_size | 1, general_state_bo,
                      INTEL_DOMAIN_RENDER,
                      0);
   }
   else {
      /* skip range check */
      ilo_cp_write(cp, 1);
   }

   if (dynamic_state_size) {
      ilo_cp_write_bo(cp, dynamic_state_size | 1, dynamic_state_bo,
                      INTEL_DOMAIN_RENDER | INTEL_DOMAIN_INSTRUCTION,
                      0);
   }
   else {
      /* skip range check */
      ilo_cp_write(cp, 0xfffff000 + 1);
   }

   if (indirect_object_size) {
      ilo_cp_write_bo(cp, indirect_object_size | 1, indirect_object_bo,
                      0,
                      0);
   }
   else {
      /* skip range check */
      ilo_cp_write(cp, 0xfffff000 + 1);
   }

   if (instruction_size) {
      ilo_cp_write_bo(cp, instruction_size | 1, instruction_bo,
                      INTEL_DOMAIN_INSTRUCTION,
                      0);
   }
   else {
      /* skip range check */
      ilo_cp_write(cp, 1);
   }

   ilo_cp_end(cp);
}
 
/**
 * Emit STATE_SIP, setting the System Instruction Pointer.
 *
 * Fix: the DWord-count bias "(cmd_len - 2)" belongs in the command header
 * DWord, not OR'ed into the length passed to ilo_cp_begin().  The original
 * had them swapped; with cmd_len == 2 the bias is 0 so the emitted stream
 * happened to be identical, but the form was wrong and inconsistent with
 * every other emitter in this file, and would break if cmd_len changed.
 */
static void
gen6_emit_STATE_SIP(const struct ilo_dev_info *dev,
                    uint32_t sip,
                    struct ilo_cp *cp)
{
   const uint32_t cmd = ILO_GPE_CMD(0x0, 0x1, 0x02);
   const uint8_t cmd_len = 2;

   ILO_GPE_VALID_GEN(dev, 6, 7);

   ilo_cp_begin(cp, cmd_len);
   ilo_cp_write(cp, cmd | (cmd_len - 2));
   ilo_cp_write(cp, sip);
   ilo_cp_end(cp);
}
 
/**
 * Emit 3DSTATE_VF_STATISTICS, toggling vertex-fetch statistics gathering.
 * The enable bit lives in bit 0 of the single command DWord.
 */
static void
gen6_emit_3DSTATE_VF_STATISTICS(const struct ilo_dev_info *dev,
                                bool enable,
                                struct ilo_cp *cp)
{
   const uint32_t cmd = ILO_GPE_CMD(0x1, 0x0, 0x0b);
   const uint8_t cmd_len = 1;

   ILO_GPE_VALID_GEN(dev, 6, 7);

   ilo_cp_begin(cp, cmd_len);
   ilo_cp_write(cp, cmd | enable);
   ilo_cp_end(cp);
}
 
/**
 * Emit PIPELINE_SELECT, switching between the 3D (0x0) and media (0x1)
 * pipelines.
 */
static void
gen6_emit_PIPELINE_SELECT(const struct ilo_dev_info *dev,
                          int pipeline,
                          struct ilo_cp *cp)
{
   const int cmd = ILO_GPE_CMD(0x1, 0x1, 0x04);
   const uint8_t cmd_len = 1;

   ILO_GPE_VALID_GEN(dev, 6, 7);

   /* 3D or media */
   assert(pipeline == 0x0 || pipeline == 0x1);

   ilo_cp_begin(cp, cmd_len);
   ilo_cp_write(cp, cmd | pipeline);
   ilo_cp_end(cp);
}
 
/**
 * Emit MEDIA_VFE_STATE (GEN6 only), configuring the media fixed-function
 * unit: thread count, URB entry count/size, and CURBE allocation.
 */
static void
gen6_emit_MEDIA_VFE_STATE(const struct ilo_dev_info *dev,
                          int max_threads, int num_urb_entries,
                          int urb_entry_size,
                          struct ilo_cp *cp)
{
   const uint32_t cmd = ILO_GPE_CMD(0x2, 0x0, 0x00);
   const uint8_t cmd_len = 8;
   uint32_t dw2, dw4;

   ILO_GPE_VALID_GEN(dev, 6, 6);

   /* max_threads is encoded biased by one */
   dw2 = (max_threads - 1) << 16 |
         num_urb_entries << 8 |
         1 << 7 | /* Reset Gateway Timer */
         1 << 6;  /* Bypass Gateway Control */

   dw4 = urb_entry_size << 16 |  /* URB Entry Allocation Size */
         480;                    /* CURBE Allocation Size */

   ilo_cp_begin(cp, cmd_len);
   ilo_cp_write(cp, cmd | (cmd_len - 2));
   ilo_cp_write(cp, 0); /* scratch */
   ilo_cp_write(cp, dw2);
   ilo_cp_write(cp, 0); /* MBZ */
   ilo_cp_write(cp, dw4);
   ilo_cp_write(cp, 0); /* scoreboard */
   ilo_cp_write(cp, 0);
   ilo_cp_write(cp, 0);
   ilo_cp_end(cp);
}
 
/**
 * Emit MEDIA_CURBE_LOAD (GEN6 only), pointing the CURBE at \p buf.
 * \p buf is an offset into dynamic state and must be 32-byte aligned.
 */
static void
gen6_emit_MEDIA_CURBE_LOAD(const struct ilo_dev_info *dev,
                           uint32_t buf, int size,
                           struct ilo_cp *cp)
{
   const uint32_t cmd = ILO_GPE_CMD(0x2, 0x0, 0x01);
   const uint8_t cmd_len = 4;

   ILO_GPE_VALID_GEN(dev, 6, 6);

   assert(buf % 32 == 0);
   /* gen6_emit_push_constant_buffer() allocates buffers in 256-bit units */
   size = align(size, 32);

   ilo_cp_begin(cp, cmd_len);
   ilo_cp_write(cp, cmd | (cmd_len - 2));
   ilo_cp_write(cp, 0); /* MBZ */
   ilo_cp_write(cp, size);
   ilo_cp_write(cp, buf);
   ilo_cp_end(cp);
}
 
/**
 * Emit MEDIA_INTERFACE_DESCRIPTOR_LOAD (GEN6 only).  \p offset points at
 * \p num_ids interface descriptors (8 DWords each) in dynamic state and
 * must be 32-byte aligned.
 */
static void
gen6_emit_MEDIA_INTERFACE_DESCRIPTOR_LOAD(const struct ilo_dev_info *dev,
                                          uint32_t offset, int num_ids,
                                          struct ilo_cp *cp)
{
   const uint32_t cmd = ILO_GPE_CMD(0x2, 0x0, 0x02);
   const uint8_t cmd_len = 4;

   ILO_GPE_VALID_GEN(dev, 6, 6);

   assert(offset % 32 == 0);

   ilo_cp_begin(cp, cmd_len);
   ilo_cp_write(cp, cmd | (cmd_len - 2));
   ilo_cp_write(cp, 0); /* MBZ */
   /* every ID has 8 DWords */
   ilo_cp_write(cp, num_ids * 8 * 4);
   ilo_cp_write(cp, offset);
   ilo_cp_end(cp);
}
 
/**
 * Emit MEDIA_GATEWAY_STATE (GEN6 only), packing id/byte/thread-count into
 * the single payload DWord.
 */
static void
gen6_emit_MEDIA_GATEWAY_STATE(const struct ilo_dev_info *dev,
                              int id, int byte, int thread_count,
                              struct ilo_cp *cp)
{
   const uint32_t cmd = ILO_GPE_CMD(0x2, 0x0, 0x03);
   const uint8_t cmd_len = 2;
   uint32_t dw1;

   ILO_GPE_VALID_GEN(dev, 6, 6);

   dw1 = id << 16 |
         byte << 8 |
         thread_count;

   ilo_cp_begin(cp, cmd_len);
   ilo_cp_write(cp, cmd | (cmd_len - 2));
   ilo_cp_write(cp, dw1);
   ilo_cp_end(cp);
}
 
/**
 * Emit MEDIA_STATE_FLUSH (GEN6 only), packing the watermark and barrier
 * mask into the single payload DWord.
 */
static void
gen6_emit_MEDIA_STATE_FLUSH(const struct ilo_dev_info *dev,
                            int thread_count_water_mark,
                            int barrier_mask,
                            struct ilo_cp *cp)
{
   const uint32_t cmd = ILO_GPE_CMD(0x2, 0x0, 0x04);
   const uint8_t cmd_len = 2;
   uint32_t dw1;

   ILO_GPE_VALID_GEN(dev, 6, 6);

   dw1 = thread_count_water_mark << 16 |
         barrier_mask;

   ilo_cp_begin(cp, cmd_len);
   ilo_cp_write(cp, cmd | (cmd_len - 2));
   ilo_cp_write(cp, dw1);
   ilo_cp_end(cp);
}
 
/**
 * Placeholder for MEDIA_OBJECT_WALKER — this driver never emits it, so
 * reaching this function is a programming error.
 */
static void
gen6_emit_MEDIA_OBJECT_WALKER(const struct ilo_dev_info *dev,
                              struct ilo_cp *cp)
{
   assert(!"MEDIA_OBJECT_WALKER unsupported");
}
 
/**
 * Emit 3DSTATE_BINDING_TABLE_POINTERS (GEN6 only), always updating all
 * three stages (the MODIFY bits are unconditionally set).
 */
static void
gen6_emit_3DSTATE_BINDING_TABLE_POINTERS(const struct ilo_dev_info *dev,
                                         uint32_t vs_binding_table,
                                         uint32_t gs_binding_table,
                                         uint32_t ps_binding_table,
                                         struct ilo_cp *cp)
{
   const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x01);
   const uint8_t cmd_len = 4;

   ILO_GPE_VALID_GEN(dev, 6, 6);

   ilo_cp_begin(cp, cmd_len);
   ilo_cp_write(cp, cmd | (cmd_len - 2) |
                    GEN6_BINDING_TABLE_MODIFY_VS |
                    GEN6_BINDING_TABLE_MODIFY_GS |
                    GEN6_BINDING_TABLE_MODIFY_PS);
   ilo_cp_write(cp, vs_binding_table);
   ilo_cp_write(cp, gs_binding_table);
   ilo_cp_write(cp, ps_binding_table);
   ilo_cp_end(cp);
}
 
/**
 * Emit 3DSTATE_SAMPLER_STATE_POINTERS (GEN6 only), always updating all
 * three stages (the CHANGE bits are unconditionally set).
 */
static void
gen6_emit_3DSTATE_SAMPLER_STATE_POINTERS(const struct ilo_dev_info *dev,
                                         uint32_t vs_sampler_state,
                                         uint32_t gs_sampler_state,
                                         uint32_t ps_sampler_state,
                                         struct ilo_cp *cp)
{
   const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x02);
   const uint8_t cmd_len = 4;

   ILO_GPE_VALID_GEN(dev, 6, 6);

   ilo_cp_begin(cp, cmd_len);
   ilo_cp_write(cp, cmd | (cmd_len - 2) |
                    VS_SAMPLER_STATE_CHANGE |
                    GS_SAMPLER_STATE_CHANGE |
                    PS_SAMPLER_STATE_CHANGE);
   ilo_cp_write(cp, vs_sampler_state);
   ilo_cp_write(cp, gs_sampler_state);
   ilo_cp_write(cp, ps_sampler_state);
   ilo_cp_end(cp);
}
 
/**
 * Emit 3DSTATE_URB (GEN6 only), dividing the URB between VS and GS.
 *
 * Sizes are given in bytes; they are converted to 1024-bit rows, clamped
 * to the hardware's valid ranges, and encoded biased by one.
 */
static void
gen6_emit_3DSTATE_URB(const struct ilo_dev_info *dev,
                      int vs_total_size, int gs_total_size,
                      int vs_entry_size, int gs_entry_size,
                      struct ilo_cp *cp)
{
   const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x05);
   const uint8_t cmd_len = 3;
   const int row_size = 128; /* 1024 bits */
   int vs_alloc_size, gs_alloc_size;
   int vs_num_entries, gs_num_entries;

   ILO_GPE_VALID_GEN(dev, 6, 6);

   /* in 1024-bit URB rows */
   vs_alloc_size = (vs_entry_size + row_size - 1) / row_size;
   gs_alloc_size = (gs_entry_size + row_size - 1) / row_size;

   /* the valid range is [1, 5] */
   if (!vs_alloc_size)
      vs_alloc_size = 1;
   if (!gs_alloc_size)
      gs_alloc_size = 1;
   assert(vs_alloc_size <= 5 && gs_alloc_size <= 5);

   /* the valid range is [24, 256] in multiples of 4 */
   vs_num_entries = (vs_total_size / row_size / vs_alloc_size) & ~3;
   if (vs_num_entries > 256)
      vs_num_entries = 256;
   assert(vs_num_entries >= 24);

   /* the valid range is [0, 256] in multiples of 4 */
   gs_num_entries = (gs_total_size / row_size / gs_alloc_size) & ~3;
   if (gs_num_entries > 256)
      gs_num_entries = 256;

   ilo_cp_begin(cp, cmd_len);
   ilo_cp_write(cp, cmd | (cmd_len - 2));
   ilo_cp_write(cp, (vs_alloc_size - 1) << GEN6_URB_VS_SIZE_SHIFT |
                    vs_num_entries << GEN6_URB_VS_ENTRIES_SHIFT);
   ilo_cp_write(cp, gs_num_entries << GEN6_URB_GS_ENTRIES_SHIFT |
                    (gs_alloc_size - 1) << GEN6_URB_GS_SIZE_SHIFT);
   ilo_cp_end(cp);
}
 
/**
 * Emit 3DSTATE_VERTEX_BUFFERS: one 4-DWord entry per hardware vertex
 * buffer whose pipe buffer is present in \p vbuffer_mask.
 *
 * Fixes: the mask is uint64_t but the shifts used int/unsigned-long
 * constants.  "1UL << 33" is undefined behavior on ILP32 targets (long is
 * 32 bits), and "1 << pipe_idx" is undefined for pipe_idx >= 31 even
 * though up to 33 VBs are allowed.  Both are now 64-bit (1ULL) shifts.
 */
static void
gen6_emit_3DSTATE_VERTEX_BUFFERS(const struct ilo_dev_info *dev,
                                 const struct pipe_vertex_buffer *vbuffers,
                                 uint64_t vbuffer_mask,
                                 const struct ilo_ve_state *ve,
                                 struct ilo_cp *cp)
{
   const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x08);
   uint8_t cmd_len;
   unsigned hw_idx;

   ILO_GPE_VALID_GEN(dev, 6, 7);

   /*
    * From the Sandy Bridge PRM, volume 2 part 1, page 82:
    *
    *     "From 1 to 33 VBs can be specified..."
    */
   assert(vbuffer_mask <= (1ULL << 33));

   if (!vbuffer_mask)
      return;

   /* header DWord plus 4 DWords per enabled hardware VB */
   cmd_len = 1;

   for (hw_idx = 0; hw_idx < ve->vb_count; hw_idx++) {
      const unsigned pipe_idx = ve->vb_mapping[hw_idx];

      if (vbuffer_mask & (1ULL << pipe_idx))
         cmd_len += 4;
   }

   ilo_cp_begin(cp, cmd_len);
   ilo_cp_write(cp, cmd | (cmd_len - 2));

   for (hw_idx = 0; hw_idx < ve->vb_count; hw_idx++) {
      const unsigned instance_divisor = ve->instance_divisors[hw_idx];
      const unsigned pipe_idx = ve->vb_mapping[hw_idx];
      const struct pipe_vertex_buffer *vb = &vbuffers[pipe_idx];
      uint32_t dw;

      if (!(vbuffer_mask & (1ULL << pipe_idx)))
         continue;

      dw = hw_idx << GEN6_VB0_INDEX_SHIFT;

      if (instance_divisor)
         dw |= GEN6_VB0_ACCESS_INSTANCEDATA;
      else
         dw |= GEN6_VB0_ACCESS_VERTEXDATA;

      if (dev->gen >= ILO_GEN(7))
         dw |= GEN7_VB0_ADDRESS_MODIFYENABLE;

      /* use null vb if there is no buffer or the stride is out of range */
      if (vb->buffer && vb->stride <= 2048) {
         const struct ilo_buffer *buf = ilo_buffer(vb->buffer);
         const uint32_t start_offset = vb->buffer_offset;
         /*
          * As noted in ilo_translate_format(), we treat some 3-component
          * formats as 4-component formats to work around hardware
          * limitations.  Imagine the case where the vertex buffer holds a
          * single PIPE_FORMAT_R16G16B16_FLOAT vertex, and buf->bo_size is 6.
          * The hardware would not be able to fetch it because the vertex
          * buffer is expected to hold a PIPE_FORMAT_R16G16B16A16_FLOAT vertex
          * and that takes at least 8 bytes.
          *
          * For the workaround to work, we query the physical size, which is
          * page aligned, to calculate end_offset so that the last vertex has
          * a better chance to be fetched.
          */
         const uint32_t end_offset = intel_bo_get_size(buf->bo) - 1;

         dw |= vb->stride << BRW_VB0_PITCH_SHIFT;

         ilo_cp_write(cp, dw);
         ilo_cp_write_bo(cp, start_offset, buf->bo, INTEL_DOMAIN_VERTEX, 0);
         ilo_cp_write_bo(cp, end_offset, buf->bo, INTEL_DOMAIN_VERTEX, 0);
         ilo_cp_write(cp, instance_divisor);
      }
      else {
         /* null vertex buffer: set the VB Null bit, no relocations */
         dw |= 1 << 13;

         ilo_cp_write(cp, dw);
         ilo_cp_write(cp, 0);
         ilo_cp_write(cp, 0);
         ilo_cp_write(cp, instance_divisor);
      }
   }

   ilo_cp_end(cp);
}
 
/**
 * Rewrite an already-initialized VE CSO so it sources the edge flag:
 * component 0 stores the source, components 1-3 store nothing, and the
 * source format is forced to a UINT variant, per the PRM excerpt below.
 */
static void
ve_set_cso_edgeflag(const struct ilo_dev_info *dev,
                    struct ilo_ve_cso *cso)
{
   int format;

   ILO_GPE_VALID_GEN(dev, 6, 7);

   /*
    * From the Sandy Bridge PRM, volume 2 part 1, page 94:
    *
    *     "- This bit (Edge Flag Enable) must only be ENABLED on the last
    *        valid VERTEX_ELEMENT structure.
    *
    *      - When set, Component 0 Control must be set to VFCOMP_STORE_SRC,
    *        and Component 1-3 Control must be set to VFCOMP_NOSTORE.
    *
    *      - The Source Element Format must be set to the UINT format.
    *
    *      - [DevSNB]: Edge Flags are not supported for QUADLIST
    *        primitives.  Software may elect to convert QUADLIST primitives
    *        to some set of corresponding edge-flag-supported primitive
    *        types (e.g., POLYGONs) prior to submission to the 3D pipeline."
    */

   cso->payload[0] |= GEN6_VE0_EDGE_FLAG_ENABLE;
   cso->payload[1] =
         BRW_VE1_COMPONENT_STORE_SRC << BRW_VE1_COMPONENT_0_SHIFT |
         BRW_VE1_COMPONENT_NOSTORE << BRW_VE1_COMPONENT_1_SHIFT |
         BRW_VE1_COMPONENT_NOSTORE << BRW_VE1_COMPONENT_2_SHIFT |
         BRW_VE1_COMPONENT_NOSTORE << BRW_VE1_COMPONENT_3_SHIFT;

   /*
    * Edge flags have format BRW_SURFACEFORMAT_R8_UINT when defined via
    * glEdgeFlagPointer(), and format BRW_SURFACEFORMAT_R32_FLOAT when defined
    * via glEdgeFlag(), as can be seen in vbo_attrib_tmp.h.
    *
    * Since all the hardware cares about is whether the flags are zero or not,
    * we can treat them as BRW_SURFACEFORMAT_R32_UINT in the latter case.
    */
   format = (cso->payload[0] >> BRW_VE0_FORMAT_SHIFT) & 0x1ff;
   if (format == BRW_SURFACEFORMAT_R32_FLOAT) {
      STATIC_ASSERT(BRW_SURFACEFORMAT_R32_UINT ==
            BRW_SURFACEFORMAT_R32_FLOAT - 1);

      /* R32_UINT is numerically one below R32_FLOAT, so just decrement */
      cso->payload[0] -= (1 << BRW_VE0_FORMAT_SHIFT);
   }
   else {
      assert(format == BRW_SURFACEFORMAT_R8_UINT);
   }
}
 
/**
 * Initialize a VE CSO that references no vertex buffer, storing the four
 * given component controls.
 */
static void
ve_init_cso_with_components(const struct ilo_dev_info *dev,
                            int comp0, int comp1, int comp2, int comp3,
                            struct ilo_ve_cso *cso)
{
   const uint32_t components =
         comp0 << BRW_VE1_COMPONENT_0_SHIFT |
         comp1 << BRW_VE1_COMPONENT_1_SHIFT |
         comp2 << BRW_VE1_COMPONENT_2_SHIFT |
         comp3 << BRW_VE1_COMPONENT_3_SHIFT;

   ILO_GPE_VALID_GEN(dev, 6, 7);

   STATIC_ASSERT(Elements(cso->payload) >= 2);
   cso->payload[0] = GEN6_VE0_VALID;
   cso->payload[1] = components;
}
 
/**
 * Initialize a VE CSO for one pipe_vertex_element, translating the source
 * format and padding missing components with (0, 0, 0, 1).
 */
static void
ve_init_cso(const struct ilo_dev_info *dev,
            const struct pipe_vertex_element *state,
            unsigned vb_index,
            struct ilo_ve_cso *cso)
{
   int comp[4] = {
      BRW_VE1_COMPONENT_STORE_SRC,
      BRW_VE1_COMPONENT_STORE_SRC,
      BRW_VE1_COMPONENT_STORE_SRC,
      BRW_VE1_COMPONENT_STORE_SRC,
   };
   int format;

   ILO_GPE_VALID_GEN(dev, 6, 7);

   /*
    * Deliberate fall-through cascade: a source with N components overrides
    * components N..2 with 0 and component 3 with 1 (integer or float 1
    * depending on the format).  A 4-component source matches no case and
    * keeps all four STORE_SRC entries.
    */
   switch (util_format_get_nr_components(state->src_format)) {
   case 1: comp[1] = BRW_VE1_COMPONENT_STORE_0;          /* fall through */
   case 2: comp[2] = BRW_VE1_COMPONENT_STORE_0;          /* fall through */
   case 3: comp[3] = (util_format_is_pure_integer(state->src_format)) ?
                     BRW_VE1_COMPONENT_STORE_1_INT :
                     BRW_VE1_COMPONENT_STORE_1_FLT;
   }

   format = ilo_translate_vertex_format(state->src_format);

   STATIC_ASSERT(Elements(cso->payload) >= 2);
   cso->payload[0] =
      vb_index << GEN6_VE0_INDEX_SHIFT |
      GEN6_VE0_VALID |
      format << BRW_VE0_FORMAT_SHIFT |
      state->src_offset << BRW_VE0_SRC_OFFSET_SHIFT;

   cso->payload[1] =
      comp[0] << BRW_VE1_COMPONENT_0_SHIFT |
      comp[1] << BRW_VE1_COMPONENT_1_SHIFT |
      comp[2] << BRW_VE1_COMPONENT_2_SHIFT |
      comp[3] << BRW_VE1_COMPONENT_3_SHIFT;
}
 
/**
 * Initialize the vertex-element state: build one CSO per pipe vertex
 * element and deduplicate (buffer index, instance divisor) pairs into
 * hardware vertex-buffer slots.
 */
void
ilo_gpe_init_ve(const struct ilo_dev_info *dev,
                unsigned num_states,
                const struct pipe_vertex_element *states,
                struct ilo_ve_state *ve)
{
   unsigned state_idx;

   ILO_GPE_VALID_GEN(dev, 6, 7);

   ve->count = num_states;
   ve->vb_count = 0;

   for (state_idx = 0; state_idx < num_states; state_idx++) {
      const unsigned buf_idx = states[state_idx].vertex_buffer_index;
      const unsigned divisor = states[state_idx].instance_divisor;
      unsigned slot = ve->vb_count;
      unsigned i;

      /*
       * Each hardware VB has a fixed instance divisor, so reuse a slot
       * only when both the pipe buffer index and the divisor match.
       */
      for (i = 0; i < ve->vb_count; i++) {
         if (ve->vb_mapping[i] == buf_idx &&
             ve->instance_divisors[i] == divisor) {
            slot = i;
            break;
         }
      }

      /* no match found: allocate a new hardware VB slot */
      if (slot == ve->vb_count) {
         ve->vb_count++;
         ve->vb_mapping[slot] = buf_idx;
         ve->instance_divisors[slot] = divisor;
      }

      ve_init_cso(dev, &states[state_idx], slot, &ve->cso[state_idx]);
   }
}
 
/**
 * Emit 3DSTATE_VERTEX_ELEMENTS: two DWords per element after the header.
 *
 * With no elements at all, a dummy element storing (0, 0, 0, 1) is
 * emitted.  Generated vertex/instance IDs, when requested, are prepended
 * as an extra element, and the edge flag, when requested, is folded into
 * the last element.
 */
static void
gen6_emit_3DSTATE_VERTEX_ELEMENTS(const struct ilo_dev_info *dev,
                                  const struct ilo_ve_state *ve,
                                  bool last_velement_edgeflag,
                                  bool prepend_generated_ids,
                                  struct ilo_cp *cp)
{
   const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x09);
   uint8_t cmd_len;
   unsigned i;

   ILO_GPE_VALID_GEN(dev, 6, 7);

   /*
    * From the Sandy Bridge PRM, volume 2 part 1, page 93:
    *
    *     "Up to 34 (DevSNB+) vertex elements are supported."
    */
   assert(ve->count + prepend_generated_ids <= 34);

   if (!ve->count && !prepend_generated_ids) {
      /* no elements: emit a single dummy element storing (0, 0, 0, 1) */
      struct ilo_ve_cso dummy;

      ve_init_cso_with_components(dev,
            BRW_VE1_COMPONENT_STORE_0,
            BRW_VE1_COMPONENT_STORE_0,
            BRW_VE1_COMPONENT_STORE_0,
            BRW_VE1_COMPONENT_STORE_1_FLT,
            &dummy);

      cmd_len = 3;
      ilo_cp_begin(cp, cmd_len);
      ilo_cp_write(cp, cmd | (cmd_len - 2));
      ilo_cp_write_multi(cp, dummy.payload, 2);
      ilo_cp_end(cp);

      return;
   }

   cmd_len = 2 * (ve->count + prepend_generated_ids) + 1;

   ilo_cp_begin(cp, cmd_len);
   ilo_cp_write(cp, cmd | (cmd_len - 2));

   if (prepend_generated_ids) {
      /* leading element sourcing VertexID/InstanceID from the hardware */
      struct ilo_ve_cso gen_ids;

      ve_init_cso_with_components(dev,
            BRW_VE1_COMPONENT_STORE_VID,
            BRW_VE1_COMPONENT_STORE_IID,
            BRW_VE1_COMPONENT_NOSTORE,
            BRW_VE1_COMPONENT_NOSTORE,
            &gen_ids);

      ilo_cp_write_multi(cp, gen_ids.payload, 2);
   }

   if (last_velement_edgeflag) {
      /* rewrite a copy of the last element as the edge-flag source */
      struct ilo_ve_cso edgeflag;

      for (i = 0; i < ve->count - 1; i++)
         ilo_cp_write_multi(cp, ve->cso[i].payload, 2);

      edgeflag = ve->cso[i];
      ve_set_cso_edgeflag(dev, &edgeflag);
      ilo_cp_write_multi(cp, edgeflag.payload, 2);
   }
   else {
      for (i = 0; i < ve->count; i++)
         ilo_cp_write_multi(cp, ve->cso[i].payload, 2);
   }

   ilo_cp_end(cp);
}
 
/**
 * Emit 3DSTATE_INDEX_BUFFER, pointing the hardware at the whole index
 * buffer (start offset 0; the draw start is applied in 3DPRIMITIVE).
 */
static void
gen6_emit_3DSTATE_INDEX_BUFFER(const struct ilo_dev_info *dev,
                               const struct ilo_ib_state *ib,
                               bool enable_cut_index,
                               struct ilo_cp *cp)
{
   const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x0a);
   const uint8_t cmd_len = 3;
   struct ilo_buffer *buf = ilo_buffer(ib->hw_resource);
   uint32_t start_offset, end_offset;
   int format;

   ILO_GPE_VALID_GEN(dev, 6, 7);

   if (!buf)
      return;

   format = gen6_translate_index_size(ib->hw_index_size);

   /*
    * set start_offset to 0 here and adjust pipe_draw_info::start with
    * ib->draw_start_offset in 3DPRIMITIVE
    */
   start_offset = 0;
   end_offset = buf->bo_size;

   /* end_offset must also be aligned and is inclusive */
   end_offset -= (end_offset % ib->hw_index_size);
   end_offset--;

   ilo_cp_begin(cp, cmd_len);
   ilo_cp_write(cp, cmd | (cmd_len - 2) |
                    ((enable_cut_index) ? BRW_CUT_INDEX_ENABLE : 0) |
                    format << 8);
   ilo_cp_write_bo(cp, start_offset, buf->bo, INTEL_DOMAIN_VERTEX, 0);
   ilo_cp_write_bo(cp, end_offset, buf->bo, INTEL_DOMAIN_VERTEX, 0);
   ilo_cp_end(cp);
}
 
/**
 * Emit 3DSTATE_VIEWPORT_STATE_POINTERS (GEN6 only), always updating the
 * CLIP, SF, and CC viewports (the MODIFY bits are unconditionally set).
 */
static void
gen6_emit_3DSTATE_VIEWPORT_STATE_POINTERS(const struct ilo_dev_info *dev,
                                          uint32_t clip_viewport,
                                          uint32_t sf_viewport,
                                          uint32_t cc_viewport,
                                          struct ilo_cp *cp)
{
   const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x0d);
   const uint8_t cmd_len = 4;

   ILO_GPE_VALID_GEN(dev, 6, 6);

   ilo_cp_begin(cp, cmd_len);
   ilo_cp_write(cp, cmd | (cmd_len - 2) |
                    GEN6_CLIP_VIEWPORT_MODIFY |
                    GEN6_SF_VIEWPORT_MODIFY |
                    GEN6_CC_VIEWPORT_MODIFY);
   ilo_cp_write(cp, clip_viewport);
   ilo_cp_write(cp, sf_viewport);
   ilo_cp_write(cp, cc_viewport);
   ilo_cp_end(cp);
}
 
/**
 * Emit 3DSTATE_CC_STATE_POINTERS (GEN6 only).  Bit 0 of each pointer
 * DWord is its "modify enable" bit (the "| 1" below), so all three
 * pointers are always updated.
 */
static void
gen6_emit_3DSTATE_CC_STATE_POINTERS(const struct ilo_dev_info *dev,
                                    uint32_t blend_state,
                                    uint32_t depth_stencil_state,
                                    uint32_t color_calc_state,
                                    struct ilo_cp *cp)
{
   const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x0e);
   const uint8_t cmd_len = 4;

   ILO_GPE_VALID_GEN(dev, 6, 6);

   ilo_cp_begin(cp, cmd_len);
   ilo_cp_write(cp, cmd | (cmd_len - 2));
   ilo_cp_write(cp, blend_state | 1);
   ilo_cp_write(cp, depth_stencil_state | 1);
   ilo_cp_write(cp, color_calc_state | 1);
   ilo_cp_end(cp);
}
 
/**
 * Emit 3DSTATE_SCISSOR_STATE_POINTERS, pointing at the scissor rectangle
 * state in dynamic state.
 */
static void
gen6_emit_3DSTATE_SCISSOR_STATE_POINTERS(const struct ilo_dev_info *dev,
                                         uint32_t scissor_rect,
                                         struct ilo_cp *cp)
{
   const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x0f);
   const uint8_t cmd_len = 2;

   ILO_GPE_VALID_GEN(dev, 6, 7);

   ilo_cp_begin(cp, cmd_len);
   ilo_cp_write(cp, cmd | (cmd_len - 2));
   ilo_cp_write(cp, scissor_rect);
   ilo_cp_end(cp);
}
 
/**
 * Precompute the 3DSTATE_VS payload DWords (DW2, DW4, DW5) for a vertex
 * shader; gen6_emit_3DSTATE_VS() later ORs in per-draw fields.
 */
void
ilo_gpe_init_vs_cso(const struct ilo_dev_info *dev,
                    const struct ilo_shader_state *vs,
                    struct ilo_shader_cso *cso)
{
   int start_grf, vue_read_len, max_threads;
   uint32_t dw2, dw4, dw5;

   ILO_GPE_VALID_GEN(dev, 6, 7);

   start_grf = ilo_shader_get_kernel_param(vs, ILO_KERNEL_URB_DATA_START_REG);
   vue_read_len = ilo_shader_get_kernel_param(vs, ILO_KERNEL_INPUT_COUNT);

   /*
    * From the Sandy Bridge PRM, volume 2 part 1, page 135:
    *
    *     "(Vertex URB Entry Read Length) Specifies the number of pairs of
    *      128-bit vertex elements to be passed into the payload for each
    *      vertex."
    *
    *     "It is UNDEFINED to set this field to 0 indicating no Vertex URB
    *      data to be read and passed to the thread."
    */
   vue_read_len = (vue_read_len + 1) / 2;
   if (!vue_read_len)
      vue_read_len = 1;

   /* per-device VS thread limits (EUs * threads/EU) */
   switch (dev->gen) {
   case ILO_GEN(6):
      /*
       * From the Sandy Bridge PRM, volume 1 part 1, page 22:
       *
       *     "Device             # of EUs     #Threads/EU
       *      SNB GT2            12           5
       *      SNB GT1            6            4"
       */
      max_threads = (dev->gt == 2) ? 60 : 24;
      break;
   case ILO_GEN(7):
      /*
       * From the Ivy Bridge PRM, volume 1 part 1, page 18:
       *
       *     "Device             # of EUs     #Threads/EU
       *      Ivy Bridge (GT2)   16           8
       *      Ivy Bridge (GT1)   6            6"
       */
      max_threads = (dev->gt == 2) ? 128 : 36;
      break;
   case ILO_GEN(7.5):
      /* see brwCreateContext() */
      max_threads = (dev->gt == 2) ? 280 : 70;
      break;
   default:
      max_threads = 1;
      break;
   }

   /* always IEEE float mode; the ALT-mode flag is deliberately never set */
   dw2 = (true) ? 0 : GEN6_VS_FLOATING_POINT_MODE_ALT;

   dw4 = start_grf << GEN6_VS_DISPATCH_START_GRF_SHIFT |
         vue_read_len << GEN6_VS_URB_READ_LENGTH_SHIFT |
         0 << GEN6_VS_URB_ENTRY_READ_OFFSET_SHIFT;

   dw5 = GEN6_VS_STATISTICS_ENABLE |
         GEN6_VS_ENABLE;

   /* the max-thread field moved on Haswell */
   if (dev->gen >= ILO_GEN(7.5))
      dw5 |= (max_threads - 1) << HSW_VS_MAX_THREADS_SHIFT;
   else
      dw5 |= (max_threads - 1) << GEN6_VS_MAX_THREADS_SHIFT;

   STATIC_ASSERT(Elements(cso->payload) >= 3);
   cso->payload[0] = dw2;
   cso->payload[1] = dw4;
   cso->payload[2] = dw5;
}
 
/**
 * Emit 3DSTATE_VS from the precomputed CSO payload (see
 * ilo_gpe_init_vs_cso).  A NULL \p vs disables the VS stage by emitting
 * an all-zero command body.
 */
static void
gen6_emit_3DSTATE_VS(const struct ilo_dev_info *dev,
                     const struct ilo_shader_state *vs,
                     int num_samplers,
                     struct ilo_cp *cp)
{
   const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x10);
   const uint8_t cmd_len = 6;
   const struct ilo_shader_cso *cso;
   uint32_t dw2, dw4, dw5;

   ILO_GPE_VALID_GEN(dev, 6, 7);

   if (!vs) {
      ilo_cp_begin(cp, cmd_len);
      ilo_cp_write(cp, cmd | (cmd_len - 2));
      ilo_cp_write(cp, 0);
      ilo_cp_write(cp, 0);
      ilo_cp_write(cp, 0);
      ilo_cp_write(cp, 0);
      ilo_cp_write(cp, 0);
      ilo_cp_end(cp);
      return;
   }

   cso = ilo_shader_get_kernel_cso(vs);
   dw2 = cso->payload[0];
   dw4 = cso->payload[1];
   dw5 = cso->payload[2];

   /* sampler count field is in units of 4, rounded up */
   dw2 |= ((num_samplers + 3) / 4) << GEN6_VS_SAMPLER_COUNT_SHIFT;

   ilo_cp_begin(cp, cmd_len);
   ilo_cp_write(cp, cmd | (cmd_len - 2));
   ilo_cp_write(cp, ilo_shader_get_kernel_offset(vs));
   ilo_cp_write(cp, dw2);
   ilo_cp_write(cp, 0); /* scratch */
   ilo_cp_write(cp, dw4);
   ilo_cp_write(cp, dw5);
   ilo_cp_end(cp);
}
 
/**
 * Initialize the GEN6 GS CSO: precompute the DW2, DW4, DW5, and DW6 payload
 * of 3DSTATE_GS.  \p gs may be either a real geometry shader or a vertex
 * shader whose stream-output kernel is run through the GS stage.
 */
void
ilo_gpe_init_gs_cso_gen6(const struct ilo_dev_info *dev,
                         const struct ilo_shader_state *gs,
                         struct ilo_shader_cso *cso)
{
   int start_grf, vue_read_len, max_threads;
   uint32_t dw2, dw4, dw5, dw6;

   ILO_GPE_VALID_GEN(dev, 6, 6);

   if (ilo_shader_get_type(gs) == PIPE_SHADER_GEOMETRY) {
      /* a real GS: read its inputs starting at its URB data register */
      start_grf = ilo_shader_get_kernel_param(gs,
            ILO_KERNEL_URB_DATA_START_REG);

      vue_read_len = ilo_shader_get_kernel_param(gs, ILO_KERNEL_INPUT_COUNT);
   }
   else {
      /* a VS used for stream output: use the SO kernel's parameters */
      start_grf = ilo_shader_get_kernel_param(gs,
            ILO_KERNEL_VS_GEN6_SO_START_REG);

      vue_read_len = ilo_shader_get_kernel_param(gs, ILO_KERNEL_OUTPUT_COUNT);
   }

   /*
    * From the Sandy Bridge PRM, volume 2 part 1, page 153:
    *
    *     "Specifies the amount of URB data read and passed in the thread
    *      payload for each Vertex URB entry, in 256-bit register increments.
    *
    *      It is UNDEFINED to set this field (Vertex URB Entry Read Length) to
    *      0 indicating no Vertex URB data to be read and passed to the
    *      thread."
    *
    * Convert the attribute count to 256-bit units, and never program zero.
    */
   vue_read_len = (vue_read_len + 1) / 2;
   if (!vue_read_len)
      vue_read_len = 1;

   /*
    * From the Sandy Bridge PRM, volume 2 part 1, page 154:
    *
    *     "Maximum Number of Threads valid range is [0,27] when Rendering
    *      Enabled bit is set."
    *
    * From the Sandy Bridge PRM, volume 2 part 1, page 173:
    *
    *     "Programming Note: If the GS stage is enabled, software must always
    *      allocate at least one GS URB Entry. This is true even if the GS
    *      thread never needs to output vertices to the pipeline, e.g., when
    *      only performing stream output. This is an artifact of the need to
    *      pass the GS thread an initial destination URB handle."
    *
    * As such, we always enable rendering, and limit the number of threads.
    */
   if (dev->gt == 2) {
      /* maximum is 60, but limited to 28 */
      max_threads = 28;
   }
   else {
      /* maximum is 24, but limited to 21 (see brwCreateContext()) */
      max_threads = 21;
   }

   /* single program flow */
   dw2 = GEN6_GS_SPF_MODE;

   dw4 = vue_read_len << GEN6_GS_URB_READ_LENGTH_SHIFT |
         0 << GEN6_GS_URB_ENTRY_READ_OFFSET_SHIFT |
         start_grf << GEN6_GS_DISPATCH_START_GRF_SHIFT;

   dw5 = (max_threads - 1) << GEN6_GS_MAX_THREADS_SHIFT |
         GEN6_GS_STATISTICS_ENABLE |
         GEN6_GS_SO_STATISTICS_ENABLE |
         GEN6_GS_RENDERING_ENABLE;

   /*
    * we cannot make use of GEN6_GS_REORDER because it will reorder
    * triangle strips according to D3D rules (triangle 2N+1 uses vertices
    * (2N+1, 2N+3, 2N+2)), instead of GL rules (triangle 2N+1 uses vertices
    * (2N+2, 2N+1, 2N+3)).
    */
   dw6 = GEN6_GS_ENABLE;

   if (ilo_shader_get_kernel_param(gs, ILO_KERNEL_GS_DISCARD_ADJACENCY))
      dw6 |= GEN6_GS_DISCARD_ADJACENCY;

   if (ilo_shader_get_kernel_param(gs, ILO_KERNEL_VS_GEN6_SO)) {
      const uint32_t svbi_post_inc =
         ilo_shader_get_kernel_param(gs, ILO_KERNEL_GS_GEN6_SVBI_POST_INC);

      /* stream output needs the streamed vertex buffer indices in the payload */
      dw6 |= GEN6_GS_SVBI_PAYLOAD_ENABLE;
      if (svbi_post_inc) {
         dw6 |= GEN6_GS_SVBI_POSTINCREMENT_ENABLE |
                svbi_post_inc << GEN6_GS_SVBI_POSTINCREMENT_VALUE_SHIFT;
      }
   }

   STATIC_ASSERT(Elements(cso->payload) >= 4);
   cso->payload[0] = dw2;
   cso->payload[1] = dw4;
   cso->payload[2] = dw5;
   cso->payload[3] = dw6;
}
 
/**
 * Emit 3DSTATE_GS (GEN6).  When there is no GS but the VS has a GEN6
 * stream-output kernel, the GS stage is pointed at the VS-provided SO kernel
 * variant selected by \p verts_per_prim.  Otherwise the stage is disabled.
 */
static void
gen6_emit_3DSTATE_GS(const struct ilo_dev_info *dev,
                     const struct ilo_shader_state *gs,
                     const struct ilo_shader_state *vs,
                     int verts_per_prim,
                     struct ilo_cp *cp)
{
   const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x11);
   const uint8_t cmd_len = 7;
   uint32_t dw1, dw2, dw4, dw5, dw6;

   ILO_GPE_VALID_GEN(dev, 6, 6);

   if (gs) {
      /* a real GS: use its precomputed CSO */
      const struct ilo_shader_cso *cso;

      dw1 = ilo_shader_get_kernel_offset(gs);

      cso = ilo_shader_get_kernel_cso(gs);
      dw2 = cso->payload[0];
      dw4 = cso->payload[1];
      dw5 = cso->payload[2];
      dw6 = cso->payload[3];
   }
   else if (vs && ilo_shader_get_kernel_param(vs, ILO_KERNEL_VS_GEN6_SO)) {
      struct ilo_shader_cso cso;
      enum ilo_kernel_param param;

      /* pick the SO kernel variant matching the primitive type */
      switch (verts_per_prim) {
      case 1:
         param = ILO_KERNEL_VS_GEN6_SO_POINT_OFFSET;
         break;
      case 2:
         param = ILO_KERNEL_VS_GEN6_SO_LINE_OFFSET;
         break;
      default:
         param = ILO_KERNEL_VS_GEN6_SO_TRI_OFFSET;
         break;
      }

      dw1 = ilo_shader_get_kernel_offset(vs) +
            ilo_shader_get_kernel_param(vs, param);

      /* cannot use VS's CSO */
      ilo_gpe_init_gs_cso_gen6(dev, vs, &cso);
      dw2 = cso.payload[0];
      dw4 = cso.payload[1];
      dw5 = cso.payload[2];
      dw6 = cso.payload[3];
   }
   else {
      /* GS disabled; the URB read length must still be non-zero */
      dw1 = 0;
      dw2 = 0;
      dw4 = 1 << GEN6_GS_URB_READ_LENGTH_SHIFT;
      dw5 = GEN6_GS_STATISTICS_ENABLE;
      dw6 = 0;
   }

   ilo_cp_begin(cp, cmd_len);
   ilo_cp_write(cp, cmd | (cmd_len - 2));
   ilo_cp_write(cp, dw1);
   ilo_cp_write(cp, dw2);
   ilo_cp_write(cp, 0); /* scratch */
   ilo_cp_write(cp, dw4);
   ilo_cp_write(cp, dw5);
   ilo_cp_write(cp, dw6);
   ilo_cp_end(cp);
}
 
/**
 * Initialize the clip-related portion of the rasterizer state: precompute
 * DW1, DW2, and DW3 of 3DSTATE_CLIP, and decide whether the guard band test
 * may be enabled.
 */
void
ilo_gpe_init_rasterizer_clip(const struct ilo_dev_info *dev,
                             const struct pipe_rasterizer_state *state,
                             struct ilo_rasterizer_clip *clip)
{
   uint32_t dw1, dw2, dw3;

   ILO_GPE_VALID_GEN(dev, 6, 7);

   dw1 = GEN6_CLIP_STATISTICS_ENABLE;

   if (dev->gen >= ILO_GEN(7)) {
      /*
       * From the Ivy Bridge PRM, volume 2 part 1, page 219:
       *
       *     "Workaround : Due to Hardware issue "EarlyCull" needs to be
       *      enabled only for the cases where the incoming primitive topology
       *      into the clipper guaranteed to be Trilist."
       *
       * What does this mean?
       */
      dw1 |= 0 << 19 |
             GEN7_CLIP_EARLY_CULL;

      if (state->front_ccw)
         dw1 |= GEN7_CLIP_WINDING_CCW;

      /* on GEN7, the clipper can cull in addition to the SF unit */
      switch (state->cull_face) {
      case PIPE_FACE_NONE:
         dw1 |= GEN7_CLIP_CULLMODE_NONE;
         break;
      case PIPE_FACE_FRONT:
         dw1 |= GEN7_CLIP_CULLMODE_FRONT;
         break;
      case PIPE_FACE_BACK:
         dw1 |= GEN7_CLIP_CULLMODE_BACK;
         break;
      case PIPE_FACE_FRONT_AND_BACK:
         dw1 |= GEN7_CLIP_CULLMODE_BOTH;
         break;
      }
   }

   dw2 = GEN6_CLIP_ENABLE |
         GEN6_CLIP_XY_TEST |
         state->clip_plane_enable << GEN6_USER_CLIP_CLIP_DISTANCES_SHIFT |
         GEN6_CLIP_MODE_NORMAL;

   /* D3D uses [0, 1] depth range; OpenGL uses [-1, 1] */
   if (state->clip_halfz)
      dw2 |= GEN6_CLIP_API_D3D;
   else
      dw2 |= GEN6_CLIP_API_OGL;

   if (state->depth_clip)
      dw2 |= GEN6_CLIP_Z_TEST;

   /* provoking vertex selection per primitive type */
   if (state->flatshade_first) {
      dw2 |= 0 << GEN6_CLIP_TRI_PROVOKE_SHIFT |
             0 << GEN6_CLIP_LINE_PROVOKE_SHIFT |
             1 << GEN6_CLIP_TRIFAN_PROVOKE_SHIFT;
   }
   else {
      dw2 |= 2 << GEN6_CLIP_TRI_PROVOKE_SHIFT |
             1 << GEN6_CLIP_LINE_PROVOKE_SHIFT |
             2 << GEN6_CLIP_TRIFAN_PROVOKE_SHIFT;
   }

   dw3 = 0x1 << GEN6_CLIP_MIN_POINT_WIDTH_SHIFT |
         0x7ff << GEN6_CLIP_MAX_POINT_WIDTH_SHIFT;

   clip->payload[0] = dw1;
   clip->payload[1] = dw2;
   clip->payload[2] = dw3;

   clip->can_enable_guardband = true;

   /*
    * There are several reasons that guard band test should be disabled
    *
    *  - GL wide points (to avoid partially visible object)
    *  - GL wide or AA lines (to avoid partially visible object)
    */
   if (state->point_size_per_vertex || state->point_size > 1.0f)
      clip->can_enable_guardband = false;
   if (state->line_smooth || state->line_width > 1.0f)
      clip->can_enable_guardband = false;
}
 
/**
 * Emit 3DSTATE_CLIP.  With no rasterizer state bound, all state DWords are
 * programmed to zero.
 */
static void
gen6_emit_3DSTATE_CLIP(const struct ilo_dev_info *dev,
                       const struct ilo_rasterizer_state *rasterizer,
                       const struct ilo_shader_state *fs,
                       bool enable_guardband,
                       int num_viewports,
                       struct ilo_cp *cp)
{
   const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x12);
   const uint8_t cmd_len = 4;
   uint32_t dw1 = 0, dw2 = 0, dw3 = 0;

   if (rasterizer) {
      /* barycentric modes actually used by the fragment shader, if any */
      const int interps = (fs) ?
         ilo_shader_get_kernel_param(fs,
               ILO_KERNEL_FS_BARYCENTRIC_INTERPOLATIONS) : 0;
      const unsigned nonperspective_interps =
         1 << BRW_WM_NONPERSPECTIVE_PIXEL_BARYCENTRIC |
         1 << BRW_WM_NONPERSPECTIVE_CENTROID_BARYCENTRIC |
         1 << BRW_WM_NONPERSPECTIVE_SAMPLE_BARYCENTRIC;

      dw1 = rasterizer->clip.payload[0];
      dw2 = rasterizer->clip.payload[1];
      dw3 = rasterizer->clip.payload[2] |
            GEN6_CLIP_FORCE_ZERO_RTAINDEX |
            (num_viewports - 1);

      if (enable_guardband && rasterizer->clip.can_enable_guardband)
         dw2 |= GEN6_CLIP_GB_TEST;

      if (interps & nonperspective_interps)
         dw2 |= GEN6_CLIP_NON_PERSPECTIVE_BARYCENTRIC_ENABLE;
   }

   ilo_cp_begin(cp, cmd_len);
   ilo_cp_write(cp, cmd | (cmd_len - 2));
   ilo_cp_write(cp, dw1);
   ilo_cp_write(cp, dw2);
   ilo_cp_write(cp, dw3);
   ilo_cp_end(cp);
}
 
/**
 * Initialize the SF-related portion of the rasterizer state: precompute
 * DW1-DW3 and the depth-offset DWords of 3DSTATE_SF, plus the extra bits
 * needed when multisampling is enabled.
 */
void
ilo_gpe_init_rasterizer_sf(const struct ilo_dev_info *dev,
                           const struct pipe_rasterizer_state *state,
                           struct ilo_rasterizer_sf *sf)
{
   float offset_const, offset_scale, offset_clamp;
   int line_width, point_width;
   uint32_t dw1, dw2, dw3;

   ILO_GPE_VALID_GEN(dev, 6, 7);

   /*
    * Scale the constant term. The minimum representable value used by the HW
    * is not large enough to be the minimum resolvable difference.
    */
   offset_const = state->offset_units * 2.0f;

   offset_scale = state->offset_scale;
   offset_clamp = state->offset_clamp;

   /*
    * From the Sandy Bridge PRM, volume 2 part 1, page 248:
    *
    *     "This bit (Statistics Enable) should be set whenever clipping is
    *      enabled and the Statistics Enable bit is set in CLIP_STATE. It
    *      should be cleared if clipping is disabled or Statistics Enable in
    *      CLIP_STATE is clear."
    */
   dw1 = GEN6_SF_STATISTICS_ENABLE |
         GEN6_SF_VIEWPORT_TRANSFORM_ENABLE;

   /* XXX GEN6 path seems to work fine for GEN7 */
   if (false && dev->gen >= ILO_GEN(7)) {
      /*
       * From the Ivy Bridge PRM, volume 2 part 1, page 258:
       *
       *     "This bit (Legacy Global Depth Bias Enable, Global Depth Offset
       *      Enable Solid , Global Depth Offset Enable Wireframe, and Global
       *      Depth Offset Enable Point) should be set whenever non zero depth
       *      bias (Slope, Bias) values are used. Setting this bit may have
       *      some degradation of performance for some workloads."
       */
      if (state->offset_tri || state->offset_line || state->offset_point) {
         /* XXX need to scale offset_const according to the depth format */
         dw1 |= GEN6_SF_LEGACY_GLOBAL_DEPTH_BIAS;

         dw1 |= GEN6_SF_GLOBAL_DEPTH_OFFSET_SOLID |
                GEN6_SF_GLOBAL_DEPTH_OFFSET_WIREFRAME |
                GEN6_SF_GLOBAL_DEPTH_OFFSET_POINT;
      }
      else {
         offset_const = 0.0f;
         offset_scale = 0.0f;
         offset_clamp = 0.0f;
      }
   }
   else {
      /* enable depth offset per fill mode */
      if (state->offset_tri)
         dw1 |= GEN6_SF_GLOBAL_DEPTH_OFFSET_SOLID;
      if (state->offset_line)
         dw1 |= GEN6_SF_GLOBAL_DEPTH_OFFSET_WIREFRAME;
      if (state->offset_point)
         dw1 |= GEN6_SF_GLOBAL_DEPTH_OFFSET_POINT;
   }

   switch (state->fill_front) {
   case PIPE_POLYGON_MODE_FILL:
      dw1 |= GEN6_SF_FRONT_SOLID;
      break;
   case PIPE_POLYGON_MODE_LINE:
      dw1 |= GEN6_SF_FRONT_WIREFRAME;
      break;
   case PIPE_POLYGON_MODE_POINT:
      dw1 |= GEN6_SF_FRONT_POINT;
      break;
   }

   switch (state->fill_back) {
   case PIPE_POLYGON_MODE_FILL:
      dw1 |= GEN6_SF_BACK_SOLID;
      break;
   case PIPE_POLYGON_MODE_LINE:
      dw1 |= GEN6_SF_BACK_WIREFRAME;
      break;
   case PIPE_POLYGON_MODE_POINT:
      dw1 |= GEN6_SF_BACK_POINT;
      break;
   }

   if (state->front_ccw)
      dw1 |= GEN6_SF_WINDING_CCW;

   dw2 = 0;

   if (state->line_smooth) {
      /*
       * From the Sandy Bridge PRM, volume 2 part 1, page 251:
       *
       *     "This field (Anti-aliasing Enable) must be disabled if any of the
       *      render targets have integer (UINT or SINT) surface format."
       *
       * From the Sandy Bridge PRM, volume 2 part 1, page 317:
       *
       *     "This field (Hierarchical Depth Buffer Enable) must be disabled
       *      if Anti-aliasing Enable in 3DSTATE_SF is enabled."
       *
       * TODO We do not check those yet.
       */
      dw2 |= GEN6_SF_LINE_AA_ENABLE |
             GEN6_SF_LINE_END_CAP_WIDTH_1_0;
   }

   switch (state->cull_face) {
   case PIPE_FACE_NONE:
      dw2 |= GEN6_SF_CULL_NONE;
      break;
   case PIPE_FACE_FRONT:
      dw2 |= GEN6_SF_CULL_FRONT;
      break;
   case PIPE_FACE_BACK:
      dw2 |= GEN6_SF_CULL_BACK;
      break;
   case PIPE_FACE_FRONT_AND_BACK:
      dw2 |= GEN6_SF_CULL_BOTH;
      break;
   }

   /*
    * Smooth lines should intersect ceil(line_width) or (ceil(line_width) + 1)
    * pixels in the minor direction.  We have to make the lines slightly
    * thicker, 0.5 pixel on both sides, so that they intersect that many
    * pixels are considered into the lines.
    *
    * Line width is in U3.7.
    */
   line_width = (int) ((state->line_width +
            (float) state->line_smooth) * 128.0f + 0.5f);
   line_width = CLAMP(line_width, 0, 1023);

   if (line_width == 128 && !state->line_smooth) {
      /* use GIQ rules */
      line_width = 0;
   }

   dw2 |= line_width << GEN6_SF_LINE_WIDTH_SHIFT;

   if (state->scissor)
      dw2 |= GEN6_SF_SCISSOR_ENABLE;

   dw3 = GEN6_SF_LINE_AA_MODE_TRUE |
         GEN6_SF_VERTEX_SUBPIXEL_8BITS;

   /* "Last Pixel Enable" is bit 31 of DW3 */
   if (state->line_last_pixel)
      dw3 |= 1 << 31;

   /* provoking vertex selection per primitive type */
   if (state->flatshade_first) {
      dw3 |= 0 << GEN6_SF_TRI_PROVOKE_SHIFT |
             0 << GEN6_SF_LINE_PROVOKE_SHIFT |
             1 << GEN6_SF_TRIFAN_PROVOKE_SHIFT;
   }
   else {
      dw3 |= 2 << GEN6_SF_TRI_PROVOKE_SHIFT |
             1 << GEN6_SF_LINE_PROVOKE_SHIFT |
             2 << GEN6_SF_TRIFAN_PROVOKE_SHIFT;
   }

   if (!state->point_size_per_vertex)
      dw3 |= GEN6_SF_USE_STATE_POINT_WIDTH;

   /* in U8.3 */
   point_width = (int) (state->point_size * 8.0f + 0.5f);
   point_width = CLAMP(point_width, 1, 2047);

   dw3 |= point_width;

   STATIC_ASSERT(Elements(sf->payload) >= 6);
   sf->payload[0] = dw1;
   sf->payload[1] = dw2;
   sf->payload[2] = dw3;
   sf->payload[3] = fui(offset_const);
   sf->payload[4] = fui(offset_scale);
   sf->payload[5] = fui(offset_clamp);

   if (state->multisample) {
      sf->dw_msaa = GEN6_SF_MSRAST_ON_PATTERN;

      /*
       * From the Sandy Bridge PRM, volume 2 part 1, page 251:
       *
       *     "Software must not program a value of 0.0 when running in
       *      MSRASTMODE_ON_xxx modes - zero-width lines are not available
       *      when multisampling rasterization is enabled."
       */
      if (!line_width) {
         line_width = 128; /* 1.0f */

         sf->dw_msaa |= line_width << GEN6_SF_LINE_WIDTH_SHIFT;
      }
   }
   else {
      sf->dw_msaa = 0;
   }
}
 
/**
 * Fill in DW2 to DW7 of 3DSTATE_SF.
 *
 * \param rasterizer    rasterizer state; may be NULL, in which case only the
 *                      MSAA rasterization mode is programmed
 * \param num_samples   number of samples of the render targets
 * \param depth_format  format of the depth buffer; used on GEN7+ to program
 *                      the Depth Buffer Surface Format field
 */
void
ilo_gpe_gen6_fill_3dstate_sf_raster(const struct ilo_dev_info *dev,
                                    const struct ilo_rasterizer_state *rasterizer,
                                    int num_samples,
                                    enum pipe_format depth_format,
                                    uint32_t *payload, unsigned payload_len)
{
   /*
    * Do not take &rasterizer->sf before checking for NULL: the old code did,
    * which was undefined behavior when rasterizer == NULL and made the
    * no-rasterizer branch below unreachable.
    */
   const struct ilo_rasterizer_sf *sf =
      (rasterizer) ? &rasterizer->sf : NULL;

   /* Elements() is sizeof-based; it does not dereference sf */
   assert(payload_len == Elements(sf->payload));

   if (sf) {
      memcpy(payload, sf->payload, sizeof(sf->payload));

      if (num_samples > 1)
         payload[1] |= sf->dw_msaa;

      if (dev->gen >= ILO_GEN(7)) {
         int format;

         /* the depth format field describes the depth data only; map
          * packed depth/stencil formats to their separate-stencil forms */
         switch (depth_format) {
         case PIPE_FORMAT_Z24_UNORM_S8_UINT:
            depth_format = PIPE_FORMAT_Z24X8_UNORM;
            break;
         case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
            depth_format = PIPE_FORMAT_Z32_FLOAT;
            break;
         case PIPE_FORMAT_S8_UINT:
            depth_format = PIPE_FORMAT_NONE;
            break;
         default:
            break;
         }

         format = gen6_translate_depth_format(depth_format);
         /* FLOAT surface is assumed when there is no depth buffer */
         if (format < 0)
            format = BRW_DEPTHFORMAT_D32_FLOAT;

         payload[0] |= format << GEN7_SF_DEPTH_BUFFER_SURFACE_FORMAT_SHIFT;
      }
   }
   else {
      payload[0] = 0;
      payload[1] = (num_samples > 1) ? GEN6_SF_MSRAST_ON_PATTERN : 0;
      payload[2] = 0;
      payload[3] = 0;
      payload[4] = 0;
      payload[5] = 0;
   }
}
 
/**
 * Fill in DW1 and DW8 to DW19 of 3DSTATE_SF.
 *
 * These are the setup-backend (attribute routing) DWords: how many FS inputs
 * exist, where to read them in the VUE, the per-attribute swizzles, point
 * sprite and constant-interpolation enables.
 */
void
ilo_gpe_gen6_fill_3dstate_sf_sbe(const struct ilo_dev_info *dev,
                                 const struct ilo_rasterizer_state *rasterizer,
                                 const struct ilo_shader_state *fs,
                                 const struct ilo_shader_state *last_sh,
                                 uint32_t *dw, int num_dwords)
{
   int output_count, vue_offset, vue_len;
   const struct ilo_kernel_routing *routing;

   ILO_GPE_VALID_GEN(dev, 6, 7);
   assert(num_dwords == 13);

   if (!fs) {
      /* no FS: zero everything but keep the mandatory non-zero read length */
      memset(dw, 0, sizeof(dw[0]) * num_dwords);

      if (dev->gen >= ILO_GEN(7))
         dw[0] = 1 << GEN7_SBE_URB_ENTRY_READ_LENGTH_SHIFT;
      else
         dw[0] = 1 << GEN6_SF_URB_ENTRY_READ_LENGTH_SHIFT;

      return;
   }

   output_count = ilo_shader_get_kernel_param(fs, ILO_KERNEL_INPUT_COUNT);
   assert(output_count <= 32);

   routing = ilo_shader_get_kernel_routing(fs);

   /* read offset is in pairs of attributes */
   vue_offset = routing->source_skip;
   assert(vue_offset % 2 == 0);
   vue_offset /= 2;

   /* read length is in pairs of attributes; never program zero */
   vue_len = (routing->source_len + 1) / 2;
   if (!vue_len)
      vue_len = 1;

   if (dev->gen >= ILO_GEN(7)) {
      dw[0] = output_count << GEN7_SBE_NUM_OUTPUTS_SHIFT |
              vue_len << GEN7_SBE_URB_ENTRY_READ_LENGTH_SHIFT |
              vue_offset << GEN7_SBE_URB_ENTRY_READ_OFFSET_SHIFT;
      if (routing->swizzle_enable)
         dw[0] |= GEN7_SBE_SWIZZLE_ENABLE;
   }
   else {
      dw[0] = output_count << GEN6_SF_NUM_OUTPUTS_SHIFT |
              vue_len << GEN6_SF_URB_ENTRY_READ_LENGTH_SHIFT |
              vue_offset << GEN6_SF_URB_ENTRY_READ_OFFSET_SHIFT;
      if (routing->swizzle_enable)
         dw[0] |= GEN6_SF_SWIZZLE_ENABLE;
   }

   switch (rasterizer->state.sprite_coord_mode) {
   case PIPE_SPRITE_COORD_UPPER_LEFT:
      dw[0] |= GEN6_SF_POINT_SPRITE_UPPERLEFT;
      break;
   case PIPE_SPRITE_COORD_LOWER_LEFT:
      dw[0] |= GEN6_SF_POINT_SPRITE_LOWERLEFT;
      break;
   }

   /* DW1-DW8: 16 16-bit attribute swizzle fields */
   STATIC_ASSERT(Elements(routing->swizzles) >= 16);
   memcpy(&dw[1], routing->swizzles, 2 * 16);

   /*
    * From the Ivy Bridge PRM, volume 2 part 1, page 268:
    *
    *     "This field (Point Sprite Texture Coordinate Enable) must be
    *      programmed to 0 when non-point primitives are rendered."
    *
    * TODO We do not check that yet.
    */
   dw[9] = routing->point_sprite_enable;

   dw[10] = routing->const_interp_enable;

   /* WrapShortest enables */
   dw[11] = 0;
   dw[12] = 0;
}
 
/**
 * Emit 3DSTATE_SF by combining the raster payload (DW2-DW7) with the
 * setup-backend payload (DW1, DW8-DW19).
 */
static void
gen6_emit_3DSTATE_SF(const struct ilo_dev_info *dev,
                     const struct ilo_rasterizer_state *rasterizer,
                     const struct ilo_shader_state *fs,
                     const struct ilo_shader_state *last_sh,
                     struct ilo_cp *cp)
{
   const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x13);
   const uint8_t cmd_len = 20;
   uint32_t dw_raster[6], dw_sbe[13];

   ILO_GPE_VALID_GEN(dev, 6, 6);

   /* single-sampled and no depth buffer format at this point */
   ilo_gpe_gen6_fill_3dstate_sf_raster(dev, rasterizer, 1, PIPE_FORMAT_NONE,
         dw_raster, Elements(dw_raster));
   ilo_gpe_gen6_fill_3dstate_sf_sbe(dev, rasterizer, fs, last_sh,
         dw_sbe, Elements(dw_sbe));

   ilo_cp_begin(cp, cmd_len);
   ilo_cp_write(cp, cmd | (cmd_len - 2));
   ilo_cp_write(cp, dw_sbe[0]);
   ilo_cp_write_multi(cp, dw_raster, 6);
   ilo_cp_write_multi(cp, &dw_sbe[1], 12);
   ilo_cp_end(cp);
}
 
/**
 * Initialize the GEN6 WM-related portion of the rasterizer state:
 * precompute the fixed-function bits of DW5 and DW6 of 3DSTATE_WM, and the
 * extra bits to OR in when multisampling is enabled.
 */
void
ilo_gpe_init_rasterizer_wm_gen6(const struct ilo_dev_info *dev,
                                const struct pipe_rasterizer_state *state,
                                struct ilo_rasterizer_wm *wm)
{
   uint32_t dw5, dw6;

   ILO_GPE_VALID_GEN(dev, 6, 6);

   /* only the FF unit states are set, as in GEN7 */

   /* line end cap AA width matches the value programmed in 3DSTATE_SF */
   dw5 = GEN6_WM_LINE_AA_WIDTH_2_0 |
         ((state->line_smooth) ? GEN6_WM_LINE_END_CAP_AA_WIDTH_1_0 : 0) |
         ((state->poly_stipple_enable) ? GEN6_WM_POLYGON_STIPPLE_ENABLE : 0) |
         ((state->line_stipple_enable) ? GEN6_WM_LINE_STIPPLE_ENABLE : 0);

   dw6 = GEN6_WM_POSITION_ZW_PIXEL |
         GEN6_WM_MSRAST_OFF_PIXEL |
         GEN6_WM_MSDISPMODE_PERSAMPLE |
         ((state->bottom_edge_rule) ? GEN6_WM_POINT_RASTRULE_UPPER_RIGHT : 0);

   /*
    * assertion that makes sure
    *
    *   dw6 |= wm->dw_msaa_rast | wm->dw_msaa_disp;
    *
    * is valid
    */
   STATIC_ASSERT(GEN6_WM_MSRAST_OFF_PIXEL == 0 &&
                 GEN6_WM_MSDISPMODE_PERSAMPLE == 0);

   wm->dw_msaa_rast =
      (state->multisample) ? GEN6_WM_MSRAST_ON_PATTERN : 0;
   wm->dw_msaa_disp = GEN6_WM_MSDISPMODE_PERPIXEL;

   STATIC_ASSERT(Elements(wm->payload) >= 2);
   wm->payload[0] = dw5;
   wm->payload[1] = dw6;
}
 
/**
 * Initialize the GEN6 FS CSO: precompute the DW2, DW4, DW5, and DW6 payload
 * of 3DSTATE_WM from the fragment shader state.
 */
void
ilo_gpe_init_fs_cso_gen6(const struct ilo_dev_info *dev,
                         const struct ilo_shader_state *fs,
                         struct ilo_shader_cso *cso)
{
   int start_grf, input_count, interps, max_threads;
   uint32_t dw2, dw4, dw5, dw6;

   ILO_GPE_VALID_GEN(dev, 6, 6);

   start_grf = ilo_shader_get_kernel_param(fs, ILO_KERNEL_URB_DATA_START_REG);
   input_count = ilo_shader_get_kernel_param(fs, ILO_KERNEL_INPUT_COUNT);
   interps = ilo_shader_get_kernel_param(fs,
         ILO_KERNEL_FS_BARYCENTRIC_INTERPOLATIONS);

   /* see brwCreateContext() */
   max_threads = (dev->gt == 2) ? 80 : 40;

   /* IEEE-754 floating point mode is always used (never ALT mode) */
   dw2 = (true) ? 0 : GEN6_WM_FLOATING_POINT_MODE_ALT;

   /* only the SIMD8 dispatch slot is used */
   dw4 = start_grf << GEN6_WM_DISPATCH_START_GRF_SHIFT_0 |
         0 << GEN6_WM_DISPATCH_START_GRF_SHIFT_1 |
         0 << GEN6_WM_DISPATCH_START_GRF_SHIFT_2;

   dw5 = (max_threads - 1) << GEN6_WM_MAX_THREADS_SHIFT;

   /*
    * From the Sandy Bridge PRM, volume 2 part 1, page 275:
    *
    *     "This bit (Pixel Shader Kill Pixel), if ENABLED, indicates that the
    *      PS kernel or color calculator has the ability to kill (discard)
    *      pixels or samples, other than due to depth or stencil testing.
    *      This bit is required to be ENABLED in the following situations:
    *
    *      The API pixel shader program contains "killpix" or "discard"
    *      instructions, or other code in the pixel shader kernel that can
    *      cause the final pixel mask to differ from the pixel mask received
    *      on dispatch.
    *
    *      A sampler with chroma key enabled with kill pixel mode is used by
    *      the pixel shader.
    *
    *      Any render target has Alpha Test Enable or AlphaToCoverage Enable
    *      enabled.
    *
    *      The pixel shader kernel generates and outputs oMask.
    *
    *      Note: As ClipDistance clipping is fully supported in hardware and
    *      therefore not via PS instructions, there should be no need to
    *      ENABLE this bit due to ClipDistance clipping."
    */
   if (ilo_shader_get_kernel_param(fs, ILO_KERNEL_FS_USE_KILL))
      dw5 |= GEN6_WM_KILL_ENABLE;

   /*
    * From the Sandy Bridge PRM, volume 2 part 1, page 275:
    *
    *     "If a NULL Depth Buffer is selected, the Pixel Shader Computed Depth
    *      field must be set to disabled."
    *
    * TODO This is not checked yet.
    */
   if (ilo_shader_get_kernel_param(fs, ILO_KERNEL_FS_OUTPUT_Z))
      dw5 |= GEN6_WM_COMPUTED_DEPTH;

   if (ilo_shader_get_kernel_param(fs, ILO_KERNEL_FS_INPUT_Z))
      dw5 |= GEN6_WM_USES_SOURCE_DEPTH;

   if (ilo_shader_get_kernel_param(fs, ILO_KERNEL_FS_INPUT_W))
      dw5 |= GEN6_WM_USES_SOURCE_W;

   /*
    * TODO set this bit only when
    *
    *  a) fs writes colors and color is not masked, or
    *  b) fs writes depth, or
    *  c) fs or cc kills
    */
   if (true)
      dw5 |= GEN6_WM_DISPATCH_ENABLE;

   /* only SIMD8 dispatch is supported here */
   assert(!ilo_shader_get_kernel_param(fs, ILO_KERNEL_FS_DISPATCH_16_OFFSET));
   dw5 |= GEN6_WM_8_DISPATCH_ENABLE;

   dw6 = input_count << GEN6_WM_NUM_SF_OUTPUTS_SHIFT |
         GEN6_WM_POSOFFSET_NONE |
         interps << GEN6_WM_BARYCENTRIC_INTERPOLATION_MODE_SHIFT;

   STATIC_ASSERT(Elements(cso->payload) >= 4);
   cso->payload[0] = dw2;
   cso->payload[1] = dw4;
   cso->payload[2] = dw5;
   cso->payload[3] = dw6;
}
 
/**
 * Emit 3DSTATE_WM (GEN6).  When \p fs is NULL, pixel shader dispatching is
 * disabled; only the mandatory max-thread-count field is programmed.
 */
static void
gen6_emit_3DSTATE_WM(const struct ilo_dev_info *dev,
                     const struct ilo_shader_state *fs,
                     int num_samplers,
                     const struct ilo_rasterizer_state *rasterizer,
                     bool dual_blend, bool cc_may_kill,
                     struct ilo_cp *cp)
{
   const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x14);
   const uint8_t cmd_len = 9;
   const int num_samples = 1;
   const struct ilo_shader_cso *fs_cso;
   uint32_t dw2, dw4, dw5, dw6;

   ILO_GPE_VALID_GEN(dev, 6, 6);

   if (!fs) {
      /* see brwCreateContext() */
      const int max_threads = (dev->gt == 2) ? 80 : 40;

      ilo_cp_begin(cp, cmd_len);
      ilo_cp_write(cp, cmd | (cmd_len - 2));
      ilo_cp_write(cp, 0);
      ilo_cp_write(cp, 0);
      ilo_cp_write(cp, 0);
      ilo_cp_write(cp, 0);
      /* honor the valid range even if dispatching is disabled */
      ilo_cp_write(cp, (max_threads - 1) << GEN6_WM_MAX_THREADS_SHIFT);
      ilo_cp_write(cp, 0);
      ilo_cp_write(cp, 0);
      ilo_cp_write(cp, 0);
      ilo_cp_end(cp);

      return;
   }

   /* most fields were precomputed in ilo_gpe_init_fs_cso_gen6() */
   fs_cso = ilo_shader_get_kernel_cso(fs);
   dw2 = fs_cso->payload[0];
   dw4 = fs_cso->payload[1];
   dw5 = fs_cso->payload[2];
   dw6 = fs_cso->payload[3];

   /* sampler count is programmed in multiples of 4 */
   dw2 |= (num_samplers + 3) / 4 << GEN6_WM_SAMPLER_COUNT_SHIFT;

   if (true) {
      dw4 |= GEN6_WM_STATISTICS_ENABLE;
   }
   else {
      /*
       * From the Sandy Bridge PRM, volume 2 part 1, page 248:
       *
       *     "This bit (Statistics Enable) must be disabled if either of these
       *      bits is set: Depth Buffer Clear , Hierarchical Depth Buffer
       *      Resolve Enable or Depth Buffer Resolve Enable."
       */
      dw4 |= GEN6_WM_DEPTH_CLEAR;
      dw4 |= GEN6_WM_DEPTH_RESOLVE;
      dw4 |= GEN6_WM_HIERARCHICAL_DEPTH_RESOLVE;
   }

   /* alpha test or alpha-to-coverage can kill pixels in the color calculator */
   if (cc_may_kill) {
      dw5 |= GEN6_WM_KILL_ENABLE |
             GEN6_WM_DISPATCH_ENABLE;
   }

   if (dual_blend)
      dw5 |= GEN6_WM_DUAL_SOURCE_BLEND_ENABLE;

   /* fold in the fixed-function bits from the rasterizer state */
   dw5 |= rasterizer->wm.payload[0];

   dw6 |= rasterizer->wm.payload[1];

   if (num_samples > 1) {
      dw6 |= rasterizer->wm.dw_msaa_rast |
             rasterizer->wm.dw_msaa_disp;
   }

   ilo_cp_begin(cp, cmd_len);
   ilo_cp_write(cp, cmd | (cmd_len - 2));
   ilo_cp_write(cp, ilo_shader_get_kernel_offset(fs));
   ilo_cp_write(cp, dw2);
   ilo_cp_write(cp, 0); /* scratch */
   ilo_cp_write(cp, dw4);
   ilo_cp_write(cp, dw5);
   ilo_cp_write(cp, dw6);
   ilo_cp_write(cp, 0); /* kernel 1 */
   ilo_cp_write(cp, 0); /* kernel 2 */
   ilo_cp_end(cp);
}
 
/**
 * Pack up to four constant-buffer (address, read length) pairs into the four
 * buffer DWords shared by the 3DSTATE_CONSTANT_x commands.
 *
 * \return a bitmask of the buffer slots that are enabled
 */
static unsigned
gen6_fill_3dstate_constant(const struct ilo_dev_info *dev,
                           const uint32_t *bufs, const int *sizes,
                           int num_bufs, int max_read_length,
                           uint32_t *dw, int num_dwords)
{
   unsigned enabled = 0x0;
   int total_read_length = 0;
   int i;

   assert(num_dwords == 4);

   for (i = 0; i < 4; i++) {
      if (i >= num_bufs || !sizes[i]) {
         /* unused slot */
         dw[i] = 0;
         continue;
      }

      /* in 256-bit units minus one */
      const int read_len = (sizes[i] + 31) / 32 - 1;

      /* the buffer address must be 32-byte aligned */
      assert(bufs[i] % 32 == 0);
      assert(read_len < 32);

      enabled |= 1 << i;
      dw[i] = bufs[i] | read_len;

      total_read_length += read_len + 1;
   }

   assert(total_read_length <= max_read_length);

   return enabled;
}
 
/**
 * Emit 3DSTATE_CONSTANT_VS with up to four constant buffers.
 */
static void
gen6_emit_3DSTATE_CONSTANT_VS(const struct ilo_dev_info *dev,
                              const uint32_t *bufs, const int *sizes,
                              int num_bufs,
                              struct ilo_cp *cp)
{
   const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x15);
   const uint8_t cmd_len = 5;
   uint32_t buf_dw[4], buf_enabled;
   int i;

   ILO_GPE_VALID_GEN(dev, 6, 6);
   assert(num_bufs <= 4);

   /*
    * From the Sandy Bridge PRM, volume 2 part 1, page 138:
    *
    *     "The sum of all four read length fields (each incremented to
    *      represent the actual read length) must be less than or equal
    *      to 32"
    */
   buf_enabled = gen6_fill_3dstate_constant(dev,
         bufs, sizes, num_bufs, 32, buf_dw, Elements(buf_dw));

   ilo_cp_begin(cp, cmd_len);
   /* the buffer-enable bits live in DW0, bits 12-15 */
   ilo_cp_write(cp, cmd | (cmd_len - 2) | buf_enabled << 12);
   for (i = 0; i < 4; i++)
      ilo_cp_write(cp, buf_dw[i]);
   ilo_cp_end(cp);
}
 
/**
 * Emit 3DSTATE_CONSTANT_GS with up to four constant buffers.
 */
static void
gen6_emit_3DSTATE_CONSTANT_GS(const struct ilo_dev_info *dev,
                              const uint32_t *bufs, const int *sizes,
                              int num_bufs,
                              struct ilo_cp *cp)
{
   const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x16);
   const uint8_t cmd_len = 5;
   uint32_t buf_dw[4], buf_enabled;
   int i;

   ILO_GPE_VALID_GEN(dev, 6, 6);
   assert(num_bufs <= 4);

   /*
    * From the Sandy Bridge PRM, volume 2 part 1, page 161:
    *
    *     "The sum of all four read length fields (each incremented to
    *      represent the actual read length) must be less than or equal
    *      to 64"
    */
   buf_enabled = gen6_fill_3dstate_constant(dev,
         bufs, sizes, num_bufs, 64, buf_dw, Elements(buf_dw));

   ilo_cp_begin(cp, cmd_len);
   /* the buffer-enable bits live in DW0, bits 12-15 */
   ilo_cp_write(cp, cmd | (cmd_len - 2) | buf_enabled << 12);
   for (i = 0; i < 4; i++)
      ilo_cp_write(cp, buf_dw[i]);
   ilo_cp_end(cp);
}
 
/**
 * Emit 3DSTATE_CONSTANT_PS with up to four constant buffers.
 */
static void
gen6_emit_3DSTATE_CONSTANT_PS(const struct ilo_dev_info *dev,
                              const uint32_t *bufs, const int *sizes,
                              int num_bufs,
                              struct ilo_cp *cp)
{
   const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x17);
   const uint8_t cmd_len = 5;
   uint32_t buf_dw[4], buf_enabled;
   int i;

   ILO_GPE_VALID_GEN(dev, 6, 6);
   assert(num_bufs <= 4);

   /*
    * From the Sandy Bridge PRM, volume 2 part 1, page 287:
    *
    *     "The sum of all four read length fields (each incremented to
    *      represent the actual read length) must be less than or equal
    *      to 64"
    */
   buf_enabled = gen6_fill_3dstate_constant(dev,
         bufs, sizes, num_bufs, 64, buf_dw, Elements(buf_dw));

   ilo_cp_begin(cp, cmd_len);
   /* the buffer-enable bits live in DW0, bits 12-15 */
   ilo_cp_write(cp, cmd | (cmd_len - 2) | buf_enabled << 12);
   for (i = 0; i < 4; i++)
      ilo_cp_write(cp, buf_dw[i]);
   ilo_cp_end(cp);
}
 
/**
 * Emit 3DSTATE_SAMPLE_MASK, masking out bits beyond the samples GEN6
 * supports.
 */
static void
gen6_emit_3DSTATE_SAMPLE_MASK(const struct ilo_dev_info *dev,
                              unsigned sample_mask,
                              struct ilo_cp *cp)
{
   const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x18);
   const uint8_t cmd_len = 2;
   /* GEN6 supports at most 4 samples */
   const unsigned valid_mask = 0xf;

   ILO_GPE_VALID_GEN(dev, 6, 6);

   ilo_cp_begin(cp, cmd_len);
   ilo_cp_write(cp, cmd | (cmd_len - 2));
   ilo_cp_write(cp, sample_mask & valid_mask);
   ilo_cp_end(cp);
}
 
/**
 * Emit 3DSTATE_DRAWING_RECTANGLE, clamping the rectangle to the per-gen
 * coordinate limit.
 */
static void
gen6_emit_3DSTATE_DRAWING_RECTANGLE(const struct ilo_dev_info *dev,
                                    unsigned x, unsigned y,
                                    unsigned width, unsigned height,
                                    struct ilo_cp *cp)
{
   const uint32_t cmd = ILO_GPE_CMD(0x3, 0x1, 0x00);
   const uint8_t cmd_len = 4;
   unsigned xmax = x + width - 1;
   unsigned ymax = y + height - 1;
   unsigned limit;

   ILO_GPE_VALID_GEN(dev, 6, 7);

   if (dev->gen >= ILO_GEN(7)) {
      limit = 16383;
   }
   else {
      /*
       * From the Sandy Bridge PRM, volume 2 part 1, page 230:
       *
       *     "[DevSNB] Errata: This field (Clipped Drawing Rectangle Y Min)
       *      must be an even number"
       */
      assert(y % 2 == 0);

      limit = 8191;
   }

   /* clamp all four edges independently */
   if (x > limit)
      x = limit;
   if (y > limit)
      y = limit;
   if (xmax > limit)
      xmax = limit;
   if (ymax > limit)
      ymax = limit;

   ilo_cp_begin(cp, cmd_len);
   ilo_cp_write(cp, cmd | (cmd_len - 2));
   ilo_cp_write(cp, y << 16 | x);
   ilo_cp_write(cp, ymax << 16 | xmax);

   /*
    * There is no need to set the origin. It is intended to support front
    * buffer rendering.
    */
   ilo_cp_write(cp, 0);

   ilo_cp_end(cp);
}
 
/**
 * Collected parameters for emitting the depth/stencil/HiZ buffer states:
 * per-buffer BO information plus the shared dimensions and offsets.
 */
struct ilo_zs_surface_info {
   int surface_type;     /* BRW_SURFACE_x */
   int format;           /* BRW_DEPTHFORMAT_x */

   /* depth, separate stencil, and HiZ buffers, respectively */
   struct {
      struct intel_bo *bo;
      unsigned stride;
      enum intel_tiling_mode tiling;
      uint32_t offset;   /* byte offset of the slice within the BO */
   } zs, stencil, hiz;

   unsigned width, height, depth;
   unsigned lod, first_layer, num_layers;
   /* intra-tile X/Y offsets, used when the accessed layer is rebased */
   uint32_t x_offset, y_offset;
};
 
/**
 * Initialize \p info as a 1x1x1 null depth/stencil surface with a FLOAT
 * depth format and no buffers attached.
 */
static void
zs_init_info_null(const struct ilo_dev_info *dev,
                  struct ilo_zs_surface_info *info)
{
   ILO_GPE_VALID_GEN(dev, 6, 7);

   memset(info, 0, sizeof(*info));

   info->surface_type = BRW_SURFACE_NULL;
   info->format = BRW_DEPTHFORMAT_D32_FLOAT;

   /* minimal 1x1 single-layer extent */
   info->width = info->height = info->depth = 1;
   info->num_layers = 1;
}
 
static void
zs_init_info(const struct ilo_dev_info *dev,
const struct ilo_texture *tex,
enum pipe_format format,
unsigned level,
unsigned first_layer, unsigned num_layers,
struct ilo_zs_surface_info *info)
{
const bool rebase_layer = true;
struct intel_bo * const hiz_bo = NULL;
bool separate_stencil;
uint32_t x_offset[3], y_offset[3];
 
ILO_GPE_VALID_GEN(dev, 6, 7);
 
memset(info, 0, sizeof(*info));
 
info->surface_type = ilo_gpe_gen6_translate_texture(tex->base.target);
 
if (info->surface_type == BRW_SURFACE_CUBE) {
/*
* From the Sandy Bridge PRM, volume 2 part 1, page 325-326:
*
* "For Other Surfaces (Cube Surfaces):
* This field (Minimum Array Element) is ignored."
*
* "For Other Surfaces (Cube Surfaces):
* This field (Render Target View Extent) is ignored."
*
* As such, we cannot set first_layer and num_layers on cube surfaces.
* To work around that, treat it as a 2D surface.
*/
info->surface_type = BRW_SURFACE_2D;
}
 
if (dev->gen >= ILO_GEN(7)) {
separate_stencil = true;
}
else {
/*
* From the Sandy Bridge PRM, volume 2 part 1, page 317:
*
* "This field (Separate Stencil Buffer Enable) must be set to the
* same value (enabled or disabled) as Hierarchical Depth Buffer
* Enable."
*/
separate_stencil = (hiz_bo != NULL);
}
 
/*
* From the Sandy Bridge PRM, volume 2 part 1, page 317:
*
* "If this field (Hierarchical Depth Buffer Enable) is enabled, the
* Surface Format of the depth buffer cannot be
* D32_FLOAT_S8X24_UINT or D24_UNORM_S8_UINT. Use of stencil
* requires the separate stencil buffer."
*
* From the Ironlake PRM, volume 2 part 1, page 330:
*
* "If this field (Separate Stencil Buffer Enable) is disabled, the
* Surface Format of the depth buffer cannot be D24_UNORM_X8_UINT."
*
* There is no similar restriction for GEN6. But when D24_UNORM_X8_UINT
* is indeed used, the depth values output by the fragment shaders will
* be different when read back.
*
* As for GEN7+, separate_stencil is always true.
*/
switch (format) {
case PIPE_FORMAT_Z16_UNORM:
info->format = BRW_DEPTHFORMAT_D16_UNORM;
break;
case PIPE_FORMAT_Z32_FLOAT:
info->format = BRW_DEPTHFORMAT_D32_FLOAT;
break;
case PIPE_FORMAT_Z24X8_UNORM:
case PIPE_FORMAT_Z24_UNORM_S8_UINT:
info->format = (separate_stencil) ?
BRW_DEPTHFORMAT_D24_UNORM_X8_UINT :
BRW_DEPTHFORMAT_D24_UNORM_S8_UINT;
break;
case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
info->format = (separate_stencil) ?
BRW_DEPTHFORMAT_D32_FLOAT :
BRW_DEPTHFORMAT_D32_FLOAT_S8X24_UINT;
break;
case PIPE_FORMAT_S8_UINT:
if (separate_stencil) {
info->format = BRW_DEPTHFORMAT_D32_FLOAT;
break;
}
/* fall through */
default:
assert(!"unsupported depth/stencil format");
zs_init_info_null(dev, info);
return;
break;
}
 
if (format != PIPE_FORMAT_S8_UINT) {
info->zs.bo = tex->bo;
info->zs.stride = tex->bo_stride;
info->zs.tiling = tex->tiling;
 
if (rebase_layer) {
info->zs.offset = ilo_texture_get_slice_offset(tex,
level, first_layer, &x_offset[0], &y_offset[0]);
}
}
 
if (tex->separate_s8 || format == PIPE_FORMAT_S8_UINT) {
const struct ilo_texture *s8_tex =
(tex->separate_s8) ? tex->separate_s8 : tex;
 
info->stencil.bo = s8_tex->bo;
 
/*
* From the Sandy Bridge PRM, volume 2 part 1, page 329:
*
* "The pitch must be set to 2x the value computed based on width,
* as the stencil buffer is stored with two rows interleaved."
*
* According to the classic driver, we need to do the same for GEN7+
* even though the Ivy Bridge PRM does not say anything about it.
*/
info->stencil.stride = s8_tex->bo_stride * 2;
 
info->stencil.tiling = s8_tex->tiling;
 
if (rebase_layer) {
info->stencil.offset = ilo_texture_get_slice_offset(s8_tex,
level, first_layer, &x_offset[1], &y_offset[1]);
}
}
 
if (hiz_bo) {
info->hiz.bo = hiz_bo;
info->hiz.stride = 0;
info->hiz.tiling = 0;
info->hiz.offset = 0;
x_offset[2] = 0;
y_offset[2] = 0;
}
 
info->width = tex->base.width0;
info->height = tex->base.height0;
info->depth = (tex->base.target == PIPE_TEXTURE_3D) ?
tex->base.depth0 : num_layers;
 
info->lod = level;
info->first_layer = first_layer;
info->num_layers = num_layers;
 
if (rebase_layer) {
/* the size of the layer */
info->width = u_minify(info->width, level);
info->height = u_minify(info->height, level);
if (info->surface_type == BRW_SURFACE_3D)
info->depth = u_minify(info->depth, level);
else
info->depth = 1;
 
/* no layered rendering */
assert(num_layers == 1);
 
info->lod = 0;
info->first_layer = 0;
info->num_layers = 1;
 
/* all three share the same X/Y offsets */
if (info->zs.bo) {
if (info->stencil.bo) {
assert(x_offset[0] == x_offset[1]);
assert(y_offset[0] == y_offset[1]);
}
 
info->x_offset = x_offset[0];
info->y_offset = y_offset[0];
}
else {
assert(info->stencil.bo);
 
info->x_offset = x_offset[1];
info->y_offset = y_offset[1];
}
 
if (info->hiz.bo) {
assert(info->x_offset == x_offset[2]);
assert(info->y_offset == y_offset[2]);
}
 
/*
* From the Sandy Bridge PRM, volume 2 part 1, page 326:
*
* "The 3 LSBs of both offsets (Depth Coordinate Offset Y and Depth
* Coordinate Offset X) must be zero to ensure correct alignment"
*
* XXX Skip the check for gen6, which seems to be fine. We need to make
* sure that does not happen eventually.
*/
if (dev->gen >= ILO_GEN(7)) {
assert((info->x_offset & 7) == 0 && (info->y_offset & 7) == 0);
info->x_offset &= ~7;
info->y_offset &= ~7;
}
 
info->width += info->x_offset;
info->height += info->y_offset;
 
/* we have to treat them as 2D surfaces */
if (info->surface_type == BRW_SURFACE_CUBE) {
assert(tex->base.width0 == tex->base.height0);
/* we will set slice_offset to point to the single face */
info->surface_type = BRW_SURFACE_2D;
}
else if (info->surface_type == BRW_SURFACE_1D && info->height > 1) {
assert(tex->base.height0 == 1);
info->surface_type = BRW_SURFACE_2D;
}
}
}
 
/**
 * Initialize \p zs from \p tex, or as a null depth/stencil surface when
 * \p tex is NULL.
 *
 * The packed hardware dwords are stored in zs->payload:
 *   [0..5]  body of 3DSTATE_DEPTH_BUFFER
 *   [6..7]  body of 3DSTATE_STENCIL_BUFFER
 *   [8..9]  body of 3DSTATE_HIER_DEPTH_BUFFER
 *
 * The bos (zs->bo, zs->separate_s8_bo, zs->hiz_bo) are stored without
 * incrementing their reference counts; the caller must keep the texture
 * alive for as long as this surface is in use.
 */
void
ilo_gpe_init_zs_surface(const struct ilo_dev_info *dev,
                        const struct ilo_texture *tex,
                        enum pipe_format format,
                        unsigned level,
                        unsigned first_layer, unsigned num_layers,
                        struct ilo_zs_surface *zs)
{
   /* GEN7 raised the 2D surface and array-size limits */
   const int max_2d_size = (dev->gen >= ILO_GEN(7)) ? 16384 : 8192;
   const int max_array_size = (dev->gen >= ILO_GEN(7)) ? 2048 : 512;
   struct ilo_zs_surface_info info;
   uint32_t dw1, dw2, dw3, dw4, dw5, dw6;

   ILO_GPE_VALID_GEN(dev, 6, 7);

   if (tex)
      zs_init_info(dev, tex, format, level, first_layer, num_layers, &info);
   else
      zs_init_info_null(dev, &info);

   /* sanity-check the dimensions against the per-type hardware limits */
   switch (info.surface_type) {
   case BRW_SURFACE_NULL:
      break;
   case BRW_SURFACE_1D:
      assert(info.width <= max_2d_size && info.height == 1 &&
             info.depth <= max_array_size);
      assert(info.first_layer < max_array_size - 1 &&
             info.num_layers <= max_array_size);
      break;
   case BRW_SURFACE_2D:
      assert(info.width <= max_2d_size && info.height <= max_2d_size &&
             info.depth <= max_array_size);
      assert(info.first_layer < max_array_size - 1 &&
             info.num_layers <= max_array_size);
      break;
   case BRW_SURFACE_3D:
      assert(info.width <= 2048 && info.height <= 2048 && info.depth <= 2048);
      assert(info.first_layer < 2048 && info.num_layers <= max_array_size);
      assert(info.x_offset == 0 && info.y_offset == 0);
      break;
   case BRW_SURFACE_CUBE:
      assert(info.width <= max_2d_size && info.height <= max_2d_size &&
             info.depth == 1);
      assert(info.first_layer == 0 && info.num_layers == 1);
      assert(info.width == info.height);
      assert(info.x_offset == 0 && info.y_offset == 0);
      break;
   default:
      assert(!"unexpected depth surface type");
      break;
   }

   /* DW1: surface type, depth format, and (below) the surface pitch */
   dw1 = info.surface_type << 29 |
         info.format << 18;

   if (info.zs.bo) {
      /* required for GEN6+ */
      assert(info.zs.tiling == INTEL_TILING_Y);
      assert(info.zs.stride > 0 && info.zs.stride < 128 * 1024 &&
             info.zs.stride % 128 == 0);
      assert(info.width <= info.zs.stride);

      /* the hardware field is "pitch - 1" */
      dw1 |= (info.zs.stride - 1);
      dw2 = info.zs.offset;
   }
   else {
      dw2 = 0;
   }

   if (dev->gen >= ILO_GEN(7)) {
      /* GEN7: explicit depth/stencil/hiz write-enable bits in DW1 */
      if (info.zs.bo)
         dw1 |= 1 << 28;

      if (info.stencil.bo)
         dw1 |= 1 << 27;

      if (info.hiz.bo)
         dw1 |= 1 << 22;

      dw3 = (info.height - 1) << 18 |
            (info.width - 1) << 4 |
            info.lod;

      dw4 = (info.depth - 1) << 21 |
            info.first_layer << 10;

      dw5 = info.y_offset << 16 | info.x_offset;

      dw6 = (info.num_layers - 1) << 21;
   }
   else {
      /* always Y-tiled */
      dw1 |= 1 << 27 |
             1 << 26;

      /* hiz implies separate stencil on GEN6 */
      if (info.hiz.bo) {
         dw1 |= 1 << 22 |
                1 << 21;
      }

      /* GEN6 packs the fields at different bit positions than GEN7 */
      dw3 = (info.height - 1) << 19 |
            (info.width - 1) << 6 |
            info.lod << 2 |
            BRW_SURFACE_MIPMAPLAYOUT_BELOW << 1;

      dw4 = (info.depth - 1) << 21 |
            info.first_layer << 10 |
            (info.num_layers - 1) << 1;

      dw5 = info.y_offset << 16 | info.x_offset;

      dw6 = 0;
   }

   STATIC_ASSERT(Elements(zs->payload) >= 10);

   zs->payload[0] = dw1;
   zs->payload[1] = dw2;
   zs->payload[2] = dw3;
   zs->payload[3] = dw4;
   zs->payload[4] = dw5;
   zs->payload[5] = dw6;

   /* do not increment reference count */
   zs->bo = info.zs.bo;

   /* separate stencil */
   if (info.stencil.bo) {
      assert(info.stencil.stride > 0 && info.stencil.stride < 128 * 1024 &&
             info.stencil.stride % 128 == 0);

      zs->payload[6] = info.stencil.stride - 1;
      zs->payload[7] = info.stencil.offset;

      /* do not increment reference count */
      zs->separate_s8_bo = info.stencil.bo;
   }
   else {
      zs->payload[6] = 0;
      zs->payload[7] = 0;
      zs->separate_s8_bo = NULL;
   }

   /* hiz */
   if (info.hiz.bo) {
      zs->payload[8] = info.hiz.stride - 1;
      zs->payload[9] = info.hiz.offset;

      /* do not increment reference count */
      zs->hiz_bo = info.hiz.bo;
   }
   else {
      zs->payload[8] = 0;
      zs->payload[9] = 0;
      zs->hiz_bo = NULL;
   }
}
 
/*
 * Emit 3DSTATE_DEPTH_BUFFER from the dwords pre-packed by
 * ilo_gpe_init_zs_surface().
 */
static void
gen6_emit_3DSTATE_DEPTH_BUFFER(const struct ilo_dev_info *dev,
                               const struct ilo_zs_surface *zs,
                               struct ilo_cp *cp)
{
   const uint8_t cmd_len = 7;
   uint32_t cmd;
   int i;

   ILO_GPE_VALID_GEN(dev, 6, 7);

   /* the command moved to a different opcode on GEN7 */
   cmd = (dev->gen >= ILO_GEN(7)) ?
      ILO_GPE_CMD(0x3, 0x0, 0x05) : ILO_GPE_CMD(0x3, 0x1, 0x05);

   ilo_cp_begin(cp, cmd_len);
   ilo_cp_write(cp, cmd | (cmd_len - 2));
   ilo_cp_write(cp, zs->payload[0]);
   /* payload[1] is the surface offset, relocated against the depth bo */
   ilo_cp_write_bo(cp, zs->payload[1], zs->bo,
                   INTEL_DOMAIN_RENDER, INTEL_DOMAIN_RENDER);
   for (i = 2; i <= 5; i++)
      ilo_cp_write(cp, zs->payload[i]);
   ilo_cp_end(cp);
}
 
/*
 * Emit 3DSTATE_POLY_STIPPLE_OFFSET with the given offsets into the 32x32
 * polygon stipple pattern.
 */
static void
gen6_emit_3DSTATE_POLY_STIPPLE_OFFSET(const struct ilo_dev_info *dev,
                                      int x_offset, int y_offset,
                                      struct ilo_cp *cp)
{
   const uint8_t cmd_len = 2;
   const uint32_t cmd = ILO_GPE_CMD(0x3, 0x1, 0x06);

   ILO_GPE_VALID_GEN(dev, 6, 7);

   /* both offsets are 5-bit fields */
   assert(x_offset >= 0 && x_offset <= 31);
   assert(y_offset >= 0 && y_offset <= 31);

   ilo_cp_begin(cp, cmd_len);
   ilo_cp_write(cp, cmd | (cmd_len - 2));
   ilo_cp_write(cp, (x_offset << 8) | y_offset);
   ilo_cp_end(cp);
}
 
/*
 * Emit 3DSTATE_POLY_STIPPLE_PATTERN, uploading all 32 rows of the polygon
 * stipple pattern.
 */
static void
gen6_emit_3DSTATE_POLY_STIPPLE_PATTERN(const struct ilo_dev_info *dev,
                                       const struct pipe_poly_stipple *pattern,
                                       struct ilo_cp *cp)
{
   const uint8_t cmd_len = 33;
   const uint32_t cmd = ILO_GPE_CMD(0x3, 0x1, 0x07);
   int row;

   ILO_GPE_VALID_GEN(dev, 6, 7);
   assert(Elements(pattern->stipple) == 32);

   ilo_cp_begin(cp, cmd_len);
   ilo_cp_write(cp, cmd | (cmd_len - 2));
   /* one dword per pattern row */
   for (row = 0; row < 32; row++)
      ilo_cp_write(cp, pattern->stipple[row]);
   ilo_cp_end(cp);
}
 
/*
 * Emit 3DSTATE_LINE_STIPPLE with the 16-bit pattern and the repeat factor.
 * The inverse repeat count is a fixed-point value whose format (and bit
 * position) differs between GEN6 and GEN7.
 */
static void
gen6_emit_3DSTATE_LINE_STIPPLE(const struct ilo_dev_info *dev,
                               unsigned pattern, unsigned factor,
                               struct ilo_cp *cp)
{
   const uint8_t cmd_len = 3;
   const uint32_t cmd = ILO_GPE_CMD(0x3, 0x1, 0x08);
   uint32_t dw2;

   ILO_GPE_VALID_GEN(dev, 6, 7);
   assert((pattern & 0xffff) == pattern);
   assert(factor >= 1 && factor <= 256);

   if (dev->gen >= ILO_GEN(7)) {
      /* inverse repeat count in U1.16 */
      const unsigned inverse = (unsigned) (65536.0f / factor);
      dw2 = inverse << 15 | factor;
   }
   else {
      /* inverse repeat count in U1.13 */
      const unsigned inverse = (unsigned) (8192.0f / factor);
      dw2 = inverse << 16 | factor;
   }

   ilo_cp_begin(cp, cmd_len);
   ilo_cp_write(cp, cmd | (cmd_len - 2));
   ilo_cp_write(cp, pattern);
   ilo_cp_write(cp, dw2);
   ilo_cp_end(cp);
}
 
/*
 * Emit 3DSTATE_AA_LINE_PARAMETERS with all fields (cap and bias slopes)
 * left at zero.
 */
static void
gen6_emit_3DSTATE_AA_LINE_PARAMETERS(const struct ilo_dev_info *dev,
                                     struct ilo_cp *cp)
{
   const uint8_t cmd_len = 3;
   const uint32_t cmd = ILO_GPE_CMD(0x3, 0x1, 0x0a);

   ILO_GPE_VALID_GEN(dev, 6, 7);

   ilo_cp_begin(cp, cmd_len);
   ilo_cp_write(cp, cmd | (cmd_len - 2));
   /* both dwords are all-zero: default slopes and biases */
   ilo_cp_write(cp, 0 << 16 | 0);
   ilo_cp_write(cp, 0 << 16 | 0);
   ilo_cp_end(cp);
}
 
/*
 * Emit 3DSTATE_GS_SVB_INDEX (GEN6 only) to program streamed vertex buffer
 * index \p index with the current and maximum SVBI values.
 */
static void
gen6_emit_3DSTATE_GS_SVB_INDEX(const struct ilo_dev_info *dev,
                               int index, unsigned svbi,
                               unsigned max_svbi,
                               bool load_vertex_count,
                               struct ilo_cp *cp)
{
   const uint8_t cmd_len = 4;
   const uint32_t cmd = ILO_GPE_CMD(0x3, 0x1, 0x0b);
   uint32_t dw1 = index << SVB_INDEX_SHIFT;

   ILO_GPE_VALID_GEN(dev, 6, 6);
   assert(index >= 0 && index < 4);

   /* have the hardware track the vertex count internally */
   if (load_vertex_count)
      dw1 |= SVB_LOAD_INTERNAL_VERTEX_COUNT;

   ilo_cp_begin(cp, cmd_len);
   ilo_cp_write(cp, cmd | (cmd_len - 2));
   ilo_cp_write(cp, dw1);
   ilo_cp_write(cp, svbi);
   ilo_cp_write(cp, max_svbi);
   ilo_cp_end(cp);
}
 
/*
 * Emit 3DSTATE_MULTISAMPLE with the sample count and the packed sample
 * positions.  GEN7 takes an extra dword for the 8x sample positions.
 */
static void
gen6_emit_3DSTATE_MULTISAMPLE(const struct ilo_dev_info *dev,
                              int num_samples,
                              const uint32_t *packed_sample_pos,
                              bool pixel_location_center,
                              struct ilo_cp *cp)
{
   const uint32_t cmd = ILO_GPE_CMD(0x3, 0x1, 0x0d);
   const uint8_t cmd_len = (dev->gen >= ILO_GEN(7)) ? 4 : 3;
   uint32_t dw1, dw2, dw3;

   ILO_GPE_VALID_GEN(dev, 6, 7);

   dw1 = (pixel_location_center) ?
      MS_PIXEL_LOCATION_CENTER : MS_PIXEL_LOCATION_UPPER_LEFT;
   dw2 = 0;
   dw3 = 0;

   switch (num_samples) {
   case 0:
   case 1:
      dw1 |= MS_NUMSAMPLES_1;
      break;
   case 4:
      dw1 |= MS_NUMSAMPLES_4;
      dw2 = packed_sample_pos[0];
      break;
   case 8:
      /* 8x MSAA requires GEN7 */
      assert(dev->gen >= ILO_GEN(7));
      dw1 |= MS_NUMSAMPLES_8;
      dw2 = packed_sample_pos[0];
      dw3 = packed_sample_pos[1];
      break;
   default:
      assert(!"unsupported sample count");
      dw1 |= MS_NUMSAMPLES_1;
      break;
   }

   ilo_cp_begin(cp, cmd_len);
   ilo_cp_write(cp, cmd | (cmd_len - 2));
   ilo_cp_write(cp, dw1);
   ilo_cp_write(cp, dw2);
   if (dev->gen >= ILO_GEN(7))
      ilo_cp_write(cp, dw3);
   ilo_cp_end(cp);
}
 
/*
 * Emit 3DSTATE_STENCIL_BUFFER from the dwords pre-packed by
 * ilo_gpe_init_zs_surface().
 */
static void
gen6_emit_3DSTATE_STENCIL_BUFFER(const struct ilo_dev_info *dev,
                                 const struct ilo_zs_surface *zs,
                                 struct ilo_cp *cp)
{
   const uint8_t cmd_len = 3;
   uint32_t cmd;

   ILO_GPE_VALID_GEN(dev, 6, 7);

   /* a different opcode is used on GEN7 */
   cmd = (dev->gen >= ILO_GEN(7)) ?
      ILO_GPE_CMD(0x3, 0x0, 0x06) :
      ILO_GPE_CMD(0x3, 0x1, 0x0e);

   ilo_cp_begin(cp, cmd_len);
   ilo_cp_write(cp, cmd | (cmd_len - 2));
   /* see ilo_gpe_init_zs_surface(): [6] pitch, [7] offset */
   ilo_cp_write(cp, zs->payload[6]);
   ilo_cp_write_bo(cp, zs->payload[7], zs->separate_s8_bo,
                   INTEL_DOMAIN_RENDER, INTEL_DOMAIN_RENDER);
   ilo_cp_end(cp);
}
 
/*
 * Emit 3DSTATE_HIER_DEPTH_BUFFER from the dwords pre-packed by
 * ilo_gpe_init_zs_surface().
 */
static void
gen6_emit_3DSTATE_HIER_DEPTH_BUFFER(const struct ilo_dev_info *dev,
                                    const struct ilo_zs_surface *zs,
                                    struct ilo_cp *cp)
{
   const uint8_t cmd_len = 3;
   uint32_t cmd;

   ILO_GPE_VALID_GEN(dev, 6, 7);

   /* a different opcode is used on GEN7 */
   cmd = (dev->gen >= ILO_GEN(7)) ?
      ILO_GPE_CMD(0x3, 0x0, 0x07) :
      ILO_GPE_CMD(0x3, 0x1, 0x0f);

   ilo_cp_begin(cp, cmd_len);
   ilo_cp_write(cp, cmd | (cmd_len - 2));
   /* see ilo_gpe_init_zs_surface(): [8] pitch, [9] offset */
   ilo_cp_write(cp, zs->payload[8]);
   ilo_cp_write_bo(cp, zs->payload[9], zs->hiz_bo,
                   INTEL_DOMAIN_RENDER, INTEL_DOMAIN_RENDER);
   ilo_cp_end(cp);
}
 
/*
 * Emit 3DSTATE_CLEAR_PARAMS (GEN6 only) with the depth clear value, marking
 * it valid.
 */
static void
gen6_emit_3DSTATE_CLEAR_PARAMS(const struct ilo_dev_info *dev,
                               uint32_t clear_val,
                               struct ilo_cp *cp)
{
   const uint8_t cmd_len = 2;
   const uint32_t dw0 = ILO_GPE_CMD(0x3, 0x1, 0x10) |
                        GEN5_DEPTH_CLEAR_VALID |
                        (cmd_len - 2);

   ILO_GPE_VALID_GEN(dev, 6, 6);

   ilo_cp_begin(cp, cmd_len);
   ilo_cp_write(cp, dw0);
   ilo_cp_write(cp, clear_val);
   ilo_cp_end(cp);
}
 
/*
 * Emit PIPE_CONTROL with flags \p dw1, optionally writing to \p bo at
 * \p bo_offset for the post-sync operation.  When \p write_qword is set,
 * the command is one dword longer so a qword can be written.
 *
 * The asserts enforce the PRM-documented restrictions on combining the
 * CS-stall and depth-stall bits with the other flags.
 */
static void
gen6_emit_PIPE_CONTROL(const struct ilo_dev_info *dev,
                       uint32_t dw1,
                       struct intel_bo *bo, uint32_t bo_offset,
                       bool write_qword,
                       struct ilo_cp *cp)
{
   const uint32_t cmd = ILO_GPE_CMD(0x3, 0x2, 0x00);
   const uint8_t cmd_len = (write_qword) ? 5 : 4;
   const uint32_t read_domains = INTEL_DOMAIN_INSTRUCTION;
   const uint32_t write_domain = INTEL_DOMAIN_INSTRUCTION;

   ILO_GPE_VALID_GEN(dev, 6, 7);

   if (dw1 & PIPE_CONTROL_CS_STALL) {
      /*
       * From the Sandy Bridge PRM, volume 2 part 1, page 73:
       *
       *     "1 of the following must also be set (when CS stall is set):
       *
       *       * Depth Cache Flush Enable ([0] of DW1)
       *       * Stall at Pixel Scoreboard ([1] of DW1)
       *       * Depth Stall ([13] of DW1)
       *       * Post-Sync Operation ([13] of DW1)
       *       * Render Target Cache Flush Enable ([12] of DW1)
       *       * Notify Enable ([8] of DW1)"
       *
       * From the Ivy Bridge PRM, volume 2 part 1, page 61:
       *
       *     "One of the following must also be set (when CS stall is set):
       *
       *       * Render Target Cache Flush Enable ([12] of DW1)
       *       * Depth Cache Flush Enable ([0] of DW1)
       *       * Stall at Pixel Scoreboard ([1] of DW1)
       *       * Depth Stall ([13] of DW1)
       *       * Post-Sync Operation ([13] of DW1)"
       */
      uint32_t bit_test = PIPE_CONTROL_WRITE_FLUSH |
                          PIPE_CONTROL_DEPTH_CACHE_FLUSH |
                          PIPE_CONTROL_STALL_AT_SCOREBOARD |
                          PIPE_CONTROL_DEPTH_STALL;

      /* post-sync op */
      bit_test |= PIPE_CONTROL_WRITE_IMMEDIATE |
                  PIPE_CONTROL_WRITE_DEPTH_COUNT |
                  PIPE_CONTROL_WRITE_TIMESTAMP;

      /* only GEN6 allows Notify Enable to satisfy the requirement */
      if (dev->gen == ILO_GEN(6))
         bit_test |= PIPE_CONTROL_INTERRUPT_ENABLE;

      assert(dw1 & bit_test);
   }

   if (dw1 & PIPE_CONTROL_DEPTH_STALL) {
      /*
       * From the Sandy Bridge PRM, volume 2 part 1, page 73:
       *
       *     "Following bits must be clear (when Depth Stall is set):
       *
       *       * Render Target Cache Flush Enable ([12] of DW1)
       *       * Depth Cache Flush Enable ([0] of DW1)"
       */
      assert(!(dw1 & (PIPE_CONTROL_WRITE_FLUSH |
                      PIPE_CONTROL_DEPTH_CACHE_FLUSH)));
   }

   ilo_cp_begin(cp, cmd_len);
   ilo_cp_write(cp, cmd | (cmd_len - 2));
   ilo_cp_write(cp, dw1);
   /* address for the post-sync write, relocated against bo */
   ilo_cp_write_bo(cp, bo_offset, bo, read_domains, write_domain);
   ilo_cp_write(cp, 0);
   if (write_qword)
      ilo_cp_write(cp, 0);
   ilo_cp_end(cp);
}
 
/*
 * Emit 3DPRIMITIVE (GEN6 only) for the given draw.  When \p rectlist is
 * set, a RECTLIST is drawn regardless of info->mode (used for internal
 * blits/clears).
 */
static void
gen6_emit_3DPRIMITIVE(const struct ilo_dev_info *dev,
                      const struct pipe_draw_info *info,
                      const struct ilo_ib_state *ib,
                      bool rectlist,
                      struct ilo_cp *cp)
{
   const uint32_t cmd = ILO_GPE_CMD(0x3, 0x3, 0x00);
   const uint8_t cmd_len = 6;
   int topology, access;
   uint32_t start;

   ILO_GPE_VALID_GEN(dev, 6, 6);

   topology = (rectlist) ?
      _3DPRIM_RECTLIST : ilo_gpe_gen6_translate_pipe_prim(info->mode);

   if (info->indexed) {
      access = GEN4_3DPRIM_VERTEXBUFFER_ACCESS_RANDOM;
      /* indexed draws start relative to the bound index buffer */
      start = info->start + ib->draw_start_offset;
   }
   else {
      access = GEN4_3DPRIM_VERTEXBUFFER_ACCESS_SEQUENTIAL;
      start = info->start;
   }

   ilo_cp_begin(cp, cmd_len);
   ilo_cp_write(cp, cmd | (cmd_len - 2) |
                    topology << GEN4_3DPRIM_TOPOLOGY_TYPE_SHIFT |
                    access);
   ilo_cp_write(cp, info->count);
   ilo_cp_write(cp, start);
   ilo_cp_write(cp, info->instance_count);
   ilo_cp_write(cp, info->start_instance);
   ilo_cp_write(cp, info->index_bias);
   ilo_cp_end(cp);
}
 
/*
 * Upload INTERFACE_DESCRIPTOR_DATA (GEN6 only) for \p num_ids compute
 * kernels and return the state offset.
 */
static uint32_t
gen6_emit_INTERFACE_DESCRIPTOR_DATA(const struct ilo_dev_info *dev,
                                    const struct ilo_shader_state **cs,
                                    uint32_t *sampler_state,
                                    int *num_samplers,
                                    uint32_t *binding_table_state,
                                    int *num_surfaces,
                                    int num_ids,
                                    struct ilo_cp *cp)
{
   /*
    * From the Sandy Bridge PRM, volume 2 part 2, page 34:
    *
    *     "(Interface Descriptor Total Length) This field must have the same
    *      alignment as the Interface Descriptor Data Start Address.
    *
    *      It must be DQWord (32-byte) aligned..."
    *
    * From the Sandy Bridge PRM, volume 2 part 2, page 35:
    *
    *     "(Interface Descriptor Data Start Address) Specifies the 32-byte
    *      aligned address of the Interface Descriptor data."
    */
   const int state_align = 32 / 4;
   const int state_len = (32 / 4) * num_ids;
   uint32_t state_offset, *dw;
   int id;

   ILO_GPE_VALID_GEN(dev, 6, 6);

   dw = ilo_cp_steal_ptr(cp, "INTERFACE_DESCRIPTOR_DATA",
                         state_len, state_align, &state_offset);

   /* each descriptor occupies eight dwords */
   for (id = 0; id < num_ids; id++, dw += 8) {
      dw[0] = ilo_shader_get_kernel_offset(cs[id]);
      dw[1] = 1 << 18; /* SPF */
      dw[2] = sampler_state[id] |
              (num_samplers[id] + 3) / 4 << 2;
      dw[3] = binding_table_state[id] |
              num_surfaces[id];
      dw[4] = 0 << 16 |  /* CURBE Read Length */
              0;         /* CURBE Read Offset */
      dw[5] = 0; /* Barrier ID */
      dw[6] = 0;
      dw[7] = 0;
   }

   return state_offset;
}
 
/*
 * Compute the guardband extents: an 8K x 8K square centered at
 * (center_x, center_y), clamped so it stays within the per-gen supported
 * screenspace range.  Results are returned through the four out-pointers.
 */
static void
viewport_get_guardband(const struct ilo_dev_info *dev,
                       int center_x, int center_y,
                       int *min_gbx, int *max_gbx,
                       int *min_gby, int *max_gby)
{
   /*
    * From the Sandy Bridge PRM, volume 2 part 1, page 234:
    *
    *     "Per-Device Guardband Extents
    *
    *       - Supported X,Y ScreenSpace "Guardband" Extent: [-16K,16K-1]
    *       - Maximum Post-Clamp Delta (X or Y): 16K"
    *
    *     "In addition, in order to be correctly rendered, objects must have a
    *      screenspace bounding box not exceeding 8K in the X or Y direction.
    *      This additional restriction must also be comprehended by software,
    *      i.e., enforced by use of clipping."
    *
    * From the Ivy Bridge PRM, volume 2 part 1, page 248:
    *
    *     "Per-Device Guardband Extents
    *
    *       - Supported X,Y ScreenSpace "Guardband" Extent: [-32K,32K-1]
    *       - Maximum Post-Clamp Delta (X or Y): N/A"
    *
    *     "In addition, in order to be correctly rendered, objects must have a
    *      screenspace bounding box not exceeding 8K in the X or Y direction.
    *      This additional restriction must also be comprehended by software,
    *      i.e., enforced by use of clipping."
    *
    * Combined, the bounding box of any object can not exceed 8K in both
    * width and height.
    *
    * Below we set the guardband as a square of length 8K, centered at where
    * the viewport is.  This makes sure all objects passing the GB test are
    * valid to the renderer, and those failing the XY clipping have a
    * better chance of passing the GB test.
    */
   const int max_extent = (dev->gen >= ILO_GEN(7)) ? 32768 : 16384;
   const int half_len = 8192 / 2;

   /* make sure the guardband is within the valid range */
   if (center_x - half_len < -max_extent)
      center_x = -max_extent + half_len;
   else if (center_x + half_len > max_extent - 1)
      center_x = max_extent - half_len;

   if (center_y - half_len < -max_extent)
      center_y = -max_extent + half_len;
   else if (center_y + half_len > max_extent - 1)
      center_y = max_extent - half_len;

   /*
    * These are integer results stored into int out-parameters; the
    * original (float) casts were pointless (the values are well within
    * exact float range here) and have been dropped.
    */
   *min_gbx = center_x - half_len;
   *max_gbx = center_x + half_len;
   *min_gby = center_y - half_len;
   *max_gby = center_y + half_len;
}
 
/*
 * Fill \p vp from the gallium viewport state: the scale/translate matrix,
 * the guardband in NDC space, and the viewport extents in screen space.
 */
void
ilo_gpe_set_viewport_cso(const struct ilo_dev_info *dev,
                         const struct pipe_viewport_state *state,
                         struct ilo_viewport_cso *vp)
{
   /* use fabsf(): the operands are floats, so fabs() would force a
    * needless float -> double -> float round-trip */
   const float scale_x = fabsf(state->scale[0]);
   const float scale_y = fabsf(state->scale[1]);
   const float scale_z = fabsf(state->scale[2]);
   int min_gbx, max_gbx, min_gby, max_gby;

   ILO_GPE_VALID_GEN(dev, 6, 7);

   viewport_get_guardband(dev,
         (int) state->translate[0],
         (int) state->translate[1],
         &min_gbx, &max_gbx, &min_gby, &max_gby);

   /* matrix form */
   vp->m00 = state->scale[0];
   vp->m11 = state->scale[1];
   vp->m22 = state->scale[2];
   vp->m30 = state->translate[0];
   vp->m31 = state->translate[1];
   vp->m32 = state->translate[2];

   /* guardband in NDC space */
   vp->min_gbx = ((float) min_gbx - state->translate[0]) / scale_x;
   vp->max_gbx = ((float) max_gbx - state->translate[0]) / scale_x;
   vp->min_gby = ((float) min_gby - state->translate[1]) / scale_y;
   vp->max_gby = ((float) max_gby - state->translate[1]) / scale_y;

   /* viewport in screen space */
   vp->min_x = scale_x * -1.0f + state->translate[0];
   vp->max_x = scale_x *  1.0f + state->translate[0];
   vp->min_y = scale_y * -1.0f + state->translate[1];
   vp->max_y = scale_y *  1.0f + state->translate[1];
   vp->min_z = scale_z * -1.0f + state->translate[2];
   vp->max_z = scale_z *  1.0f + state->translate[2];
}
 
/*
 * Upload SF_VIEWPORT (GEN6 only) for the given viewports and return the
 * state offset.
 */
static uint32_t
gen6_emit_SF_VIEWPORT(const struct ilo_dev_info *dev,
                      const struct ilo_viewport_cso *viewports,
                      unsigned num_viewports,
                      struct ilo_cp *cp)
{
   const int state_align = 32 / 4;
   const int state_len = 8 * num_viewports;
   uint32_t state_offset, *dw;
   unsigned slot;

   ILO_GPE_VALID_GEN(dev, 6, 6);

   /*
    * From the Sandy Bridge PRM, volume 2 part 1, page 262:
    *
    *     "The viewport-specific state used by the SF unit (SF_VIEWPORT) is
    *      stored as an array of up to 16 elements..."
    */
   assert(num_viewports && num_viewports <= 16);

   dw = ilo_cp_steal_ptr(cp, "SF_VIEWPORT",
                         state_len, state_align, &state_offset);

   /* eight dwords per viewport: the scale/translate matrix plus padding */
   for (slot = 0; slot < num_viewports; slot++, dw += 8) {
      const struct ilo_viewport_cso *vp = &viewports[slot];

      dw[0] = fui(vp->m00);
      dw[1] = fui(vp->m11);
      dw[2] = fui(vp->m22);
      dw[3] = fui(vp->m30);
      dw[4] = fui(vp->m31);
      dw[5] = fui(vp->m32);
      dw[6] = 0;
      dw[7] = 0;
   }

   return state_offset;
}
 
/*
 * Upload CLIP_VIEWPORT (GEN6 only) holding the guardband extents for each
 * viewport and return the state offset.
 */
static uint32_t
gen6_emit_CLIP_VIEWPORT(const struct ilo_dev_info *dev,
                        const struct ilo_viewport_cso *viewports,
                        unsigned num_viewports,
                        struct ilo_cp *cp)
{
   const int state_align = 32 / 4;
   const int state_len = 4 * num_viewports;
   uint32_t state_offset, *dw;
   unsigned slot;

   ILO_GPE_VALID_GEN(dev, 6, 6);

   /*
    * From the Sandy Bridge PRM, volume 2 part 1, page 193:
    *
    *     "The viewport-related state is stored as an array of up to 16
    *      elements..."
    */
   assert(num_viewports && num_viewports <= 16);

   dw = ilo_cp_steal_ptr(cp, "CLIP_VIEWPORT",
                         state_len, state_align, &state_offset);

   /* four dwords per viewport: the NDC-space guardband box */
   for (slot = 0; slot < num_viewports; slot++, dw += 4) {
      const struct ilo_viewport_cso *vp = &viewports[slot];

      dw[0] = fui(vp->min_gbx);
      dw[1] = fui(vp->max_gbx);
      dw[2] = fui(vp->min_gby);
      dw[3] = fui(vp->max_gby);
   }

   return state_offset;
}
 
/*
 * Upload CC_VIEWPORT holding the depth range for each viewport and return
 * the state offset.
 */
static uint32_t
gen6_emit_CC_VIEWPORT(const struct ilo_dev_info *dev,
                      const struct ilo_viewport_cso *viewports,
                      unsigned num_viewports,
                      struct ilo_cp *cp)
{
   const int state_align = 32 / 4;
   const int state_len = 2 * num_viewports;
   uint32_t state_offset, *dw;
   unsigned slot;

   ILO_GPE_VALID_GEN(dev, 6, 7);

   /*
    * From the Sandy Bridge PRM, volume 2 part 1, page 385:
    *
    *     "The viewport state is stored as an array of up to 16 elements..."
    */
   assert(num_viewports && num_viewports <= 16);

   dw = ilo_cp_steal_ptr(cp, "CC_VIEWPORT",
                         state_len, state_align, &state_offset);

   /* two dwords per viewport: min and max depth */
   for (slot = 0; slot < num_viewports; slot++, dw += 2) {
      dw[0] = fui(viewports[slot].min_z);
      dw[1] = fui(viewports[slot].max_z);
   }

   return state_offset;
}
 
/*
 * Upload COLOR_CALC_STATE (stencil refs, alpha ref, blend color) and
 * return the state offset.
 */
static uint32_t
gen6_emit_COLOR_CALC_STATE(const struct ilo_dev_info *dev,
                           const struct pipe_stencil_ref *stencil_ref,
                           float alpha_ref,
                           const struct pipe_blend_color *blend_color,
                           struct ilo_cp *cp)
{
   const int state_align = 64 / 4;
   const int state_len = 6;
   uint32_t state_offset;
   uint32_t *body;

   ILO_GPE_VALID_GEN(dev, 6, 7);

   body = ilo_cp_steal_ptr(cp, "COLOR_CALC_STATE",
                           state_len, state_align, &state_offset);

   /* front/back stencil refs and the alpha test reference format */
   body[0] = stencil_ref->ref_value[0] << 24 |
             stencil_ref->ref_value[1] << 16 |
             BRW_ALPHATEST_FORMAT_UNORM8;
   body[1] = float_to_ubyte(alpha_ref);
   /* blend constant color, RGBA */
   body[2] = fui(blend_color->color[0]);
   body[3] = fui(blend_color->color[1]);
   body[4] = fui(blend_color->color[2]);
   body[5] = fui(blend_color->color[3]);

   return state_offset;
}
 
/*
 * Map a blend factor to its equivalent when destination alpha is known to
 * be one (e.g. an RGBX render target backed by an RGBA HW format).
 */
static int
gen6_blend_factor_dst_alpha_forced_one(int factor)
{
   if (factor == BRW_BLENDFACTOR_DST_ALPHA)
      return BRW_BLENDFACTOR_ONE;

   /* with dst alpha == 1, both 1-Ad and SRC_ALPHA_SATURATE become zero */
   if (factor == BRW_BLENDFACTOR_INV_DST_ALPHA ||
       factor == BRW_BLENDFACTOR_SRC_ALPHA_SATURATE)
      return BRW_BLENDFACTOR_ZERO;

   return factor;
}
 
/*
 * Pack the per-RT blend-enable dword for BLEND_STATE.  Returns 0 when
 * blending is disabled.  When \p dst_alpha_forced_one is set, factors that
 * read destination alpha are rewritten assuming dst alpha == 1.
 */
static uint32_t
blend_get_rt_blend_enable(const struct ilo_dev_info *dev,
                          const struct pipe_rt_blend_state *rt,
                          bool dst_alpha_forced_one)
{
   int src_rgb, dst_rgb, src_a, dst_a;
   uint32_t dw;

   if (!rt->blend_enable)
      return 0;

   src_rgb = gen6_translate_pipe_blendfactor(rt->rgb_src_factor);
   dst_rgb = gen6_translate_pipe_blendfactor(rt->rgb_dst_factor);
   src_a = gen6_translate_pipe_blendfactor(rt->alpha_src_factor);
   dst_a = gen6_translate_pipe_blendfactor(rt->alpha_dst_factor);

   if (dst_alpha_forced_one) {
      src_rgb = gen6_blend_factor_dst_alpha_forced_one(src_rgb);
      dst_rgb = gen6_blend_factor_dst_alpha_forced_one(dst_rgb);
      src_a = gen6_blend_factor_dst_alpha_forced_one(src_a);
      dst_a = gen6_blend_factor_dst_alpha_forced_one(dst_a);
   }

   /* bit 31: color buffer blend enable */
   dw = 1 << 31 |
        gen6_translate_pipe_blend(rt->alpha_func) << 26 |
        src_a << 20 |
        dst_a << 15 |
        gen6_translate_pipe_blend(rt->rgb_func) << 11 |
        src_rgb << 5 |
        dst_rgb;

   /* bit 30: independent alpha blend, only when alpha differs from rgb */
   if (rt->rgb_func != rt->alpha_func ||
       src_rgb != src_a || dst_rgb != dst_a)
      dw |= 1 << 30;

   return dw;
}
 
/**
 * Translate gallium blend state into per-RT CSOs in \p blend.
 *
 * Each CSO stores a base payload (color write mask, dither) plus the
 * logicop, blend-enable (two variants: normal and dst-alpha-forced-one),
 * and alpha-modification dwords, to be combined later by
 * gen6_emit_BLEND_STATE() depending on the bound render targets.
 */
void
ilo_gpe_init_blend(const struct ilo_dev_info *dev,
                   const struct pipe_blend_state *state,
                   struct ilo_blend_state *blend)
{
   unsigned num_cso, i;

   ILO_GPE_VALID_GEN(dev, 6, 7);

   if (state->independent_blend_enable) {
      num_cso = Elements(blend->cso);
   }
   else {
      /* only cso[0] is used; clear the rest */
      memset(blend->cso, 0, sizeof(blend->cso));
      num_cso = 1;
   }

   blend->independent_blend_enable = state->independent_blend_enable;
   blend->alpha_to_coverage = state->alpha_to_coverage;
   blend->dual_blend = false;

   for (i = 0; i < num_cso; i++) {
      const struct pipe_rt_blend_state *rt = &state->rt[i];
      struct ilo_blend_cso *cso = &blend->cso[i];
      bool dual_blend;

      /* base payload: clamp range and both clamp enables */
      cso->payload[0] = 0;
      cso->payload[1] = BRW_RENDERTARGET_CLAMPRANGE_FORMAT << 2 |
                        0x3;

      /* write-DISABLE bits for the channels masked out */
      if (!(rt->colormask & PIPE_MASK_A))
         cso->payload[1] |= 1 << 27;
      if (!(rt->colormask & PIPE_MASK_R))
         cso->payload[1] |= 1 << 26;
      if (!(rt->colormask & PIPE_MASK_G))
         cso->payload[1] |= 1 << 25;
      if (!(rt->colormask & PIPE_MASK_B))
         cso->payload[1] |= 1 << 24;

      if (state->dither)
         cso->payload[1] |= 1 << 12;

      /*
       * From the Sandy Bridge PRM, volume 2 part 1, page 365:
       *
       *     "Color Buffer Blending and Logic Ops must not be enabled
       *      simultaneously, or behavior is UNDEFINED."
       *
       * Since state->logicop_enable takes precedence over rt->blend_enable,
       * no special care is needed.
       */
      if (state->logicop_enable) {
         cso->dw_logicop = 1 << 22 |
            gen6_translate_pipe_logicop(state->logicop_func) << 18;

         cso->dw_blend = 0;
         cso->dw_blend_dst_alpha_forced_one = 0;

         dual_blend = false;
      }
      else {
         cso->dw_logicop = 0;

         /* pre-compute both variants; emit time picks one per RT format */
         cso->dw_blend = blend_get_rt_blend_enable(dev, rt, false);
         cso->dw_blend_dst_alpha_forced_one =
            blend_get_rt_blend_enable(dev, rt, true);

         dual_blend = (rt->blend_enable &&
                       util_blend_state_is_dual(state, i));
      }

      cso->dw_alpha_mod = 0;

      if (state->alpha_to_coverage) {
         cso->dw_alpha_mod |= 1 << 31;

         /* GEN7 adds an AlphaToCoverage dither enable bit */
         if (dev->gen >= ILO_GEN(7))
            cso->dw_alpha_mod |= 1 << 29;
      }

      /*
       * From the Sandy Bridge PRM, volume 2 part 1, page 378:
       *
       *     "If Dual Source Blending is enabled, this bit (AlphaToOne Enable)
       *      must be disabled."
       */
      if (state->alpha_to_one && !dual_blend)
         cso->dw_alpha_mod |= 1 << 30;

      if (dual_blend)
         blend->dual_blend = true;
   }
}
 
/**
 * Upload BLEND_STATE, combining the pre-computed blend CSOs with the
 * per-render-target format information from \p fb, and return the state
 * offset.  Returns 0 when there is nothing to emit (no color buffer and
 * alpha test disabled).
 */
static uint32_t
gen6_emit_BLEND_STATE(const struct ilo_dev_info *dev,
                      const struct ilo_blend_state *blend,
                      const struct ilo_fb_state *fb,
                      const struct pipe_alpha_state *alpha,
                      struct ilo_cp *cp)
{
   const int state_align = 64 / 4;
   int state_len;
   uint32_t state_offset, *dw;
   unsigned num_targets, i;

   ILO_GPE_VALID_GEN(dev, 6, 7);

   /*
    * From the Sandy Bridge PRM, volume 2 part 1, page 376:
    *
    *     "The blend state is stored as an array of up to 8 elements..."
    */
   num_targets = fb->state.nr_cbufs;
   assert(num_targets <= 8);

   if (!num_targets) {
      if (!alpha->enabled)
         return 0;
      /* to be able to reference alpha func */
      num_targets = 1;
   }

   state_len = 2 * num_targets;

   dw = ilo_cp_steal_ptr(cp, "BLEND_STATE",
                         state_len, state_align, &state_offset);

   for (i = 0; i < num_targets; i++) {
      /* without independent blend, every RT uses cso[0] */
      const unsigned idx = (blend->independent_blend_enable) ? i : 0;
      const struct ilo_blend_cso *cso = &blend->cso[idx];
      const int num_samples = fb->num_samples;
      const struct util_format_description *format_desc =
         (idx < fb->state.nr_cbufs) ?
         util_format_description(fb->state.cbufs[idx]->format) : NULL;
      bool rt_is_unorm, rt_is_pure_integer, rt_dst_alpha_forced_one;

      rt_is_unorm = true;
      rt_is_pure_integer = false;
      rt_dst_alpha_forced_one = false;

      if (format_desc) {
         int ch;

         switch (format_desc->format) {
         case PIPE_FORMAT_B8G8R8X8_UNORM:
            /* force alpha to one when the HW format has alpha */
            assert(ilo_translate_render_format(PIPE_FORMAT_B8G8R8X8_UNORM)
                  == BRW_SURFACEFORMAT_B8G8R8A8_UNORM);
            rt_dst_alpha_forced_one = true;
            break;
         default:
            break;
         }

         /* classify the format by inspecting its channels */
         for (ch = 0; ch < 4; ch++) {
            if (format_desc->channel[ch].type == UTIL_FORMAT_TYPE_VOID)
               continue;

            if (format_desc->channel[ch].pure_integer) {
               rt_is_unorm = false;
               rt_is_pure_integer = true;
               break;
            }

            if (!format_desc->channel[ch].normalized ||
                format_desc->channel[ch].type != UTIL_FORMAT_TYPE_UNSIGNED)
               rt_is_unorm = false;
         }
      }

      dw[0] = cso->payload[0];
      dw[1] = cso->payload[1];

      /* blending does not apply to pure-integer render targets */
      if (!rt_is_pure_integer) {
         if (rt_dst_alpha_forced_one)
            dw[0] |= cso->dw_blend_dst_alpha_forced_one;
         else
            dw[0] |= cso->dw_blend;
      }

      /*
       * From the Sandy Bridge PRM, volume 2 part 1, page 365:
       *
       *     "Logic Ops are only supported on *_UNORM surfaces (excluding
       *      _SRGB variants), otherwise Logic Ops must be DISABLED."
       *
       * Since logicop is ignored for non-UNORM color buffers, no special care
       * is needed.
       */
      if (rt_is_unorm)
         dw[1] |= cso->dw_logicop;

      /*
       * From the Sandy Bridge PRM, volume 2 part 1, page 356:
       *
       *     "When NumSamples = 1, AlphaToCoverage and AlphaToCoverage
       *      Dither both must be disabled."
       *
       * There is no such limitation on GEN7, or for AlphaToOne.  But GL
       * requires that anyway.
       */
      if (num_samples > 1)
         dw[1] |= cso->dw_alpha_mod;

      /*
       * From the Sandy Bridge PRM, volume 2 part 1, page 382:
       *
       *     "Alpha Test can only be enabled if Pixel Shader outputs a float
       *      alpha value."
       */
      if (alpha->enabled && !rt_is_pure_integer) {
         dw[1] |= 1 << 16 |
                  gen6_translate_dsa_func(alpha->func) << 13;
      }

      dw += 2;
   }

   return state_offset;
}
 
/**
 * Translate gallium depth/stencil/alpha state into \p dsa.
 *
 * The three DEPTH_STENCIL_STATE dwords are pre-packed into dsa->payload;
 * the alpha state is copied verbatim for later use by BLEND_STATE.
 */
void
ilo_gpe_init_dsa(const struct ilo_dev_info *dev,
                 const struct pipe_depth_stencil_alpha_state *state,
                 struct ilo_dsa_state *dsa)
{
   const struct pipe_depth_state *depth = &state->depth;
   const struct pipe_stencil_state *stencil0 = &state->stencil[0];
   const struct pipe_stencil_state *stencil1 = &state->stencil[1];
   uint32_t *dw;

   ILO_GPE_VALID_GEN(dev, 6, 7);

   /* copy alpha state for later use */
   dsa->alpha = state->alpha;

   STATIC_ASSERT(Elements(dsa->payload) >= 3);
   dw = dsa->payload;

   /*
    * From the Sandy Bridge PRM, volume 2 part 1, page 359:
    *
    *     "If the Depth Buffer is either undefined or does not have a surface
    *      format of D32_FLOAT_S8X24_UINT or D24_UNORM_S8_UINT and separate
    *      stencil buffer is disabled, Stencil Test Enable must be DISABLED"
    *
    * From the Sandy Bridge PRM, volume 2 part 1, page 370:
    *
    *     "This field (Stencil Test Enable) cannot be enabled if
    *      Surface Format in 3DSTATE_DEPTH_BUFFER is set to D16_UNORM."
    *
    * TODO We do not check these yet.
    */
   if (stencil0->enabled) {
      /* DW0: front-face stencil test enable, func, and ops */
      dw[0] = 1 << 31 |
              gen6_translate_dsa_func(stencil0->func) << 28 |
              gen6_translate_pipe_stencil_op(stencil0->fail_op) << 25 |
              gen6_translate_pipe_stencil_op(stencil0->zfail_op) << 22 |
              gen6_translate_pipe_stencil_op(stencil0->zpass_op) << 19;
      /* bit 18 is the (shared) stencil buffer write enable */
      if (stencil0->writemask)
         dw[0] |= 1 << 18;

      /* DW1: front-face test and write masks */
      dw[1] = stencil0->valuemask << 24 |
              stencil0->writemask << 16;

      if (stencil1->enabled) {
         /* back-face stencil: enable bit 15, func, and ops */
         dw[0] |= 1 << 15 |
                  gen6_translate_dsa_func(stencil1->func) << 12 |
                  gen6_translate_pipe_stencil_op(stencil1->fail_op) << 9 |
                  gen6_translate_pipe_stencil_op(stencil1->zfail_op) << 6 |
                  gen6_translate_pipe_stencil_op(stencil1->zpass_op) << 3;
         if (stencil1->writemask)
            dw[0] |= 1 << 18;

         dw[1] |= stencil1->valuemask << 8 |
                  stencil1->writemask;
      }
   }
   else {
      dw[0] = 0;
      dw[1] = 0;
   }

   /*
    * From the Sandy Bridge PRM, volume 2 part 1, page 360:
    *
    *     "Enabling the Depth Test function without defining a Depth Buffer is
    *      UNDEFINED."
    *
    * From the Sandy Bridge PRM, volume 2 part 1, page 375:
    *
    *     "A Depth Buffer must be defined before enabling writes to it, or
    *      operation is UNDEFINED."
    *
    * TODO We do not check these yet.
    */
   dw[2] = depth->enabled << 31 |
           depth->writemask << 26;
   /* when the depth test is disabled, program the func as ALWAYS */
   if (depth->enabled)
      dw[2] |= gen6_translate_dsa_func(depth->func) << 27;
   else
      dw[2] |= BRW_COMPAREFUNCTION_ALWAYS << 27;
}
 
static uint32_t
gen6_emit_DEPTH_STENCIL_STATE(const struct ilo_dev_info *dev,
const struct ilo_dsa_state *dsa,
struct ilo_cp *cp)
{
const int state_align = 64 / 4;
const int state_len = 3;
uint32_t state_offset, *dw;
 
 
ILO_GPE_VALID_GEN(dev, 6, 7);
 
dw = ilo_cp_steal_ptr(cp, "DEPTH_STENCIL_STATE",
state_len, state_align, &state_offset);
 
dw[0] = dsa->payload[0];
dw[1] = dsa->payload[1];
dw[2] = dsa->payload[2];
 
return state_offset;
}
 
/*
 * Store \p num_states scissor rectangles into \p scissor starting at
 * \p start_slot, pre-packed in SCISSOR_RECT layout.  An empty rectangle is
 * encoded with min > max so everything is scissored out.
 */
void
ilo_gpe_set_scissor(const struct ilo_dev_info *dev,
                    unsigned start_slot,
                    unsigned num_states,
                    const struct pipe_scissor_state *states,
                    struct ilo_scissor_state *scissor)
{
   unsigned slot;

   ILO_GPE_VALID_GEN(dev, 6, 7);

   for (slot = 0; slot < num_states; slot++) {
      const struct pipe_scissor_state *s = &states[slot];
      uint16_t min_x, min_y, max_x, max_y;

      /* both max and min are inclusive in SCISSOR_RECT */
      if (s->minx < s->maxx && s->miny < s->maxy) {
         min_x = s->minx;
         min_y = s->miny;
         max_x = s->maxx - 1;
         max_y = s->maxy - 1;
      }
      else {
         /* we have to make min greater than max */
         min_x = 1;
         min_y = 1;
         max_x = 0;
         max_y = 0;
      }

      scissor->payload[(start_slot + slot) * 2 + 0] = min_y << 16 | min_x;
      scissor->payload[(start_slot + slot) * 2 + 1] = max_y << 16 | max_x;
   }

   /* remember scissor 0 when it is (re)set */
   if (!start_slot && num_states)
      scissor->scissor0 = states[0];
}
 
/*
 * Fill every SCISSOR_RECT slot with an empty rectangle (min greater than
 * max) so that all rendering is scissored away.
 */
void
ilo_gpe_set_scissor_null(const struct ilo_dev_info *dev,
                         struct ilo_scissor_state *scissor)
{
   unsigned slot;

   for (slot = 0; slot < Elements(scissor->payload) / 2; slot++) {
      scissor->payload[slot * 2 + 0] = 1 << 16 | 1;
      scissor->payload[slot * 2 + 1] = 0;
   }
}
 
/*
 * Upload the SCISSOR_RECT array for the first num_viewports viewports and
 * return its offset in the state buffer.
 */
static uint32_t
gen6_emit_SCISSOR_RECT(const struct ilo_dev_info *dev,
                       const struct ilo_scissor_state *scissor,
                       unsigned num_viewports,
                       struct ilo_cp *cp)
{
   const int state_align = 32 / 4;
   const int state_len = 2 * num_viewports;
   uint32_t state_offset;
   uint32_t *dw;
   int i;

   ILO_GPE_VALID_GEN(dev, 6, 7);

   /*
    * From the Sandy Bridge PRM, volume 2 part 1, page 263:
    *
    *     "The viewport-specific state used by the SF unit (SCISSOR_RECT) is
    *      stored as an array of up to 16 elements..."
    */
   assert(num_viewports && num_viewports <= 16);

   dw = ilo_cp_steal_ptr(cp, "SCISSOR_RECT",
         state_len, state_align, &state_offset);

   for (i = 0; i < state_len; i++)
      dw[i] = scissor->payload[i];

   return state_offset;
}
 
/*
 * Upload a BINDING_TABLE_STATE array and return its offset in the state
 * buffer, or 0 when the table is empty.
 */
static uint32_t
gen6_emit_BINDING_TABLE_STATE(const struct ilo_dev_info *dev,
                              uint32_t *surface_states,
                              int num_surface_states,
                              struct ilo_cp *cp)
{
   const int state_align = 32 / 4;
   const int state_len = num_surface_states;
   uint32_t state_offset;
   uint32_t *dw;
   int i;

   ILO_GPE_VALID_GEN(dev, 6, 7);

   /*
    * From the Sandy Bridge PRM, volume 4 part 1, page 69:
    *
    *     "It is stored as an array of up to 256 elements..."
    */
   assert(num_surface_states <= 256);

   if (!num_surface_states)
      return 0;

   dw = ilo_cp_steal_ptr(cp, "BINDING_TABLE_STATE",
         state_len, state_align, &state_offset);

   for (i = 0; i < num_surface_states; i++)
      dw[i] = surface_states[i];

   return state_offset;
}
 
/*
 * Initialize a GEN6 SURFACE_STATE payload for a null surface.
 *
 * From the Sandy Bridge PRM, volume 4 part 1, page 71, reads from a null
 * surface return all zeros and writes are dropped, but Width, Height,
 * Depth, and LOD must still match the depth buffer for render targets.
 */
void
ilo_gpe_init_view_surface_null_gen6(const struct ilo_dev_info *dev,
                                    unsigned width, unsigned height,
                                    unsigned depth, unsigned level,
                                    struct ilo_view_surface *surf)
{
   uint32_t *dw;

   ILO_GPE_VALID_GEN(dev, 6, 6);

   STATIC_ASSERT(Elements(surf->payload) >= 6);
   dw = surf->payload;

   /* SURFTYPE_NULL with the format required by the PRM */
   dw[0] = BRW_SURFACE_NULL << BRW_SURFACE_TYPE_SHIFT |
           BRW_SURFACEFORMAT_B8G8R8A8_UNORM << BRW_SURFACE_FORMAT_SHIFT;

   dw[1] = 0;

   /* size fields must match the depth buffer's for render targets */
   dw[2] = (height - 1) << BRW_SURFACE_HEIGHT_SHIFT |
           (width - 1) << BRW_SURFACE_WIDTH_SHIFT |
           level << BRW_SURFACE_LOD_SHIFT;

   /*
    * From the Sandy Bridge PRM, volume 4 part 1, page 82:
    *
    *     "If Surface Type is SURFTYPE_NULL, this field (Tiled Surface) must
    *      be true"
    */
   dw[3] = (depth - 1) << BRW_SURFACE_DEPTH_SHIFT |
           BRW_SURFACE_TILED;

   dw[4] = 0;
   dw[5] = 0;

   surf->bo = NULL;
}
 
/**
 * Initialize a GEN6 SURFACE_STATE payload for a SURFTYPE_BUFFER view.
 *
 * The buffer is presented to the hardware as an array of structures:
 * \p struct_size is the distance in bytes between consecutive structures
 * and \p elem_format is the format of the element fetched from each
 * structure.  \p offset and \p size are in bytes.
 *
 * surf->bo is set to the buffer's bo without incrementing its reference
 * count; the caller keeps ownership of the reference.
 */
void
ilo_gpe_init_view_surface_for_buffer_gen6(const struct ilo_dev_info *dev,
                                          const struct ilo_buffer *buf,
                                          unsigned offset, unsigned size,
                                          unsigned struct_size,
                                          enum pipe_format elem_format,
                                          bool is_rt, bool render_cache_rw,
                                          struct ilo_view_surface *surf)
{
   const int elem_size = util_format_get_blocksize(elem_format);
   int width, height, depth, pitch;
   int surface_format, num_entries;
   uint32_t *dw;

   ILO_GPE_VALID_GEN(dev, 6, 6);

   /*
    * For SURFTYPE_BUFFER, a SURFACE_STATE specifies an element of a
    * structure in a buffer.
    */

   surface_format = ilo_translate_color_format(elem_format);

   num_entries = size / struct_size;
   /* see if there is enough space to fit another element */
   if (size % struct_size >= elem_size)
      num_entries++;

   /*
    * From the Sandy Bridge PRM, volume 4 part 1, page 76:
    *
    *     "For SURFTYPE_BUFFER render targets, this field (Surface Base
    *      Address) specifies the base address of first element of the
    *      surface. The surface is interpreted as a simple array of that
    *      single element type. The address must be naturally-aligned to the
    *      element size (e.g., a buffer containing R32G32B32A32_FLOAT elements
    *      must be 16-byte aligned).
    *
    *      For SURFTYPE_BUFFER non-rendertarget surfaces, this field specifies
    *      the base address of the first element of the surface, computed in
    *      software by adding the surface base address to the byte offset of
    *      the element in the buffer."
    */
   if (is_rt)
      assert(offset % elem_size == 0);

   /*
    * From the Sandy Bridge PRM, volume 4 part 1, page 77:
    *
    *     "For buffer surfaces, the number of entries in the buffer ranges
    *      from 1 to 2^27."
    */
   assert(num_entries >= 1 && num_entries <= 1 << 27);

   /*
    * From the Sandy Bridge PRM, volume 4 part 1, page 81:
    *
    *     "For surfaces of type SURFTYPE_BUFFER, this field (Surface Pitch)
    *      indicates the size of the structure."
    */
   pitch = struct_size;

   /* the hardware fields hold (value - 1) */
   pitch--;
   num_entries--;
   /* (num_entries - 1) is split across Width/Height/Depth: bits [6:0] */
   width  = (num_entries & 0x0000007f);
   /* bits [19:7] */
   height = (num_entries & 0x000fff80) >> 7;
   /* bits [26:20] */
   depth  = (num_entries & 0x07f00000) >> 20;

   STATIC_ASSERT(Elements(surf->payload) >= 6);
   dw = surf->payload;

   dw[0] = BRW_SURFACE_BUFFER << BRW_SURFACE_TYPE_SHIFT |
           surface_format << BRW_SURFACE_FORMAT_SHIFT;
   if (render_cache_rw)
      dw[0] |= BRW_SURFACE_RC_READ_WRITE;

   /* dword 1 is the base address; relocated when emitted */
   dw[1] = offset;

   dw[2] = height << BRW_SURFACE_HEIGHT_SHIFT |
           width << BRW_SURFACE_WIDTH_SHIFT;

   dw[3] = depth << BRW_SURFACE_DEPTH_SHIFT |
           pitch << BRW_SURFACE_PITCH_SHIFT;

   dw[4] = 0;
   dw[5] = 0;

   /* do not increment reference count */
   surf->bo = buf->bo;
}
 
/**
 * Initialize a GEN6 SURFACE_STATE payload for a texture view.
 *
 * \p first_level/\p num_levels select the mip range and
 * \p first_layer/\p num_layers the layer range.  When \p is_rt is set, the
 * view is for rendering: a single LOD/layer is selected by computing the
 * slice offset manually, so layered rendering is not available on GEN6.
 *
 * surf->bo is set to the texture's bo without incrementing its reference
 * count; the caller keeps ownership of the reference.
 */
void
ilo_gpe_init_view_surface_for_texture_gen6(const struct ilo_dev_info *dev,
                                           const struct ilo_texture *tex,
                                           enum pipe_format format,
                                           unsigned first_level,
                                           unsigned num_levels,
                                           unsigned first_layer,
                                           unsigned num_layers,
                                           bool is_rt, bool render_cache_rw,
                                           struct ilo_view_surface *surf)
{
   int surface_type, surface_format;
   int width, height, depth, pitch, lod;
   unsigned layer_offset, x_offset, y_offset;
   uint32_t *dw;

   ILO_GPE_VALID_GEN(dev, 6, 6);

   surface_type = ilo_gpe_gen6_translate_texture(tex->base.target);
   assert(surface_type != BRW_SURFACE_BUFFER);

   /* when stencil lives in a separate bo, view the depth part as Z32 */
   if (format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT && tex->separate_s8)
      format = PIPE_FORMAT_Z32_FLOAT;

   if (is_rt)
      surface_format = ilo_translate_render_format(format);
   else
      surface_format = ilo_translate_texture_format(format);
   assert(surface_format >= 0);

   width = tex->base.width0;
   height = tex->base.height0;
   /* for non-3D targets, Depth is the number of array layers */
   depth = (tex->base.target == PIPE_TEXTURE_3D) ?
      tex->base.depth0 : num_layers;
   pitch = tex->bo_stride;

   if (surface_type == BRW_SURFACE_CUBE) {
      /*
       * From the Sandy Bridge PRM, volume 4 part 1, page 81:
       *
       *     "For SURFTYPE_CUBE: [DevSNB+]: for Sampling Engine Surfaces, the
       *      range of this field (Depth) is [0,84], indicating the number of
       *      cube array elements (equal to the number of underlying 2D array
       *      elements divided by 6). For other surfaces, this field must be
       *      zero."
       *
       * When is_rt is true, we treat the texture as a 2D one to avoid the
       * restriction.
       */
      if (is_rt) {
         surface_type = BRW_SURFACE_2D;
      }
      else {
         assert(num_layers % 6 == 0);
         depth = num_layers / 6;
      }
   }

   /* sanity check the size against the per-type GEN6 limits */
   assert(width >= 1 && height >= 1 && depth >= 1 && pitch >= 1);
   switch (surface_type) {
   case BRW_SURFACE_1D:
      assert(width <= 8192 && height == 1 && depth <= 512);
      assert(first_layer < 512 && num_layers <= 512);
      break;
   case BRW_SURFACE_2D:
      assert(width <= 8192 && height <= 8192 && depth <= 512);
      assert(first_layer < 512 && num_layers <= 512);
      break;
   case BRW_SURFACE_3D:
      assert(width <= 2048 && height <= 2048 && depth <= 2048);
      assert(first_layer < 2048 && num_layers <= 512);
      if (!is_rt)
         assert(first_layer == 0);
      break;
   case BRW_SURFACE_CUBE:
      assert(width <= 8192 && height <= 8192 && depth <= 85);
      assert(width == height);
      assert(first_layer < 512 && num_layers <= 512);
      if (is_rt)
         assert(first_layer == 0);
      break;
   default:
      assert(!"unexpected surface type");
      break;
   }

   /* non-full array spacing is supported only on GEN7+ */
   assert(tex->array_spacing_full);
   /* non-interleaved samples are supported only on GEN7+ */
   if (tex->base.nr_samples > 1)
      assert(tex->interleaved);

   if (is_rt) {
      /*
       * Compute the offset to the layer manually.
       *
       * For rendering, the hardware requires LOD to be the same for all
       * render targets and the depth buffer.  We need to compute the offset
       * to the layer manually and always set LOD to 0.
       */
      /* NOTE(review): the else arm below is dead code (`if (true)`) */
      if (true) {
         /* we lose the capability for layered rendering */
         assert(num_layers == 1);

         layer_offset = ilo_texture_get_slice_offset(tex,
               first_level, first_layer, &x_offset, &y_offset);

         /* X/Y offsets are in units of 4x2 pixel blocks */
         assert(x_offset % 4 == 0);
         assert(y_offset % 2 == 0);
         x_offset /= 4;
         y_offset /= 2;

         /* derive the size for the LOD */
         width = u_minify(width, first_level);
         height = u_minify(height, first_level);
         if (surface_type == BRW_SURFACE_3D)
            depth = u_minify(depth, first_level);
         else
            depth = 1;

         first_level = 0;
         first_layer = 0;
         lod = 0;
      }
      else {
         layer_offset = 0;
         x_offset = 0;
         y_offset = 0;
      }

      assert(num_levels == 1);
      /* first_level was reset to 0 above, so this keeps lod at 0 */
      lod = first_level;
   }
   else {
      layer_offset = 0;
      x_offset = 0;
      y_offset = 0;

      /* for sampling, LOD is the index of the last usable mip level */
      lod = num_levels - 1;
   }

   /*
    * From the Sandy Bridge PRM, volume 4 part 1, page 76:
    *
    *     "Linear render target surface base addresses must be element-size
    *      aligned, for non-YUV surface formats, or a multiple of 2
    *      element-sizes for YUV surface formats. Other linear surfaces have
    *      no alignment requirements (byte alignment is sufficient.)"
    *
    * From the Sandy Bridge PRM, volume 4 part 1, page 81:
    *
    *     "For linear render target surfaces, the pitch must be a multiple
    *      of the element size for non-YUV surface formats. Pitch must be a
    *      multiple of 2 * element size for YUV surface formats."
    *
    * From the Sandy Bridge PRM, volume 4 part 1, page 86:
    *
    *     "For linear surfaces, this field (X Offset) must be zero"
    */
   if (tex->tiling == INTEL_TILING_NONE) {
      if (is_rt) {
         const int elem_size = util_format_get_blocksize(format);
         assert(layer_offset % elem_size == 0);
         assert(pitch % elem_size == 0);
      }

      assert(!x_offset);
   }

   STATIC_ASSERT(Elements(surf->payload) >= 6);
   dw = surf->payload;

   dw[0] = surface_type << BRW_SURFACE_TYPE_SHIFT |
           surface_format << BRW_SURFACE_FORMAT_SHIFT |
           BRW_SURFACE_MIPMAPLAYOUT_BELOW << BRW_SURFACE_MIPLAYOUT_SHIFT;

   /* enable all cube faces for sampling */
   if (surface_type == BRW_SURFACE_CUBE && !is_rt) {
      dw[0] |= 1 << 9 |
               BRW_SURFACE_CUBEFACE_ENABLES;
   }

   if (render_cache_rw)
      dw[0] |= BRW_SURFACE_RC_READ_WRITE;

   /* dword 1 is the base address; relocated when emitted */
   dw[1] = layer_offset;

   dw[2] = (height - 1) << BRW_SURFACE_HEIGHT_SHIFT |
           (width - 1) << BRW_SURFACE_WIDTH_SHIFT |
           lod << BRW_SURFACE_LOD_SHIFT;

   dw[3] = (depth - 1) << BRW_SURFACE_DEPTH_SHIFT |
           (pitch - 1) << BRW_SURFACE_PITCH_SHIFT |
           ilo_gpe_gen6_translate_winsys_tiling(tex->tiling);

   dw[4] = first_level << BRW_SURFACE_MIN_LOD_SHIFT |
           first_layer << 17 |
           (num_layers - 1) << 8 |
           ((tex->base.nr_samples > 1) ? BRW_SURFACE_MULTISAMPLECOUNT_4 :
                                         BRW_SURFACE_MULTISAMPLECOUNT_1);

   dw[5] = x_offset << BRW_SURFACE_X_OFFSET_SHIFT |
           y_offset << BRW_SURFACE_Y_OFFSET_SHIFT;
   if (tex->valign_4)
      dw[5] |= BRW_SURFACE_VERTICAL_ALIGN_ENABLE;

   /* do not increment reference count */
   surf->bo = tex->bo;
}
 
/*
 * Upload a SURFACE_STATE (6 dwords on GEN6, 8 on GEN7) and return its
 * offset in the state buffer.  Dword 1 holds the surface base address and
 * is emitted with a relocation against surf->bo.
 */
static uint32_t
gen6_emit_SURFACE_STATE(const struct ilo_dev_info *dev,
                        const struct ilo_view_surface *surf,
                        bool for_render,
                        struct ilo_cp *cp)
{
   const int state_align = 32 / 4;
   const int state_len = (dev->gen >= ILO_GEN(7)) ? 8 : 6;
   uint32_t state_offset;
   uint32_t read_domains, write_domain;
   int i;

   ILO_GPE_VALID_GEN(dev, 6, 7);

   /* render targets are read-write; sampler views are read-only */
   if (for_render) {
      read_domains = INTEL_DOMAIN_RENDER;
      write_domain = INTEL_DOMAIN_RENDER;
   }
   else {
      read_domains = INTEL_DOMAIN_SAMPLER;
      write_domain = 0;
   }

   ilo_cp_steal(cp, "SURFACE_STATE", state_len, state_align, &state_offset);

   STATIC_ASSERT(Elements(surf->payload) >= 8);

   ilo_cp_write(cp, surf->payload[0]);
   ilo_cp_write_bo(cp, surf->payload[1],
         surf->bo, read_domains, write_domain);
   for (i = 2; i < state_len; i++)
      ilo_cp_write(cp, surf->payload[i]);

   ilo_cp_end(cp);

   return state_offset;
}
 
static uint32_t
gen6_emit_so_SURFACE_STATE(const struct ilo_dev_info *dev,
const struct pipe_stream_output_target *so,
const struct pipe_stream_output_info *so_info,
int so_index,
struct ilo_cp *cp)
{
struct ilo_buffer *buf = ilo_buffer(so->buffer);
unsigned bo_offset, struct_size;
enum pipe_format elem_format;
struct ilo_view_surface surf;
 
ILO_GPE_VALID_GEN(dev, 6, 6);
 
bo_offset = so->buffer_offset + so_info->output[so_index].dst_offset * 4;
struct_size = so_info->stride[so_info->output[so_index].output_buffer] * 4;
 
switch (so_info->output[so_index].num_components) {
case 1:
elem_format = PIPE_FORMAT_R32_FLOAT;
break;
case 2:
elem_format = PIPE_FORMAT_R32G32_FLOAT;
break;
case 3:
elem_format = PIPE_FORMAT_R32G32B32_FLOAT;
break;
case 4:
elem_format = PIPE_FORMAT_R32G32B32A32_FLOAT;
break;
default:
assert(!"unexpected SO components length");
elem_format = PIPE_FORMAT_R32_FLOAT;
break;
}
 
ilo_gpe_init_view_surface_for_buffer_gen6(dev, buf, bo_offset, so->buffer_size,
struct_size, elem_format, false, true, &surf);
 
return gen6_emit_SURFACE_STATE(dev, &surf, false, cp);
}
 
/**
 * Fill the GEN6 SAMPLER_BORDER_COLOR_STATE payload with the border color
 * converted to every format the sampler may fetch it in.
 *
 * This state is not documented in the Sandy Bridge PRM, but in the
 * Ironlake PRM.  SNORM8 seems to be in DW11 instead of DW1.
 */
static void
sampler_init_border_color_gen6(const struct ilo_dev_info *dev,
                               const union pipe_color_union *color,
                               uint32_t *dw, int num_dwords)
{
   float rgba[4] = {
      color->f[0], color->f[1], color->f[2], color->f[3],
   };

   ILO_GPE_VALID_GEN(dev, 6, 6);

   assert(num_dwords >= 12);

   /* IEEE_FP */
   dw[1] = fui(rgba[0]);
   dw[2] = fui(rgba[1]);
   dw[3] = fui(rgba[2]);
   dw[4] = fui(rgba[3]);

   /* FLOAT_16 */
   dw[5] = util_float_to_half(rgba[0]) |
           util_float_to_half(rgba[1]) << 16;
   dw[6] = util_float_to_half(rgba[2]) |
           util_float_to_half(rgba[3]) << 16;

   /* clamp to [-1.0f, 1.0f] */
   rgba[0] = CLAMP(rgba[0], -1.0f, 1.0f);
   rgba[1] = CLAMP(rgba[1], -1.0f, 1.0f);
   rgba[2] = CLAMP(rgba[2], -1.0f, 1.0f);
   rgba[3] = CLAMP(rgba[3], -1.0f, 1.0f);

   /*
    * SNORM16/SNORM8: mask each converted component down to its field width
    * before packing.  Casting to int16_t/int8_t alone is not enough: the
    * cast result is promoted back to int, so a negative component would
    * sign-extend to 32 bits and corrupt the neighboring components' bits
    * when OR'ed together.
    */
   dw[9] = (uint32_t) (util_iround(rgba[0] * 32767.0f) & 0xffff) |
           (uint32_t) (util_iround(rgba[1] * 32767.0f) & 0xffff) << 16;
   dw[10] = (uint32_t) (util_iround(rgba[2] * 32767.0f) & 0xffff) |
            (uint32_t) (util_iround(rgba[3] * 32767.0f) & 0xffff) << 16;

   /* SNORM8 */
   dw[11] = (uint32_t) (util_iround(rgba[0] * 127.0f) & 0xff) |
            (uint32_t) (util_iround(rgba[1] * 127.0f) & 0xff) << 8 |
            (uint32_t) (util_iround(rgba[2] * 127.0f) & 0xff) << 16 |
            (uint32_t) (util_iround(rgba[3] * 127.0f) & 0xff) << 24;

   /* clamp to [0.0f, 1.0f] */
   rgba[0] = CLAMP(rgba[0], 0.0f, 1.0f);
   rgba[1] = CLAMP(rgba[1], 0.0f, 1.0f);
   rgba[2] = CLAMP(rgba[2], 0.0f, 1.0f);
   rgba[3] = CLAMP(rgba[3], 0.0f, 1.0f);

   /* UNORM8: the values are non-negative, so zero-extension is enough */
   dw[0] = (uint8_t) util_iround(rgba[0] * 255.0f) |
           (uint8_t) util_iround(rgba[1] * 255.0f) << 8 |
           (uint8_t) util_iround(rgba[2] * 255.0f) << 16 |
           (uint8_t) util_iround(rgba[3] * 255.0f) << 24;

   /* UNORM16 */
   dw[7] = (uint16_t) util_iround(rgba[0] * 65535.0f) |
           (uint16_t) util_iround(rgba[1] * 65535.0f) << 16;
   dw[8] = (uint16_t) util_iround(rgba[2] * 65535.0f) |
           (uint16_t) util_iround(rgba[3] * 65535.0f) << 16;
}
 
/**
 * Translate a pipe_sampler_state into the hardware SAMPLER_STATE payload
 * plus the per-target-type variants (dw_filter/dw_filter_aniso and the
 * dw_wrap/dw_wrap_1d/dw_wrap_cube wrap-mode dwords) that
 * gen6_emit_SAMPLER_STATE() selects from at emit time.
 *
 * On GEN7, payload[3..6] holds the raw border color; on GEN6 it holds the
 * 12-dword border color state (see sampler_init_border_color_gen6()).
 */
void
ilo_gpe_init_sampler_cso(const struct ilo_dev_info *dev,
                         const struct pipe_sampler_state *state,
                         struct ilo_sampler_cso *sampler)
{
   int mip_filter, min_filter, mag_filter, max_aniso;
   int lod_bias, max_lod, min_lod;
   int wrap_s, wrap_t, wrap_r, wrap_cube;
   bool clamp_is_to_edge;
   uint32_t dw0, dw1, dw3;

   ILO_GPE_VALID_GEN(dev, 6, 7);

   memset(sampler, 0, sizeof(*sampler));

   mip_filter = gen6_translate_tex_mipfilter(state->min_mip_filter);
   min_filter = gen6_translate_tex_filter(state->min_img_filter);
   mag_filter = gen6_translate_tex_filter(state->mag_img_filter);

   sampler->anisotropic = state->max_anisotropy;

   /* the hardware encodes the max anisotropy ratio as (ratio / 2 - 1) */
   if (state->max_anisotropy >= 2 && state->max_anisotropy <= 16)
      max_aniso = state->max_anisotropy / 2 - 1;
   else if (state->max_anisotropy > 16)
      max_aniso = BRW_ANISORATIO_16;
   else
      max_aniso = BRW_ANISORATIO_2;

   /*
    *
    * Here is how the hardware calculate per-pixel LOD, from my reading of the
    * PRMs:
    *
    *  1) LOD is set to log2(ratio of texels to pixels) if not specified in
    *     other ways.  The number of texels is measured using level
    *     SurfMinLod.
    *  2) Bias is added to LOD.
    *  3) LOD is clamped to [MinLod, MaxLod], and the clamped value is
    *     compared with Base to determine whether magnification or
    *     minification is needed.  (if preclamp is disabled, LOD is compared
    *     with Base before clamping)
    *  4) If magnification is needed, or no mipmapping is requested, LOD is
    *     set to floor(MinLod).
    *  5) LOD is clamped to [0, MIPCnt], and SurfMinLod is added to LOD.
    *
    * With Gallium interface, Base is always zero and
    * pipe_sampler_view::u.tex.first_level specifies SurfMinLod.
    */
   if (dev->gen >= ILO_GEN(7)) {
      /* GEN7 uses 8 fractional bits for LOD values */
      const float scale = 256.0f;

      /* [-16.0, 16.0) in S4.8 */
      lod_bias = (int)
         (CLAMP(state->lod_bias, -16.0f, 15.9f) * scale);
      lod_bias &= 0x1fff;

      /* [0.0, 14.0] in U4.8 */
      max_lod = (int) (CLAMP(state->max_lod, 0.0f, 14.0f) * scale);
      min_lod = (int) (CLAMP(state->min_lod, 0.0f, 14.0f) * scale);
   }
   else {
      /* GEN6 uses 6 fractional bits for LOD values */
      const float scale = 64.0f;

      /* [-16.0, 16.0) in S4.6 */
      lod_bias = (int)
         (CLAMP(state->lod_bias, -16.0f, 15.9f) * scale);
      lod_bias &= 0x7ff;

      /* [0.0, 13.0] in U4.6 */
      max_lod = (int) (CLAMP(state->max_lod, 0.0f, 13.0f) * scale);
      min_lod = (int) (CLAMP(state->min_lod, 0.0f, 13.0f) * scale);
   }

   /*
    * We want LOD to be clamped to determine magnification/minification, and
    * get set to zero when it is magnification or when mipmapping is disabled.
    * The hardware would set LOD to floor(MinLod) and that is a problem when
    * MinLod is greater than or equal to 1.0f.
    *
    * With Base being zero, it is always minification when MinLod is non-zero.
    * To achieve our goal, we just need to set MinLod to zero and set
    * MagFilter to MinFilter when mipmapping is disabled.
    */
   if (state->min_mip_filter == PIPE_TEX_MIPFILTER_NONE && min_lod) {
      min_lod = 0;
      mag_filter = min_filter;
   }

   /*
    * For nearest filtering, PIPE_TEX_WRAP_CLAMP means
    * PIPE_TEX_WRAP_CLAMP_TO_EDGE;  for linear filtering, PIPE_TEX_WRAP_CLAMP
    * means PIPE_TEX_WRAP_CLAMP_TO_BORDER while additionally clamping the
    * texture coordinates to [0.0, 1.0].
    *
    * The clamping will be taken care of in the shaders.  There are two
    * filters here, but let the minification one has a say.
    */
   clamp_is_to_edge = (state->min_img_filter == PIPE_TEX_FILTER_NEAREST);
   if (!clamp_is_to_edge) {
      /* tell the shader which coordinates need the extra [0, 1] clamp */
      sampler->saturate_s = (state->wrap_s == PIPE_TEX_WRAP_CLAMP);
      sampler->saturate_t = (state->wrap_t == PIPE_TEX_WRAP_CLAMP);
      sampler->saturate_r = (state->wrap_r == PIPE_TEX_WRAP_CLAMP);
   }

   /* determine wrap s/t/r */
   wrap_s = gen6_translate_tex_wrap(state->wrap_s, clamp_is_to_edge);
   wrap_t = gen6_translate_tex_wrap(state->wrap_t, clamp_is_to_edge);
   wrap_r = gen6_translate_tex_wrap(state->wrap_r, clamp_is_to_edge);

   /*
    * From the Sandy Bridge PRM, volume 4 part 1, page 107:
    *
    *     "When using cube map texture coordinates, only TEXCOORDMODE_CLAMP
    *      and TEXCOORDMODE_CUBE settings are valid, and each TC component
    *      must have the same Address Control mode."
    *
    * From the Ivy Bridge PRM, volume 4 part 1, page 96:
    *
    *     "This field (Cube Surface Control Mode) must be set to
    *      CUBECTRLMODE_PROGRAMMED"
    *
    * Therefore, we cannot use "Cube Surface Control Mode" for semless cube
    * map filtering.
    */
   if (state->seamless_cube_map &&
       (state->min_img_filter != PIPE_TEX_FILTER_NEAREST ||
        state->mag_img_filter != PIPE_TEX_FILTER_NEAREST)) {
      wrap_cube = BRW_TEXCOORDMODE_CUBE;
   }
   else {
      wrap_cube = BRW_TEXCOORDMODE_CLAMP;
   }

   if (!state->normalized_coords) {
      /*
       * From the Ivy Bridge PRM, volume 4 part 1, page 98:
       *
       *     "The following state must be set as indicated if this field
       *      (Non-normalized Coordinate Enable) is enabled:
       *
       *      - TCX/Y/Z Address Control Mode must be TEXCOORDMODE_CLAMP,
       *        TEXCOORDMODE_HALF_BORDER, or TEXCOORDMODE_CLAMP_BORDER.
       *      - Surface Type must be SURFTYPE_2D or SURFTYPE_3D.
       *      - Mag Mode Filter must be MAPFILTER_NEAREST or
       *        MAPFILTER_LINEAR.
       *      - Min Mode Filter must be MAPFILTER_NEAREST or
       *        MAPFILTER_LINEAR.
       *      - Mip Mode Filter must be MIPFILTER_NONE.
       *      - Min LOD must be 0.
       *      - Max LOD must be 0.
       *      - MIP Count must be 0.
       *      - Surface Min LOD must be 0.
       *      - Texture LOD Bias must be 0."
       */
      assert(wrap_s == BRW_TEXCOORDMODE_CLAMP ||
             wrap_s == BRW_TEXCOORDMODE_CLAMP_BORDER);
      assert(wrap_t == BRW_TEXCOORDMODE_CLAMP ||
             wrap_t == BRW_TEXCOORDMODE_CLAMP_BORDER);
      assert(wrap_r == BRW_TEXCOORDMODE_CLAMP ||
             wrap_r == BRW_TEXCOORDMODE_CLAMP_BORDER);

      assert(mag_filter == BRW_MAPFILTER_NEAREST ||
             mag_filter == BRW_MAPFILTER_LINEAR);
      assert(min_filter == BRW_MAPFILTER_NEAREST ||
             min_filter == BRW_MAPFILTER_LINEAR);

      /* work around a bug in util_blitter */
      mip_filter = BRW_MIPFILTER_NONE;

      assert(mip_filter == BRW_MIPFILTER_NONE);
   }

   if (dev->gen >= ILO_GEN(7)) {
      /* bit 28 is the LOD preclamp enable */
      dw0 = 1 << 28 |
            mip_filter << 20 |
            lod_bias << 1;

      sampler->dw_filter = mag_filter << 17 |
                           min_filter << 14;

      sampler->dw_filter_aniso = BRW_MAPFILTER_ANISOTROPIC << 17 |
                                 BRW_MAPFILTER_ANISOTROPIC << 14 |
                                 1;

      dw1 = min_lod << 20 |
            max_lod << 8;

      if (state->compare_mode != PIPE_TEX_COMPARE_NONE)
         dw1 |= gen6_translate_shadow_func(state->compare_func) << 1;

      dw3 = max_aniso << 19;

      /* round the coordinates for linear filtering */
      if (min_filter != BRW_MAPFILTER_NEAREST) {
         dw3 |= (BRW_ADDRESS_ROUNDING_ENABLE_U_MIN |
                 BRW_ADDRESS_ROUNDING_ENABLE_V_MIN |
                 BRW_ADDRESS_ROUNDING_ENABLE_R_MIN) << 13;
      }
      if (mag_filter != BRW_MAPFILTER_NEAREST) {
         dw3 |= (BRW_ADDRESS_ROUNDING_ENABLE_U_MAG |
                 BRW_ADDRESS_ROUNDING_ENABLE_V_MAG |
                 BRW_ADDRESS_ROUNDING_ENABLE_R_MAG) << 13;
      }

      /* non-normalized coordinate enable */
      if (!state->normalized_coords)
         dw3 |= 1 << 10;

      sampler->dw_wrap = wrap_s << 6 |
                         wrap_t << 3 |
                         wrap_r;

      /*
       * As noted in the classic i965 driver, the HW may still reference
       * wrap_t and wrap_r for 1D textures.  We need to set them to a safe
       * mode
       */
      sampler->dw_wrap_1d = wrap_s << 6 |
                            BRW_TEXCOORDMODE_WRAP << 3 |
                            BRW_TEXCOORDMODE_WRAP;

      sampler->dw_wrap_cube = wrap_cube << 6 |
                              wrap_cube << 3 |
                              wrap_cube;

      STATIC_ASSERT(Elements(sampler->payload) >= 7);

      sampler->payload[0] = dw0;
      sampler->payload[1] = dw1;
      sampler->payload[2] = dw3;

      /* GEN7 takes the raw border color in payload[3..6] */
      memcpy(&sampler->payload[3],
            state->border_color.ui, sizeof(state->border_color.ui));
   }
   else {
      /* bit 28 is the LOD preclamp enable */
      dw0 = 1 << 28 |
            mip_filter << 20 |
            lod_bias << 3;

      if (state->compare_mode != PIPE_TEX_COMPARE_NONE)
         dw0 |= gen6_translate_shadow_func(state->compare_func);

      /* bit 27 flags that min and mag filters differ */
      sampler->dw_filter = (min_filter != mag_filter) << 27 |
                           mag_filter << 17 |
                           min_filter << 14;

      sampler->dw_filter_aniso = BRW_MAPFILTER_ANISOTROPIC << 17 |
                                 BRW_MAPFILTER_ANISOTROPIC << 14;

      dw1 = min_lod << 22 |
            max_lod << 12;

      sampler->dw_wrap = wrap_s << 6 |
                         wrap_t << 3 |
                         wrap_r;

      sampler->dw_wrap_1d = wrap_s << 6 |
                            BRW_TEXCOORDMODE_WRAP << 3 |
                            BRW_TEXCOORDMODE_WRAP;

      sampler->dw_wrap_cube = wrap_cube << 6 |
                              wrap_cube << 3 |
                              wrap_cube;

      dw3 = max_aniso << 19;

      /* round the coordinates for linear filtering */
      if (min_filter != BRW_MAPFILTER_NEAREST) {
         dw3 |= (BRW_ADDRESS_ROUNDING_ENABLE_U_MIN |
                 BRW_ADDRESS_ROUNDING_ENABLE_V_MIN |
                 BRW_ADDRESS_ROUNDING_ENABLE_R_MIN) << 13;
      }
      if (mag_filter != BRW_MAPFILTER_NEAREST) {
         dw3 |= (BRW_ADDRESS_ROUNDING_ENABLE_U_MAG |
                 BRW_ADDRESS_ROUNDING_ENABLE_V_MAG |
                 BRW_ADDRESS_ROUNDING_ENABLE_R_MAG) << 13;
      }

      /* non-normalized coordinate enable */
      if (!state->normalized_coords)
         dw3 |= 1;

      STATIC_ASSERT(Elements(sampler->payload) >= 15);

      sampler->payload[0] = dw0;
      sampler->payload[1] = dw1;
      sampler->payload[2] = dw3;

      /* GEN6 takes the 12-dword border color state in payload[3..14] */
      sampler_init_border_color_gen6(dev,
            &state->border_color, &sampler->payload[3], 12);
   }
}
 
/**
 * Upload the SAMPLER_STATE array (4 dwords per sampler) and return its
 * offset in the state buffer, or 0 when there are no samplers.
 *
 * The per-sampler dwords were mostly pre-computed by
 * ilo_gpe_init_sampler_cso(); here the filter and wrap-mode variants are
 * selected according to the bound view's texture target, and the border
 * color offset (from gen6_emit_SAMPLER_BORDER_COLOR_STATE()) is patched
 * into dword 2.
 */
static uint32_t
gen6_emit_SAMPLER_STATE(const struct ilo_dev_info *dev,
                        const struct ilo_sampler_cso * const *samplers,
                        const struct pipe_sampler_view * const *views,
                        const uint32_t *sampler_border_colors,
                        int num_samplers,
                        struct ilo_cp *cp)
{
   const int state_align = 32 / 4;
   const int state_len = 4 * num_samplers;
   uint32_t state_offset, *dw;
   int i;

   ILO_GPE_VALID_GEN(dev, 6, 7);

   /*
    * From the Sandy Bridge PRM, volume 4 part 1, page 101:
    *
    *     "The sampler state is stored as an array of up to 16 elements..."
    */
   assert(num_samplers <= 16);

   if (!num_samplers)
      return 0;

   dw = ilo_cp_steal_ptr(cp, "SAMPLER_STATE",
         state_len, state_align, &state_offset);

   for (i = 0; i < num_samplers; i++) {
      const struct ilo_sampler_cso *sampler = samplers[i];
      const struct pipe_sampler_view *view = views[i];
      const uint32_t border_color = sampler_border_colors[i];
      uint32_t dw_filter, dw_wrap;

      /* there may be holes */
      if (!sampler || !view) {
         /* disabled sampler (bit 31 is the disable flag) */
         dw[0] = 1 << 31;
         dw[1] = 0;
         dw[2] = 0;
         dw[3] = 0;
         dw += 4;

         continue;
      }

      /* determine filter and wrap modes */
      switch (view->texture->target) {
      case PIPE_TEXTURE_1D:
         dw_filter = (sampler->anisotropic) ?
            sampler->dw_filter_aniso : sampler->dw_filter;
         dw_wrap = sampler->dw_wrap_1d;
         break;
      case PIPE_TEXTURE_3D:
         /*
          * From the Sandy Bridge PRM, volume 4 part 1, page 103:
          *
          *     "Only MAPFILTER_NEAREST and MAPFILTER_LINEAR are supported for
          *      surfaces of type SURFTYPE_3D."
          */
         dw_filter = sampler->dw_filter;
         dw_wrap = sampler->dw_wrap;
         break;
      case PIPE_TEXTURE_CUBE:
         dw_filter = (sampler->anisotropic) ?
            sampler->dw_filter_aniso : sampler->dw_filter;
         dw_wrap = sampler->dw_wrap_cube;
         break;
      default:
         dw_filter = (sampler->anisotropic) ?
            sampler->dw_filter_aniso : sampler->dw_filter;
         dw_wrap = sampler->dw_wrap;
         break;
      }

      dw[0] = sampler->payload[0];
      dw[1] = sampler->payload[1];
      /* the border color offset must be 32-byte aligned */
      assert(!(border_color & 0x1f));
      dw[2] = border_color;
      dw[3] = sampler->payload[2];

      dw[0] |= dw_filter;

      if (dev->gen >= ILO_GEN(7)) {
         dw[3] |= dw_wrap;
      }
      else {
         /*
          * From the Sandy Bridge PRM, volume 4 part 1, page 21:
          *
          *     "[DevSNB] Errata: Incorrect behavior is observed in cases
          *      where the min and mag mode filters are different and
          *      SurfMinLOD is nonzero. The determination of MagMode uses the
          *      following equation instead of the one in the above
          *      pseudocode: MagMode = (LOD + SurfMinLOD - Base <= 0)"
          *
          * As a way to work around that, we set Base to
          * view->u.tex.first_level.
          */
         dw[0] |= view->u.tex.first_level << 22;

         dw[1] |= dw_wrap;
      }

      dw += 4;
   }

   return state_offset;
}
 
/*
 * Upload SAMPLER_BORDER_COLOR_STATE and return its offset in the state
 * buffer.  The payload is 12 dwords on GEN6 and 4 dwords on GEN7; in both
 * cases it starts at sampler->payload[3] (see ilo_gpe_init_sampler_cso()).
 */
static uint32_t
gen6_emit_SAMPLER_BORDER_COLOR_STATE(const struct ilo_dev_info *dev,
                                     const struct ilo_sampler_cso *sampler,
                                     struct ilo_cp *cp)
{
   const int state_align = 32 / 4;
   const int state_len = (dev->gen >= ILO_GEN(7)) ? 4 : 12;
   uint32_t state_offset;
   uint32_t *dw;
   int i;

   ILO_GPE_VALID_GEN(dev, 6, 7);

   dw = ilo_cp_steal_ptr(cp, "SAMPLER_BORDER_COLOR_STATE",
         state_len, state_align, &state_offset);

   for (i = 0; i < state_len; i++)
      dw[i] = sampler->payload[3 + i];

   return state_offset;
}
 
/*
 * Allocate a push constant buffer in the state buffer, zero its padding,
 * and return its offset.  When pcb is non-NULL, a pointer to the buffer is
 * returned through it so the caller can fill in the constants.
 */
static uint32_t
gen6_emit_push_constant_buffer(const struct ilo_dev_info *dev,
                               int size, void **pcb,
                               struct ilo_cp *cp)
{
   /*
    * For all VS, GS, FS, and CS push constant buffers, they must be aligned
    * to 32 bytes, and their sizes are specified in 256-bit units.
    */
   const int state_align = 32 / 4;
   const int state_len = align(size, 32) / 4;
   const int padding = state_len * 4 - size;
   uint32_t state_offset;
   char *buf;

   ILO_GPE_VALID_GEN(dev, 6, 7);

   buf = ilo_cp_steal_ptr(cp, "PUSH_CONSTANT_BUFFER",
         state_len, state_align, &state_offset);

   /* zero out the range between size and the aligned end */
   if (padding > 0)
      memset(buf + size, 0, padding);

   if (pcb)
      *pcb = buf;

   return state_offset;
}
 
/**
 * Estimate the size, in dwords, of command \p cmd when emitted \p arg
 * times (or with \p arg body repetitions for variable-length commands such
 * as 3DSTATE_VERTEX_BUFFERS).
 */
static int
gen6_estimate_command_size(const struct ilo_dev_info *dev,
                           enum ilo_gpe_gen6_command cmd,
                           int arg)
{
   static const struct {
      int header;
      int body;
   } gen6_command_size_table[ILO_GPE_GEN6_COMMAND_COUNT] = {
      [ILO_GPE_GEN6_STATE_BASE_ADDRESS]                       = { 0,  10 },
      [ILO_GPE_GEN6_STATE_SIP]                                = { 0,  2  },
      [ILO_GPE_GEN6_3DSTATE_VF_STATISTICS]                    = { 0,  1  },
      [ILO_GPE_GEN6_PIPELINE_SELECT]                          = { 0,  1  },
      [ILO_GPE_GEN6_MEDIA_VFE_STATE]                          = { 0,  8  },
      [ILO_GPE_GEN6_MEDIA_CURBE_LOAD]                         = { 0,  4  },
      [ILO_GPE_GEN6_MEDIA_INTERFACE_DESCRIPTOR_LOAD]          = { 0,  4  },
      [ILO_GPE_GEN6_MEDIA_GATEWAY_STATE]                      = { 0,  2  },
      [ILO_GPE_GEN6_MEDIA_STATE_FLUSH]                        = { 0,  2  },
      [ILO_GPE_GEN6_MEDIA_OBJECT_WALKER]                      = { 17, 1  },
      [ILO_GPE_GEN6_3DSTATE_BINDING_TABLE_POINTERS]           = { 0,  4  },
      [ILO_GPE_GEN6_3DSTATE_SAMPLER_STATE_POINTERS]           = { 0,  4  },
      [ILO_GPE_GEN6_3DSTATE_URB]                              = { 0,  3  },
      [ILO_GPE_GEN6_3DSTATE_VERTEX_BUFFERS]                   = { 1,  4  },
      [ILO_GPE_GEN6_3DSTATE_VERTEX_ELEMENTS]                  = { 1,  2  },
      [ILO_GPE_GEN6_3DSTATE_INDEX_BUFFER]                     = { 0,  3  },
      [ILO_GPE_GEN6_3DSTATE_VIEWPORT_STATE_POINTERS]          = { 0,  4  },
      [ILO_GPE_GEN6_3DSTATE_CC_STATE_POINTERS]                = { 0,  4  },
      [ILO_GPE_GEN6_3DSTATE_SCISSOR_STATE_POINTERS]           = { 0,  2  },
      [ILO_GPE_GEN6_3DSTATE_VS]                               = { 0,  6  },
      [ILO_GPE_GEN6_3DSTATE_GS]                               = { 0,  7  },
      [ILO_GPE_GEN6_3DSTATE_CLIP]                             = { 0,  4  },
      [ILO_GPE_GEN6_3DSTATE_SF]                               = { 0,  20 },
      [ILO_GPE_GEN6_3DSTATE_WM]                               = { 0,  9  },
      [ILO_GPE_GEN6_3DSTATE_CONSTANT_VS]                      = { 0,  5  },
      [ILO_GPE_GEN6_3DSTATE_CONSTANT_GS]                      = { 0,  5  },
      [ILO_GPE_GEN6_3DSTATE_CONSTANT_PS]                      = { 0,  5  },
      [ILO_GPE_GEN6_3DSTATE_SAMPLE_MASK]                      = { 0,  2  },
      [ILO_GPE_GEN6_3DSTATE_DRAWING_RECTANGLE]                = { 0,  4  },
      [ILO_GPE_GEN6_3DSTATE_DEPTH_BUFFER]                     = { 0,  7  },
      [ILO_GPE_GEN6_3DSTATE_POLY_STIPPLE_OFFSET]              = { 0,  2  },
      [ILO_GPE_GEN6_3DSTATE_POLY_STIPPLE_PATTERN]             = { 0,  33 },
      [ILO_GPE_GEN6_3DSTATE_LINE_STIPPLE]                     = { 0,  3  },
      [ILO_GPE_GEN6_3DSTATE_AA_LINE_PARAMETERS]               = { 0,  3  },
      [ILO_GPE_GEN6_3DSTATE_GS_SVB_INDEX]                     = { 0,  4  },
      [ILO_GPE_GEN6_3DSTATE_MULTISAMPLE]                      = { 0,  3  },
      [ILO_GPE_GEN6_3DSTATE_STENCIL_BUFFER]                   = { 0,  3  },
      [ILO_GPE_GEN6_3DSTATE_HIER_DEPTH_BUFFER]                = { 0,  3  },
      [ILO_GPE_GEN6_3DSTATE_CLEAR_PARAMS]                     = { 0,  2  },
      [ILO_GPE_GEN6_PIPE_CONTROL]                             = { 0,  5  },
      [ILO_GPE_GEN6_3DPRIMITIVE]                              = { 0,  6  },
   };
   int header, body, count;

   ILO_GPE_VALID_GEN(dev, 6, 6);
   /* validate cmd before it is used to index the table */
   assert(cmd < ILO_GPE_GEN6_COMMAND_COUNT);

   header = gen6_command_size_table[cmd].header;
   /* the body size was erroneously looked up with `arg` (the repetition
    * count), an out-of-bounds read for large counts and the wrong entry
    * otherwise; it must be indexed by `cmd` like the header size */
   body = gen6_command_size_table[cmd].body;
   count = arg;

   return (likely(count)) ? header + body * count : 0;
}
 
/*
 * Estimate the size, in dwords, of dynamic state \p state with \p arg
 * elements.  Worst-case alignment padding is included: once for array
 * states (whose elements pack contiguously) and per element for
 * non-array states (which are each aligned individually).
 */
static int
gen6_estimate_state_size(const struct ilo_dev_info *dev,
                         enum ilo_gpe_gen6_state state,
                         int arg)
{
   static const struct {
      int alignment;
      int body;
      bool is_array;
   } gen6_state_size_table[ILO_GPE_GEN6_STATE_COUNT] = {
      [ILO_GPE_GEN6_INTERFACE_DESCRIPTOR_DATA]          = { 8,  8,  true },
      [ILO_GPE_GEN6_SF_VIEWPORT]                        = { 8,  8,  true },
      [ILO_GPE_GEN6_CLIP_VIEWPORT]                      = { 8,  4,  true },
      [ILO_GPE_GEN6_CC_VIEWPORT]                        = { 8,  2,  true },
      [ILO_GPE_GEN6_COLOR_CALC_STATE]                   = { 16, 6,  false },
      [ILO_GPE_GEN6_BLEND_STATE]                        = { 16, 2,  true },
      [ILO_GPE_GEN6_DEPTH_STENCIL_STATE]                = { 16, 3,  false },
      [ILO_GPE_GEN6_SCISSOR_RECT]                       = { 8,  2,  true },
      [ILO_GPE_GEN6_BINDING_TABLE_STATE]                = { 8,  1,  true },
      [ILO_GPE_GEN6_SURFACE_STATE]                      = { 8,  6,  false },
      [ILO_GPE_GEN6_SAMPLER_STATE]                      = { 8,  4,  true },
      [ILO_GPE_GEN6_SAMPLER_BORDER_COLOR_STATE]         = { 8,  12, false },
      [ILO_GPE_GEN6_PUSH_CONSTANT_BUFFER]               = { 8,  1,  true },
   };
   const int count = arg;
   int alignment, body, estimate;
   bool is_array;

   ILO_GPE_VALID_GEN(dev, 6, 6);
   assert(state < ILO_GPE_GEN6_STATE_COUNT);

   alignment = gen6_state_size_table[state].alignment;
   body = gen6_state_size_table[state].body;
   is_array = gen6_state_size_table[state].is_array;

   if (!count)
      return 0;

   /* worst-case padding to reach the alignment */
   estimate = alignment - 1;

   if (is_array) {
      estimate += body * count;
   }
   else {
      estimate += body;
      /* all states are aligned */
      if (count > 1)
         estimate += util_align_npot(body, alignment) * (count - 1);
   }

   return estimate;
}
 
/*
 * The GEN6 GPE dispatch table: the size estimators plus one emit function
 * per command and per dynamic state, all defined above in this file.
 */
static const struct ilo_gpe_gen6 gen6_gpe = {
   .estimate_command_size = gen6_estimate_command_size,
   .estimate_state_size = gen6_estimate_state_size,

#define GEN6_SET(name) .emit_ ## name = gen6_emit_ ## name
   GEN6_SET(STATE_BASE_ADDRESS),
   GEN6_SET(STATE_SIP),
   GEN6_SET(3DSTATE_VF_STATISTICS),
   GEN6_SET(PIPELINE_SELECT),
   GEN6_SET(MEDIA_VFE_STATE),
   GEN6_SET(MEDIA_CURBE_LOAD),
   GEN6_SET(MEDIA_INTERFACE_DESCRIPTOR_LOAD),
   GEN6_SET(MEDIA_GATEWAY_STATE),
   GEN6_SET(MEDIA_STATE_FLUSH),
   GEN6_SET(MEDIA_OBJECT_WALKER),
   GEN6_SET(3DSTATE_BINDING_TABLE_POINTERS),
   GEN6_SET(3DSTATE_SAMPLER_STATE_POINTERS),
   GEN6_SET(3DSTATE_URB),
   GEN6_SET(3DSTATE_VERTEX_BUFFERS),
   GEN6_SET(3DSTATE_VERTEX_ELEMENTS),
   GEN6_SET(3DSTATE_INDEX_BUFFER),
   GEN6_SET(3DSTATE_VIEWPORT_STATE_POINTERS),
   GEN6_SET(3DSTATE_CC_STATE_POINTERS),
   GEN6_SET(3DSTATE_SCISSOR_STATE_POINTERS),
   GEN6_SET(3DSTATE_VS),
   GEN6_SET(3DSTATE_GS),
   GEN6_SET(3DSTATE_CLIP),
   GEN6_SET(3DSTATE_SF),
   GEN6_SET(3DSTATE_WM),
   GEN6_SET(3DSTATE_CONSTANT_VS),
   GEN6_SET(3DSTATE_CONSTANT_GS),
   GEN6_SET(3DSTATE_CONSTANT_PS),
   GEN6_SET(3DSTATE_SAMPLE_MASK),
   GEN6_SET(3DSTATE_DRAWING_RECTANGLE),
   GEN6_SET(3DSTATE_DEPTH_BUFFER),
   GEN6_SET(3DSTATE_POLY_STIPPLE_OFFSET),
   GEN6_SET(3DSTATE_POLY_STIPPLE_PATTERN),
   GEN6_SET(3DSTATE_LINE_STIPPLE),
   GEN6_SET(3DSTATE_AA_LINE_PARAMETERS),
   GEN6_SET(3DSTATE_GS_SVB_INDEX),
   GEN6_SET(3DSTATE_MULTISAMPLE),
   GEN6_SET(3DSTATE_STENCIL_BUFFER),
   GEN6_SET(3DSTATE_HIER_DEPTH_BUFFER),
   GEN6_SET(3DSTATE_CLEAR_PARAMS),
   GEN6_SET(PIPE_CONTROL),
   GEN6_SET(3DPRIMITIVE),
   GEN6_SET(INTERFACE_DESCRIPTOR_DATA),
   GEN6_SET(SF_VIEWPORT),
   GEN6_SET(CLIP_VIEWPORT),
   GEN6_SET(CC_VIEWPORT),
   GEN6_SET(COLOR_CALC_STATE),
   GEN6_SET(BLEND_STATE),
   GEN6_SET(DEPTH_STENCIL_STATE),
   GEN6_SET(SCISSOR_RECT),
   GEN6_SET(BINDING_TABLE_STATE),
   GEN6_SET(SURFACE_STATE),
   GEN6_SET(so_SURFACE_STATE),
   GEN6_SET(SAMPLER_STATE),
   GEN6_SET(SAMPLER_BORDER_COLOR_STATE),
   GEN6_SET(push_constant_buffer),
#undef GEN6_SET
};
 
/**
 * Return the (shared, immutable) GEN6 GPE dispatch table.
 */
const struct ilo_gpe_gen6 *
ilo_gpe_gen6_get(void)
{
   const struct ilo_gpe_gen6 *gpe = &gen6_gpe;

   return gpe;
}
/contrib/sdk/sources/Mesa/src/gallium/drivers/ilo/ilo_gpe_gen6.h
0,0 → 1,560
/*
* Mesa 3-D graphics library
*
* Copyright (C) 2012-2013 LunarG, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*
* Authors:
* Chia-I Wu <olv@lunarg.com>
*/
 
#ifndef ILO_GPE_GEN6_H
#define ILO_GPE_GEN6_H
 
#include "ilo_common.h"
#include "ilo_gpe.h"
 
#define ILO_GPE_VALID_GEN(dev, min_gen, max_gen) \
assert((dev)->gen >= ILO_GEN(min_gen) && (dev)->gen <= ILO_GEN(max_gen))
 
#define ILO_GPE_CMD(pipeline, op, subop) \
(0x3 << 29 | (pipeline) << 27 | (op) << 24 | (subop) << 16)
 
/**
 * Commands that GEN6 GPE could emit.
 *
 * Each trailing comment notes the command's (pipeline, opcode, subopcode)
 * triplet as passed to ILO_GPE_CMD().  Enumerator values are assigned
 * implicitly from zero and index gen6_command_size_table in ilo_gpe_gen6.c,
 * so keep ILO_GPE_GEN6_COMMAND_COUNT as the last entry.
 */
enum ilo_gpe_gen6_command {
   ILO_GPE_GEN6_STATE_BASE_ADDRESS,                  /* (0x0, 0x1, 0x01) */
   ILO_GPE_GEN6_STATE_SIP,                           /* (0x0, 0x1, 0x02) */
   ILO_GPE_GEN6_3DSTATE_VF_STATISTICS,               /* (0x1, 0x0, 0x0b) */
   ILO_GPE_GEN6_PIPELINE_SELECT,                     /* (0x1, 0x1, 0x04) */
   ILO_GPE_GEN6_MEDIA_VFE_STATE,                     /* (0x2, 0x0, 0x00) */
   ILO_GPE_GEN6_MEDIA_CURBE_LOAD,                    /* (0x2, 0x0, 0x01) */
   ILO_GPE_GEN6_MEDIA_INTERFACE_DESCRIPTOR_LOAD,     /* (0x2, 0x0, 0x02) */
   ILO_GPE_GEN6_MEDIA_GATEWAY_STATE,                 /* (0x2, 0x0, 0x03) */
   ILO_GPE_GEN6_MEDIA_STATE_FLUSH,                   /* (0x2, 0x0, 0x04) */
   ILO_GPE_GEN6_MEDIA_OBJECT_WALKER,                 /* (0x2, 0x1, 0x03) */
   ILO_GPE_GEN6_3DSTATE_BINDING_TABLE_POINTERS,      /* (0x3, 0x0, 0x01) */
   ILO_GPE_GEN6_3DSTATE_SAMPLER_STATE_POINTERS,      /* (0x3, 0x0, 0x02) */
   ILO_GPE_GEN6_3DSTATE_URB,                         /* (0x3, 0x0, 0x05) */
   ILO_GPE_GEN6_3DSTATE_VERTEX_BUFFERS,              /* (0x3, 0x0, 0x08) */
   ILO_GPE_GEN6_3DSTATE_VERTEX_ELEMENTS,             /* (0x3, 0x0, 0x09) */
   ILO_GPE_GEN6_3DSTATE_INDEX_BUFFER,                /* (0x3, 0x0, 0x0a) */
   ILO_GPE_GEN6_3DSTATE_VIEWPORT_STATE_POINTERS,     /* (0x3, 0x0, 0x0d) */
   ILO_GPE_GEN6_3DSTATE_CC_STATE_POINTERS,           /* (0x3, 0x0, 0x0e) */
   ILO_GPE_GEN6_3DSTATE_SCISSOR_STATE_POINTERS,      /* (0x3, 0x0, 0x0f) */
   ILO_GPE_GEN6_3DSTATE_VS,                          /* (0x3, 0x0, 0x10) */
   ILO_GPE_GEN6_3DSTATE_GS,                          /* (0x3, 0x0, 0x11) */
   ILO_GPE_GEN6_3DSTATE_CLIP,                        /* (0x3, 0x0, 0x12) */
   ILO_GPE_GEN6_3DSTATE_SF,                          /* (0x3, 0x0, 0x13) */
   ILO_GPE_GEN6_3DSTATE_WM,                          /* (0x3, 0x0, 0x14) */
   ILO_GPE_GEN6_3DSTATE_CONSTANT_VS,                 /* (0x3, 0x0, 0x15) */
   ILO_GPE_GEN6_3DSTATE_CONSTANT_GS,                 /* (0x3, 0x0, 0x16) */
   ILO_GPE_GEN6_3DSTATE_CONSTANT_PS,                 /* (0x3, 0x0, 0x17) */
   ILO_GPE_GEN6_3DSTATE_SAMPLE_MASK,                 /* (0x3, 0x0, 0x18) */
   ILO_GPE_GEN6_3DSTATE_DRAWING_RECTANGLE,           /* (0x3, 0x1, 0x00) */
   ILO_GPE_GEN6_3DSTATE_DEPTH_BUFFER,                /* (0x3, 0x1, 0x05) */
   ILO_GPE_GEN6_3DSTATE_POLY_STIPPLE_OFFSET,         /* (0x3, 0x1, 0x06) */
   ILO_GPE_GEN6_3DSTATE_POLY_STIPPLE_PATTERN,        /* (0x3, 0x1, 0x07) */
   ILO_GPE_GEN6_3DSTATE_LINE_STIPPLE,                /* (0x3, 0x1, 0x08) */
   ILO_GPE_GEN6_3DSTATE_AA_LINE_PARAMETERS,          /* (0x3, 0x1, 0x0a) */
   ILO_GPE_GEN6_3DSTATE_GS_SVB_INDEX,                /* (0x3, 0x1, 0x0b) */
   ILO_GPE_GEN6_3DSTATE_MULTISAMPLE,                 /* (0x3, 0x1, 0x0d) */
   ILO_GPE_GEN6_3DSTATE_STENCIL_BUFFER,              /* (0x3, 0x1, 0x0e) */
   ILO_GPE_GEN6_3DSTATE_HIER_DEPTH_BUFFER,           /* (0x3, 0x1, 0x0f) */
   ILO_GPE_GEN6_3DSTATE_CLEAR_PARAMS,                /* (0x3, 0x1, 0x10) */
   ILO_GPE_GEN6_PIPE_CONTROL,                        /* (0x3, 0x2, 0x00) */
   ILO_GPE_GEN6_3DPRIMITIVE,                         /* (0x3, 0x3, 0x00) */

   ILO_GPE_GEN6_COMMAND_COUNT,
};
 
/**
 * Indirect states that GEN6 GPE could emit.
 *
 * Enumerator values are assigned implicitly from zero and index
 * gen6_state_size_table in ilo_gpe_gen6.c, so keep
 * ILO_GPE_GEN6_STATE_COUNT as the last entry.
 */
enum ilo_gpe_gen6_state {
   ILO_GPE_GEN6_INTERFACE_DESCRIPTOR_DATA,
   ILO_GPE_GEN6_SF_VIEWPORT,
   ILO_GPE_GEN6_CLIP_VIEWPORT,
   ILO_GPE_GEN6_CC_VIEWPORT,
   ILO_GPE_GEN6_COLOR_CALC_STATE,
   ILO_GPE_GEN6_BLEND_STATE,
   ILO_GPE_GEN6_DEPTH_STENCIL_STATE,
   ILO_GPE_GEN6_SCISSOR_RECT,
   ILO_GPE_GEN6_BINDING_TABLE_STATE,
   ILO_GPE_GEN6_SURFACE_STATE,
   ILO_GPE_GEN6_SAMPLER_STATE,
   ILO_GPE_GEN6_SAMPLER_BORDER_COLOR_STATE,
   ILO_GPE_GEN6_PUSH_CONSTANT_BUFFER,

   ILO_GPE_GEN6_STATE_COUNT,
};
 
enum intel_tiling_mode;
 
struct intel_bo;
struct ilo_cp;
struct ilo_texture;
struct ilo_shader;
 
typedef void
(*ilo_gpe_gen6_STATE_BASE_ADDRESS)(const struct ilo_dev_info *dev,
struct intel_bo *general_state_bo,
struct intel_bo *surface_state_bo,
struct intel_bo *dynamic_state_bo,
struct intel_bo *indirect_object_bo,
struct intel_bo *instruction_bo,
uint32_t general_state_size,
uint32_t dynamic_state_size,
uint32_t indirect_object_size,
uint32_t instruction_size,
struct ilo_cp *cp);
 
typedef void
(*ilo_gpe_gen6_STATE_SIP)(const struct ilo_dev_info *dev,
uint32_t sip,
struct ilo_cp *cp);
 
typedef void
(*ilo_gpe_gen6_3DSTATE_VF_STATISTICS)(const struct ilo_dev_info *dev,
bool enable,
struct ilo_cp *cp);
 
typedef void
(*ilo_gpe_gen6_PIPELINE_SELECT)(const struct ilo_dev_info *dev,
int pipeline,
struct ilo_cp *cp);
 
typedef void
(*ilo_gpe_gen6_MEDIA_VFE_STATE)(const struct ilo_dev_info *dev,
int max_threads, int num_urb_entries,
int urb_entry_size,
struct ilo_cp *cp);
 
typedef void
(*ilo_gpe_gen6_MEDIA_CURBE_LOAD)(const struct ilo_dev_info *dev,
uint32_t buf, int size,
struct ilo_cp *cp);
 
typedef void
(*ilo_gpe_gen6_MEDIA_INTERFACE_DESCRIPTOR_LOAD)(const struct ilo_dev_info *dev,
uint32_t offset, int num_ids,
struct ilo_cp *cp);
 
typedef void
(*ilo_gpe_gen6_MEDIA_GATEWAY_STATE)(const struct ilo_dev_info *dev,
int id, int byte, int thread_count,
struct ilo_cp *cp);
 
typedef void
(*ilo_gpe_gen6_MEDIA_STATE_FLUSH)(const struct ilo_dev_info *dev,
int thread_count_water_mark,
int barrier_mask,
struct ilo_cp *cp);
 
typedef void
(*ilo_gpe_gen6_MEDIA_OBJECT_WALKER)(const struct ilo_dev_info *dev,
struct ilo_cp *cp);
 
typedef void
(*ilo_gpe_gen6_3DSTATE_BINDING_TABLE_POINTERS)(const struct ilo_dev_info *dev,
uint32_t vs_binding_table,
uint32_t gs_binding_table,
uint32_t ps_binding_table,
struct ilo_cp *cp);
 
typedef void
(*ilo_gpe_gen6_3DSTATE_SAMPLER_STATE_POINTERS)(const struct ilo_dev_info *dev,
uint32_t vs_sampler_state,
uint32_t gs_sampler_state,
uint32_t ps_sampler_state,
struct ilo_cp *cp);
 
typedef void
(*ilo_gpe_gen6_3DSTATE_URB)(const struct ilo_dev_info *dev,
int vs_total_size, int gs_total_size,
int vs_entry_size, int gs_entry_size,
struct ilo_cp *cp);
 
typedef void
(*ilo_gpe_gen6_3DSTATE_VERTEX_BUFFERS)(const struct ilo_dev_info *dev,
const struct pipe_vertex_buffer *vbuffers,
uint64_t vbuffer_mask,
const struct ilo_ve_state *ve,
struct ilo_cp *cp);
 
typedef void
(*ilo_gpe_gen6_3DSTATE_VERTEX_ELEMENTS)(const struct ilo_dev_info *dev,
const struct ilo_ve_state *ve,
bool last_velement_edgeflag,
bool prepend_generated_ids,
struct ilo_cp *cp);
 
typedef void
(*ilo_gpe_gen6_3DSTATE_INDEX_BUFFER)(const struct ilo_dev_info *dev,
const struct ilo_ib_state *ib,
bool enable_cut_index,
struct ilo_cp *cp);
 
typedef void
(*ilo_gpe_gen6_3DSTATE_VIEWPORT_STATE_POINTERS)(const struct ilo_dev_info *dev,
uint32_t clip_viewport,
uint32_t sf_viewport,
uint32_t cc_viewport,
struct ilo_cp *cp);
 
typedef void
(*ilo_gpe_gen6_3DSTATE_CC_STATE_POINTERS)(const struct ilo_dev_info *dev,
uint32_t blend_state,
uint32_t depth_stencil_state,
uint32_t color_calc_state,
struct ilo_cp *cp);
 
typedef void
(*ilo_gpe_gen6_3DSTATE_SCISSOR_STATE_POINTERS)(const struct ilo_dev_info *dev,
uint32_t scissor_rect,
struct ilo_cp *cp);
 
 
typedef void
(*ilo_gpe_gen6_3DSTATE_VS)(const struct ilo_dev_info *dev,
const struct ilo_shader_state *vs,
int num_samplers,
struct ilo_cp *cp);
 
typedef void
(*ilo_gpe_gen6_3DSTATE_GS)(const struct ilo_dev_info *dev,
const struct ilo_shader_state *gs,
const struct ilo_shader_state *vs,
int verts_per_prim,
struct ilo_cp *cp);
 
typedef void
(*ilo_gpe_gen6_3DSTATE_CLIP)(const struct ilo_dev_info *dev,
const struct ilo_rasterizer_state *rasterizer,
const struct ilo_shader_state *fs,
bool enable_guardband,
int num_viewports,
struct ilo_cp *cp);
 
typedef void
(*ilo_gpe_gen6_3DSTATE_SF)(const struct ilo_dev_info *dev,
const struct ilo_rasterizer_state *rasterizer,
const struct ilo_shader_state *fs,
const struct ilo_shader_state *last_sh,
struct ilo_cp *cp);
 
typedef void
(*ilo_gpe_gen6_3DSTATE_WM)(const struct ilo_dev_info *dev,
const struct ilo_shader_state *fs,
int num_samplers,
const struct ilo_rasterizer_state *rasterizer,
bool dual_blend, bool cc_may_kill,
struct ilo_cp *cp);
 
typedef void
(*ilo_gpe_gen6_3DSTATE_CONSTANT_VS)(const struct ilo_dev_info *dev,
const uint32_t *bufs, const int *sizes,
int num_bufs,
struct ilo_cp *cp);
 
typedef void
(*ilo_gpe_gen6_3DSTATE_CONSTANT_GS)(const struct ilo_dev_info *dev,
const uint32_t *bufs, const int *sizes,
int num_bufs,
struct ilo_cp *cp);
 
typedef void
(*ilo_gpe_gen6_3DSTATE_CONSTANT_PS)(const struct ilo_dev_info *dev,
const uint32_t *bufs, const int *sizes,
int num_bufs,
struct ilo_cp *cp);
 
typedef void
(*ilo_gpe_gen6_3DSTATE_SAMPLE_MASK)(const struct ilo_dev_info *dev,
unsigned sample_mask,
struct ilo_cp *cp);
 
typedef void
(*ilo_gpe_gen6_3DSTATE_DRAWING_RECTANGLE)(const struct ilo_dev_info *dev,
unsigned x, unsigned y,
unsigned width, unsigned height,
struct ilo_cp *cp);
 
typedef void
(*ilo_gpe_gen6_3DSTATE_DEPTH_BUFFER)(const struct ilo_dev_info *dev,
const struct ilo_zs_surface *zs,
struct ilo_cp *cp);
 
typedef void
(*ilo_gpe_gen6_3DSTATE_POLY_STIPPLE_OFFSET)(const struct ilo_dev_info *dev,
int x_offset, int y_offset,
struct ilo_cp *cp);
 
typedef void
(*ilo_gpe_gen6_3DSTATE_POLY_STIPPLE_PATTERN)(const struct ilo_dev_info *dev,
const struct pipe_poly_stipple *pattern,
struct ilo_cp *cp);
 
typedef void
(*ilo_gpe_gen6_3DSTATE_LINE_STIPPLE)(const struct ilo_dev_info *dev,
unsigned pattern, unsigned factor,
struct ilo_cp *cp);
 
typedef void
(*ilo_gpe_gen6_3DSTATE_AA_LINE_PARAMETERS)(const struct ilo_dev_info *dev,
struct ilo_cp *cp);
 
typedef void
(*ilo_gpe_gen6_3DSTATE_GS_SVB_INDEX)(const struct ilo_dev_info *dev,
int index, unsigned svbi,
unsigned max_svbi,
bool load_vertex_count,
struct ilo_cp *cp);
 
 
typedef void
(*ilo_gpe_gen6_3DSTATE_MULTISAMPLE)(const struct ilo_dev_info *dev,
int num_samples,
const uint32_t *packed_sample_pos,
bool pixel_location_center,
struct ilo_cp *cp);
 
typedef void
(*ilo_gpe_gen6_3DSTATE_STENCIL_BUFFER)(const struct ilo_dev_info *dev,
const struct ilo_zs_surface *zs,
struct ilo_cp *cp);
 
typedef void
(*ilo_gpe_gen6_3DSTATE_HIER_DEPTH_BUFFER)(const struct ilo_dev_info *dev,
const struct ilo_zs_surface *zs,
struct ilo_cp *cp);
 
typedef void
(*ilo_gpe_gen6_3DSTATE_CLEAR_PARAMS)(const struct ilo_dev_info *dev,
uint32_t clear_val,
struct ilo_cp *cp);
 
typedef void
(*ilo_gpe_gen6_PIPE_CONTROL)(const struct ilo_dev_info *dev,
uint32_t dw1,
struct intel_bo *bo, uint32_t bo_offset,
bool write_qword,
struct ilo_cp *cp);
 
typedef void
(*ilo_gpe_gen6_3DPRIMITIVE)(const struct ilo_dev_info *dev,
const struct pipe_draw_info *info,
const struct ilo_ib_state *ib,
bool rectlist,
struct ilo_cp *cp);
 
typedef uint32_t
(*ilo_gpe_gen6_INTERFACE_DESCRIPTOR_DATA)(const struct ilo_dev_info *dev,
const struct ilo_shader_state **cs,
uint32_t *sampler_state,
int *num_samplers,
uint32_t *binding_table_state,
int *num_surfaces,
int num_ids,
struct ilo_cp *cp);
typedef uint32_t
(*ilo_gpe_gen6_SF_VIEWPORT)(const struct ilo_dev_info *dev,
const struct ilo_viewport_cso *viewports,
unsigned num_viewports,
struct ilo_cp *cp);
 
typedef uint32_t
(*ilo_gpe_gen6_CLIP_VIEWPORT)(const struct ilo_dev_info *dev,
const struct ilo_viewport_cso *viewports,
unsigned num_viewports,
struct ilo_cp *cp);
 
typedef uint32_t
(*ilo_gpe_gen6_CC_VIEWPORT)(const struct ilo_dev_info *dev,
const struct ilo_viewport_cso *viewports,
unsigned num_viewports,
struct ilo_cp *cp);
 
typedef uint32_t
(*ilo_gpe_gen6_COLOR_CALC_STATE)(const struct ilo_dev_info *dev,
const struct pipe_stencil_ref *stencil_ref,
float alpha_ref,
const struct pipe_blend_color *blend_color,
struct ilo_cp *cp);
 
typedef uint32_t
(*ilo_gpe_gen6_BLEND_STATE)(const struct ilo_dev_info *dev,
const struct ilo_blend_state *blend,
const struct ilo_fb_state *fb,
const struct pipe_alpha_state *alpha,
struct ilo_cp *cp);
 
typedef uint32_t
(*ilo_gpe_gen6_DEPTH_STENCIL_STATE)(const struct ilo_dev_info *dev,
const struct ilo_dsa_state *dsa,
struct ilo_cp *cp);
 
typedef uint32_t
(*ilo_gpe_gen6_SCISSOR_RECT)(const struct ilo_dev_info *dev,
const struct ilo_scissor_state *scissor,
unsigned num_viewports,
struct ilo_cp *cp);
 
typedef uint32_t
(*ilo_gpe_gen6_BINDING_TABLE_STATE)(const struct ilo_dev_info *dev,
uint32_t *surface_states,
int num_surface_states,
struct ilo_cp *cp);
 
typedef uint32_t
(*ilo_gpe_gen6_SURFACE_STATE)(const struct ilo_dev_info *dev,
const struct ilo_view_surface *surface,
bool for_render,
struct ilo_cp *cp);
 
typedef uint32_t
(*ilo_gpe_gen6_so_SURFACE_STATE)(const struct ilo_dev_info *dev,
const struct pipe_stream_output_target *so,
const struct pipe_stream_output_info *so_info,
int so_index,
struct ilo_cp *cp);
 
typedef uint32_t
(*ilo_gpe_gen6_SAMPLER_STATE)(const struct ilo_dev_info *dev,
const struct ilo_sampler_cso * const *samplers,
const struct pipe_sampler_view * const *views,
const uint32_t *sampler_border_colors,
int num_samplers,
struct ilo_cp *cp);
 
typedef uint32_t
(*ilo_gpe_gen6_SAMPLER_BORDER_COLOR_STATE)(const struct ilo_dev_info *dev,
const struct ilo_sampler_cso *sampler,
struct ilo_cp *cp);
 
typedef uint32_t
(*ilo_gpe_gen6_push_constant_buffer)(const struct ilo_dev_info *dev,
int size, void **pcb,
struct ilo_cp *cp);
 
/**
* GEN6 graphics processing engine
*
* This is a low-level interface. It does not handle the interdependencies
* between states.
*/
struct ilo_gpe_gen6 {
int (*estimate_command_size)(const struct ilo_dev_info *dev,
enum ilo_gpe_gen6_command cmd,
int arg);
 
int (*estimate_state_size)(const struct ilo_dev_info *dev,
enum ilo_gpe_gen6_state state,
int arg);
 
#define GEN6_EMIT(name) ilo_gpe_gen6_ ## name emit_ ## name
GEN6_EMIT(STATE_BASE_ADDRESS);
GEN6_EMIT(STATE_SIP);
GEN6_EMIT(3DSTATE_VF_STATISTICS);
GEN6_EMIT(PIPELINE_SELECT);
GEN6_EMIT(MEDIA_VFE_STATE);
GEN6_EMIT(MEDIA_CURBE_LOAD);
GEN6_EMIT(MEDIA_INTERFACE_DESCRIPTOR_LOAD);
GEN6_EMIT(MEDIA_GATEWAY_STATE);
GEN6_EMIT(MEDIA_STATE_FLUSH);
GEN6_EMIT(MEDIA_OBJECT_WALKER);
GEN6_EMIT(3DSTATE_BINDING_TABLE_POINTERS);
GEN6_EMIT(3DSTATE_SAMPLER_STATE_POINTERS);
GEN6_EMIT(3DSTATE_URB);
GEN6_EMIT(3DSTATE_VERTEX_BUFFERS);
GEN6_EMIT(3DSTATE_VERTEX_ELEMENTS);
GEN6_EMIT(3DSTATE_INDEX_BUFFER);
GEN6_EMIT(3DSTATE_VIEWPORT_STATE_POINTERS);
GEN6_EMIT(3DSTATE_CC_STATE_POINTERS);
GEN6_EMIT(3DSTATE_SCISSOR_STATE_POINTERS);
GEN6_EMIT(3DSTATE_VS);
GEN6_EMIT(3DSTATE_GS);
GEN6_EMIT(3DSTATE_CLIP);
GEN6_EMIT(3DSTATE_SF);
GEN6_EMIT(3DSTATE_WM);
GEN6_EMIT(3DSTATE_CONSTANT_VS);
GEN6_EMIT(3DSTATE_CONSTANT_GS);
GEN6_EMIT(3DSTATE_CONSTANT_PS);
GEN6_EMIT(3DSTATE_SAMPLE_MASK);
GEN6_EMIT(3DSTATE_DRAWING_RECTANGLE);
GEN6_EMIT(3DSTATE_DEPTH_BUFFER);
GEN6_EMIT(3DSTATE_POLY_STIPPLE_OFFSET);
GEN6_EMIT(3DSTATE_POLY_STIPPLE_PATTERN);
GEN6_EMIT(3DSTATE_LINE_STIPPLE);
GEN6_EMIT(3DSTATE_AA_LINE_PARAMETERS);
GEN6_EMIT(3DSTATE_GS_SVB_INDEX);
GEN6_EMIT(3DSTATE_MULTISAMPLE);
GEN6_EMIT(3DSTATE_STENCIL_BUFFER);
GEN6_EMIT(3DSTATE_HIER_DEPTH_BUFFER);
GEN6_EMIT(3DSTATE_CLEAR_PARAMS);
GEN6_EMIT(PIPE_CONTROL);
GEN6_EMIT(3DPRIMITIVE);
GEN6_EMIT(INTERFACE_DESCRIPTOR_DATA);
GEN6_EMIT(SF_VIEWPORT);
GEN6_EMIT(CLIP_VIEWPORT);
GEN6_EMIT(CC_VIEWPORT);
GEN6_EMIT(COLOR_CALC_STATE);
GEN6_EMIT(BLEND_STATE);
GEN6_EMIT(DEPTH_STENCIL_STATE);
GEN6_EMIT(SCISSOR_RECT);
GEN6_EMIT(BINDING_TABLE_STATE);
GEN6_EMIT(SURFACE_STATE);
GEN6_EMIT(so_SURFACE_STATE);
GEN6_EMIT(SAMPLER_STATE);
GEN6_EMIT(SAMPLER_BORDER_COLOR_STATE);
GEN6_EMIT(push_constant_buffer);
#undef GEN6_EMIT
};
 
const struct ilo_gpe_gen6 *
ilo_gpe_gen6_get(void);
 
/* Below are helpers for other GENs */
 
int
ilo_gpe_gen6_translate_winsys_tiling(enum intel_tiling_mode tiling);
 
int
ilo_gpe_gen6_translate_pipe_prim(unsigned prim);
 
int
ilo_gpe_gen6_translate_texture(enum pipe_texture_target target);
 
void
ilo_gpe_gen6_fill_3dstate_sf_raster(const struct ilo_dev_info *dev,
const struct ilo_rasterizer_state *rasterizer,
int num_samples,
enum pipe_format depth_format,
uint32_t *payload, unsigned payload_len);
 
void
ilo_gpe_gen6_fill_3dstate_sf_sbe(const struct ilo_dev_info *dev,
const struct ilo_rasterizer_state *rasterizer,
const struct ilo_shader_state *fs,
const struct ilo_shader_state *last_sh,
uint32_t *dw, int num_dwords);
 
#endif /* ILO_GPE_GEN6_H */
/contrib/sdk/sources/Mesa/src/gallium/drivers/ilo/ilo_gpe_gen7.c
0,0 → 1,1939
/*
* Mesa 3-D graphics library
*
* Copyright (C) 2013 LunarG, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*
* Authors:
* Chia-I Wu <olv@lunarg.com>
*/
 
#include "util/u_resource.h"
#include "brw_defines.h"
#include "intel_reg.h"
 
#include "ilo_cp.h"
#include "ilo_format.h"
#include "ilo_resource.h"
#include "ilo_shader.h"
#include "ilo_gpe_gen7.h"
 
/* GPGPU_WALKER: compute dispatch is not implemented for GEN7 yet */
static void
gen7_emit_GPGPU_WALKER(const struct ilo_dev_info *dev,
                       struct ilo_cp *cp)
{
   assert(!"GPGPU_WALKER unsupported");
}
 
/*
 * Emit 3DSTATE_CLEAR_PARAMS (GEN7 encoding: subop 0x04 under opcode 0x0)
 * with the given depth clear value.
 */
static void
gen7_emit_3DSTATE_CLEAR_PARAMS(const struct ilo_dev_info *dev,
                               uint32_t clear_val,
                               struct ilo_cp *cp)
{
   const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x04);
   const uint8_t cmd_len = 3;

   ILO_GPE_VALID_GEN(dev, 7, 7);

   ilo_cp_begin(cp, cmd_len);
   ilo_cp_write(cp, cmd | (cmd_len - 2));
   ilo_cp_write(cp, clear_val);
   /* NOTE(review): presumably the "clear value valid" bit -- confirm in PRM */
   ilo_cp_write(cp, 1);
   ilo_cp_end(cp);
}
 
/*
 * Emit a generic two-dword 3DSTATE_*_POINTERS command: dword 0 carries the
 * header built from the sub-opcode, dword 1 carries the state offset.
 */
static void
gen7_emit_3dstate_pointer(const struct ilo_dev_info *dev,
                          int subop, uint32_t pointer,
                          struct ilo_cp *cp)
{
   const uint8_t cmd_len = 2;
   const uint32_t dw0 = ILO_GPE_CMD(0x3, 0x0, subop) | (cmd_len - 2);

   ILO_GPE_VALID_GEN(dev, 7, 7);

   ilo_cp_begin(cp, cmd_len);
   ilo_cp_write(cp, dw0);
   ilo_cp_write(cp, pointer);
   ilo_cp_end(cp);
}
 
/* 3DSTATE_CC_STATE_POINTERS (subop 0x0e): point at COLOR_CALC_STATE */
static void
gen7_emit_3DSTATE_CC_STATE_POINTERS(const struct ilo_dev_info *dev,
                                    uint32_t color_calc_state,
                                    struct ilo_cp *cp)
{
   gen7_emit_3dstate_pointer(dev, 0x0e, color_calc_state, cp);
}
 
/*
 * Precompute the 3DSTATE_GS payload dwords (DW2, DW4, DW5) for the given
 * GS shader and stash them in \p cso; gen7_emit_3DSTATE_GS() reads them
 * back when the command is emitted.
 */
void
ilo_gpe_init_gs_cso_gen7(const struct ilo_dev_info *dev,
                         const struct ilo_shader_state *gs,
                         struct ilo_shader_cso *cso)
{
   int start_grf, vue_read_len, max_threads;
   uint32_t dw2, dw4, dw5;

   ILO_GPE_VALID_GEN(dev, 7, 7);

   start_grf = ilo_shader_get_kernel_param(gs, ILO_KERNEL_URB_DATA_START_REG);
   vue_read_len = ilo_shader_get_kernel_param(gs, ILO_KERNEL_INPUT_COUNT);

   /* in pairs */
   vue_read_len = (vue_read_len + 1) / 2;

   /* GEN7 GS thread count: 128 on GT2, 36 otherwise */
   switch (dev->gen) {
   case ILO_GEN(7):
      max_threads = (dev->gt == 2) ? 128 : 36;
      break;
   default:
      max_threads = 1;
      break;
   }

   /* ALT floating-point mode is never selected (the condition is fixed) */
   dw2 = (true) ? 0 : GEN6_GS_FLOATING_POINT_MODE_ALT;

   dw4 = vue_read_len << GEN6_GS_URB_READ_LENGTH_SHIFT |
         GEN7_GS_INCLUDE_VERTEX_HANDLES |
         0 << GEN6_GS_URB_ENTRY_READ_OFFSET_SHIFT |
         start_grf << GEN6_GS_DISPATCH_START_GRF_SHIFT;

   dw5 = (max_threads - 1) << GEN6_GS_MAX_THREADS_SHIFT |
         GEN6_GS_STATISTICS_ENABLE |
         GEN6_GS_ENABLE;

   STATIC_ASSERT(Elements(cso->payload) >= 3);
   cso->payload[0] = dw2;
   cso->payload[1] = dw4;
   cso->payload[2] = dw5;
}
 
/*
 * Emit 3DSTATE_GS.  With a NULL \p gs the GS stage is disabled (all-zero
 * payload except for statistics); otherwise the dwords precomputed by
 * ilo_gpe_init_gs_cso_gen7() are emitted with the sampler count folded in.
 */
static void
gen7_emit_3DSTATE_GS(const struct ilo_dev_info *dev,
                     const struct ilo_shader_state *gs,
                     int num_samplers,
                     struct ilo_cp *cp)
{
   const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x11);
   const uint8_t cmd_len = 7;
   const struct ilo_shader_cso *cso;
   uint32_t dw2, dw4, dw5;

   ILO_GPE_VALID_GEN(dev, 7, 7);

   if (!gs) {
      /* GS disabled: keep statistics enabled, zero everything else */
      ilo_cp_begin(cp, cmd_len);
      ilo_cp_write(cp, cmd | (cmd_len - 2));
      ilo_cp_write(cp, 0);
      ilo_cp_write(cp, 0);
      ilo_cp_write(cp, 0);
      ilo_cp_write(cp, 0);
      ilo_cp_write(cp, GEN6_GS_STATISTICS_ENABLE);
      ilo_cp_write(cp, 0);
      ilo_cp_end(cp);
      return;
   }

   cso = ilo_shader_get_kernel_cso(gs);
   dw2 = cso->payload[0];
   dw4 = cso->payload[1];
   dw5 = cso->payload[2];

   /* sampler count is stored in units of four samplers, rounded up */
   dw2 |= ((num_samplers + 3) / 4) << GEN6_GS_SAMPLER_COUNT_SHIFT;

   ilo_cp_begin(cp, cmd_len);
   ilo_cp_write(cp, cmd | (cmd_len - 2));
   ilo_cp_write(cp, ilo_shader_get_kernel_offset(gs));
   ilo_cp_write(cp, dw2);
   ilo_cp_write(cp, 0); /* scratch */
   ilo_cp_write(cp, dw4);
   ilo_cp_write(cp, dw5);
   ilo_cp_write(cp, 0);
   ilo_cp_end(cp);
}
 
/*
 * Emit 3DSTATE_SF using the rasterizer payload filled by the shared GEN6
 * helper.  num_samples is currently hard-coded to 1 here.
 */
static void
gen7_emit_3DSTATE_SF(const struct ilo_dev_info *dev,
                     const struct ilo_rasterizer_state *rasterizer,
                     const struct pipe_surface *zs_surf,
                     struct ilo_cp *cp)
{
   const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x13);
   const uint8_t cmd_len = 7;
   const int num_samples = 1;
   uint32_t payload[6];

   ILO_GPE_VALID_GEN(dev, 7, 7);

   ilo_gpe_gen6_fill_3dstate_sf_raster(dev,
         rasterizer, num_samples,
         (zs_surf) ? zs_surf->format : PIPE_FORMAT_NONE,
         payload, Elements(payload));

   ilo_cp_begin(cp, cmd_len);
   ilo_cp_write(cp, cmd | (cmd_len - 2));
   ilo_cp_write_multi(cp, payload, 6);
   ilo_cp_end(cp);
}
 
/*
 * Precompute the rasterizer-dependent parts of 3DSTATE_WM (DW1, DW2) and
 * the MSAA variants that gen7_emit_3DSTATE_WM() ORs in when more than one
 * sample is used.
 */
void
ilo_gpe_init_rasterizer_wm_gen7(const struct ilo_dev_info *dev,
                                const struct pipe_rasterizer_state *state,
                                struct ilo_rasterizer_wm *wm)
{
   uint32_t dw1, dw2;

   ILO_GPE_VALID_GEN(dev, 7, 7);

   dw1 = GEN7_WM_POSITION_ZW_PIXEL |
         GEN7_WM_LINE_AA_WIDTH_2_0 |
         GEN7_WM_MSRAST_OFF_PIXEL;

   /* same value as in 3DSTATE_SF */
   if (state->line_smooth)
      dw1 |= GEN7_WM_LINE_END_CAP_AA_WIDTH_1_0;

   if (state->poly_stipple_enable)
      dw1 |= GEN7_WM_POLYGON_STIPPLE_ENABLE;
   if (state->line_stipple_enable)
      dw1 |= GEN7_WM_LINE_STIPPLE_ENABLE;

   if (state->bottom_edge_rule)
      dw1 |= GEN7_WM_POINT_RASTRULE_UPPER_RIGHT;

   dw2 = GEN7_WM_MSDISPMODE_PERSAMPLE;

   /*
    * assertion that makes sure
    *
    *   dw1 |= wm->dw_msaa_rast;
    *   dw2 |= wm->dw_msaa_disp;
    *
    * is valid
    */
   STATIC_ASSERT(GEN7_WM_MSRAST_OFF_PIXEL == 0 &&
                 GEN7_WM_MSDISPMODE_PERSAMPLE == 0);

   wm->dw_msaa_rast =
      (state->multisample) ? GEN7_WM_MSRAST_ON_PATTERN : 0;
   wm->dw_msaa_disp = GEN7_WM_MSDISPMODE_PERPIXEL;

   STATIC_ASSERT(Elements(wm->payload) >= 2);
   wm->payload[0] = dw1;
   wm->payload[1] = dw2;
}
 
/*
 * Precompute the FS-dependent dwords for 3DSTATE_PS (DW2, DW4, DW5) and the
 * FS contribution to 3DSTATE_WM DW1, and stash all four in \p cso.
 */
void
ilo_gpe_init_fs_cso_gen7(const struct ilo_dev_info *dev,
                         const struct ilo_shader_state *fs,
                         struct ilo_shader_cso *cso)
{
   int start_grf, max_threads;
   uint32_t dw2, dw4, dw5;
   uint32_t wm_interps, wm_dw1;

   ILO_GPE_VALID_GEN(dev, 7, 7);

   start_grf = ilo_shader_get_kernel_param(fs, ILO_KERNEL_URB_DATA_START_REG);
   /* see brwCreateContext() */
   max_threads = (dev->gt == 2) ? 172 : 48;

   /* ALT floating-point mode is never selected (the condition is fixed) */
   dw2 = (true) ? 0 : GEN7_PS_FLOATING_POINT_MODE_ALT;

   dw4 = (max_threads - 1) << IVB_PS_MAX_THREADS_SHIFT |
         GEN7_PS_POSOFFSET_NONE;

   /* push constants are not used (dead branch kept for reference) */
   if (false)
      dw4 |= GEN7_PS_PUSH_CONSTANT_ENABLE;

   if (ilo_shader_get_kernel_param(fs, ILO_KERNEL_INPUT_COUNT))
      dw4 |= GEN7_PS_ATTRIBUTE_ENABLE;

   /* only SIMD8 dispatch is supported here */
   assert(!ilo_shader_get_kernel_param(fs, ILO_KERNEL_FS_DISPATCH_16_OFFSET));
   dw4 |= GEN7_PS_8_DISPATCH_ENABLE;

   dw5 = start_grf << GEN7_PS_DISPATCH_START_GRF_SHIFT_0 |
         0 << GEN7_PS_DISPATCH_START_GRF_SHIFT_1 |
         0 << GEN7_PS_DISPATCH_START_GRF_SHIFT_2;

   /* FS affects 3DSTATE_WM too */
   wm_dw1 = 0;

   /*
    * TODO set this bit only when
    *
    *  a) fs writes colors and color is not masked, or
    *  b) fs writes depth, or
    *  c) fs or cc kills
    */
   wm_dw1 |= GEN7_WM_DISPATCH_ENABLE;

   /*
    * From the Ivy Bridge PRM, volume 2 part 1, page 278:
    *
    *     "This bit (Pixel Shader Kill Pixel), if ENABLED, indicates that
    *      the PS kernel or color calculator has the ability to kill
    *      (discard) pixels or samples, other than due to depth or stencil
    *      testing. This bit is required to be ENABLED in the following
    *      situations:
    *
    *      - The API pixel shader program contains "killpix" or "discard"
    *        instructions, or other code in the pixel shader kernel that
    *        can cause the final pixel mask to differ from the pixel mask
    *        received on dispatch.
    *
    *      - A sampler with chroma key enabled with kill pixel mode is used
    *        by the pixel shader.
    *
    *      - Any render target has Alpha Test Enable or AlphaToCoverage
    *        Enable enabled.
    *
    *      - The pixel shader kernel generates and outputs oMask.
    *
    *      Note: As ClipDistance clipping is fully supported in hardware
    *      and therefore not via PS instructions, there should be no need
    *      to ENABLE this bit due to ClipDistance clipping."
    */
   if (ilo_shader_get_kernel_param(fs, ILO_KERNEL_FS_USE_KILL))
      wm_dw1 |= GEN7_WM_KILL_ENABLE;

   if (ilo_shader_get_kernel_param(fs, ILO_KERNEL_FS_OUTPUT_Z))
      wm_dw1 |= GEN7_WM_PSCDEPTH_ON;

   if (ilo_shader_get_kernel_param(fs, ILO_KERNEL_FS_INPUT_Z))
      wm_dw1 |= GEN7_WM_USES_SOURCE_DEPTH;

   if (ilo_shader_get_kernel_param(fs, ILO_KERNEL_FS_INPUT_W))
      wm_dw1 |= GEN7_WM_USES_SOURCE_W;

   wm_interps = ilo_shader_get_kernel_param(fs,
         ILO_KERNEL_FS_BARYCENTRIC_INTERPOLATIONS);

   wm_dw1 |= wm_interps << GEN7_WM_BARYCENTRIC_INTERPOLATION_MODE_SHIFT;

   STATIC_ASSERT(Elements(cso->payload) >= 4);
   cso->payload[0] = dw2;
   cso->payload[1] = dw4;
   cso->payload[2] = dw5;
   cso->payload[3] = wm_dw1;
}
 
/*
 * Emit 3DSTATE_WM from the rasterizer payload, ORing in the FS contribution
 * (cso payload[3]) and, when multisampled, the precomputed MSAA dwords.
 * num_samples is currently hard-coded to 1 here.
 */
static void
gen7_emit_3DSTATE_WM(const struct ilo_dev_info *dev,
                     const struct ilo_shader_state *fs,
                     const struct ilo_rasterizer_state *rasterizer,
                     bool cc_may_kill,
                     struct ilo_cp *cp)
{
   const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x14);
   const uint8_t cmd_len = 3;
   const int num_samples = 1;
   uint32_t dw1, dw2;

   ILO_GPE_VALID_GEN(dev, 7, 7);

   /* see ilo_gpe_init_rasterizer_wm() */
   dw1 = rasterizer->wm.payload[0];
   dw2 = rasterizer->wm.payload[1];

   dw1 |= GEN7_WM_STATISTICS_ENABLE;

   /* depth clear/resolve passes are not wired up (dead branch) */
   if (false) {
      dw1 |= GEN7_WM_DEPTH_CLEAR;
      dw1 |= GEN7_WM_DEPTH_RESOLVE;
      dw1 |= GEN7_WM_HIERARCHICAL_DEPTH_RESOLVE;
   }

   if (fs) {
      const struct ilo_shader_cso *fs_cso = ilo_shader_get_kernel_cso(fs);

      dw1 |= fs_cso->payload[3];
   }

   if (cc_may_kill) {
      dw1 |= GEN7_WM_DISPATCH_ENABLE |
             GEN7_WM_KILL_ENABLE;
   }

   if (num_samples > 1) {
      dw1 |= rasterizer->wm.dw_msaa_rast;
      dw2 |= rasterizer->wm.dw_msaa_disp;
   }

   ilo_cp_begin(cp, cmd_len);
   ilo_cp_write(cp, cmd | (cmd_len - 2));
   ilo_cp_write(cp, dw1);
   ilo_cp_write(cp, dw2);
   ilo_cp_end(cp);
}
 
/*
 * Emit a 3DSTATE_CONSTANT_* command (the stage is selected by \p subop)
 * binding up to four push constant buffers.  Read lengths for buffers 0/1
 * are packed into dword 0 and for buffers 2/3 into dword 1; dwords 2-5
 * carry the buffer offsets.
 */
static void
gen7_emit_3dstate_constant(const struct ilo_dev_info *dev,
                           int subop,
                           const uint32_t *bufs, const int *sizes,
                           int num_bufs,
                           struct ilo_cp *cp)
{
   const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, subop);
   const uint8_t cmd_len = 7;
   uint32_t dw[6];
   int total_read_length, i;

   ILO_GPE_VALID_GEN(dev, 7, 7);

   /* VS, HS, DS, GS, and PS variants */
   assert(subop >= 0x15 && subop <= 0x1a && subop != 0x18);

   assert(num_bufs <= 4);

   dw[0] = 0;
   dw[1] = 0;

   total_read_length = 0;
   for (i = 0; i < 4; i++) {
      int read_len;

      /*
       * From the Ivy Bridge PRM, volume 2 part 1, page 112:
       *
       *     "Constant buffers must be enabled in order from Constant Buffer 0
       *      to Constant Buffer 3 within this command.  For example, it is
       *      not allowed to enable Constant Buffer 1 by programming a
       *      non-zero value in the VS Constant Buffer 1 Read Length without a
       *      non-zero value in VS Constant Buffer 0 Read Length."
       */
      if (i >= num_bufs || !sizes[i]) {
         /* zero out the remaining slots, asserting they are indeed unused */
         for (; i < 4; i++) {
            assert(i >= num_bufs || !sizes[i]);
            dw[2 + i] = 0;
         }
         break;
      }

      /* read lengths are in 256-bit units */
      read_len = (sizes[i] + 31) / 32;
      /* the lower 5 bits are used for memory object control state */
      assert(bufs[i] % 32 == 0);

      dw[i / 2] |= read_len << ((i % 2) ? 16 : 0);
      dw[2 + i] = bufs[i];

      total_read_length += read_len;
   }

   /*
    * From the Ivy Bridge PRM, volume 2 part 1, page 113:
    *
    *     "The sum of all four read length fields must be less than or equal
    *      to the size of 64"
    */
   assert(total_read_length <= 64);

   ilo_cp_begin(cp, cmd_len);
   ilo_cp_write(cp, cmd | (cmd_len - 2));
   ilo_cp_write_multi(cp, dw, 6);
   ilo_cp_end(cp);
}
 
/* 3DSTATE_CONSTANT_VS: bind VS push constant buffers (subop 0x15) */
static void
gen7_emit_3DSTATE_CONSTANT_VS(const struct ilo_dev_info *dev,
                              const uint32_t *bufs, const int *sizes,
                              int num_bufs,
                              struct ilo_cp *cp)
{
   gen7_emit_3dstate_constant(dev, 0x15, bufs, sizes, num_bufs, cp);
}

/* 3DSTATE_CONSTANT_GS: bind GS push constant buffers (subop 0x16) */
static void
gen7_emit_3DSTATE_CONSTANT_GS(const struct ilo_dev_info *dev,
                              const uint32_t *bufs, const int *sizes,
                              int num_bufs,
                              struct ilo_cp *cp)
{
   gen7_emit_3dstate_constant(dev, 0x16, bufs, sizes, num_bufs, cp);
}

/* 3DSTATE_CONSTANT_PS: bind PS push constant buffers (subop 0x17) */
static void
gen7_emit_3DSTATE_CONSTANT_PS(const struct ilo_dev_info *dev,
                              const uint32_t *bufs, const int *sizes,
                              int num_bufs,
                              struct ilo_cp *cp)
{
   gen7_emit_3dstate_constant(dev, 0x17, bufs, sizes, num_bufs, cp);
}
 
/*
 * Emit 3DSTATE_SAMPLE_MASK, clamping the mask to the bits valid for the
 * current sample count (bit 0 is always allowed).
 */
static void
gen7_emit_3DSTATE_SAMPLE_MASK(const struct ilo_dev_info *dev,
                              unsigned sample_mask,
                              int num_samples,
                              struct ilo_cp *cp)
{
   const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x18);
   const uint8_t cmd_len = 2;
   const unsigned valid_mask = ((1 << num_samples) - 1) | 0x1;

   ILO_GPE_VALID_GEN(dev, 7, 7);

   /*
    * From the Ivy Bridge PRM, volume 2 part 1, page 294:
    *
    *     "If Number of Multisamples is NUMSAMPLES_1, bits 7:1 of this field
    *      (Sample Mask) must be zero.
    *
    *      If Number of Multisamples is NUMSAMPLES_4, bits 7:4 of this field
    *      must be zero."
    */
   sample_mask &= valid_mask;

   ilo_cp_begin(cp, cmd_len);
   ilo_cp_write(cp, cmd | (cmd_len - 2));
   ilo_cp_write(cp, sample_mask);
   ilo_cp_end(cp);
}
 
/* 3DSTATE_CONSTANT_HS: bind HS push constant buffers (subop 0x19) */
static void
gen7_emit_3DSTATE_CONSTANT_HS(const struct ilo_dev_info *dev,
                              const uint32_t *bufs, const int *sizes,
                              int num_bufs,
                              struct ilo_cp *cp)
{
   gen7_emit_3dstate_constant(dev, 0x19, bufs, sizes, num_bufs, cp);
}

/* 3DSTATE_CONSTANT_DS: bind DS push constant buffers (subop 0x1a) */
static void
gen7_emit_3DSTATE_CONSTANT_DS(const struct ilo_dev_info *dev,
                              const uint32_t *bufs, const int *sizes,
                              int num_bufs,
                              struct ilo_cp *cp)
{
   gen7_emit_3dstate_constant(dev, 0x1a, bufs, sizes, num_bufs, cp);
}
 
/*
 * Emit 3DSTATE_HS.  The HS stage is not supported yet, so the command body
 * is all zeros (stage disabled).
 */
static void
gen7_emit_3DSTATE_HS(const struct ilo_dev_info *dev,
                     const struct ilo_shader_state *hs,
                     int num_samplers,
                     struct ilo_cp *cp)
{
   const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x1b);
   const uint8_t cmd_len = 7;
   int i;

   ILO_GPE_VALID_GEN(dev, 7, 7);

   /* no HS kernel may be bound */
   assert(!hs);

   ilo_cp_begin(cp, cmd_len);
   ilo_cp_write(cp, cmd | (cmd_len - 2));
   for (i = 1; i < cmd_len; i++)
      ilo_cp_write(cp, 0);
   ilo_cp_end(cp);
}
 
/*
 * Emit 3DSTATE_TE.  Tessellation is not supported yet, so the body is all
 * zeros (TE disabled).
 */
static void
gen7_emit_3DSTATE_TE(const struct ilo_dev_info *dev,
                     struct ilo_cp *cp)
{
   const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x1c);
   const uint8_t cmd_len = 4;
   int i;

   ILO_GPE_VALID_GEN(dev, 7, 7);

   ilo_cp_begin(cp, cmd_len);
   ilo_cp_write(cp, cmd | (cmd_len - 2));
   for (i = 1; i < cmd_len; i++)
      ilo_cp_write(cp, 0);
   ilo_cp_end(cp);
}
 
/*
 * Emit 3DSTATE_DS.  The DS stage is not supported yet, so the command body
 * is all zeros (stage disabled).
 */
static void
gen7_emit_3DSTATE_DS(const struct ilo_dev_info *dev,
                     const struct ilo_shader_state *ds,
                     int num_samplers,
                     struct ilo_cp *cp)
{
   const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x1d);
   const uint8_t cmd_len = 6;
   int i;

   ILO_GPE_VALID_GEN(dev, 7, 7);

   /* no DS kernel may be bound */
   assert(!ds);

   ilo_cp_begin(cp, cmd_len);
   ilo_cp_write(cp, cmd | (cmd_len - 2));
   for (i = 1; i < cmd_len; i++)
      ilo_cp_write(cp, 0);
   ilo_cp_end(cp);
}
 
/*
 * Emit 3DSTATE_STREAMOUT.
 *
 * \param buffer_mask          bitmask of SO buffers written; zero disables
 *                             stream output entirely
 * \param vertex_attrib_count  number of vertex attributes read per vertex
 * \param rasterizer_discard   true when rasterization is to be disabled
 */
static void
gen7_emit_3DSTATE_STREAMOUT(const struct ilo_dev_info *dev,
                            unsigned buffer_mask,
                            int vertex_attrib_count,
                            bool rasterizer_discard,
                            struct ilo_cp *cp)
{
   const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x1e);
   const uint8_t cmd_len = 3;
   const bool enable = (buffer_mask != 0);
   uint32_t dw1, dw2;
   int read_len;

   ILO_GPE_VALID_GEN(dev, 7, 7);

   if (!enable) {
      /* SO disabled; rasterizer discard may still need to be programmed */
      dw1 = 0 << SO_RENDER_STREAM_SELECT_SHIFT;
      if (rasterizer_discard)
         dw1 |= SO_RENDERING_DISABLE;

      dw2 = 0;

      ilo_cp_begin(cp, cmd_len);
      ilo_cp_write(cp, cmd | (cmd_len - 2));
      ilo_cp_write(cp, dw1);
      ilo_cp_write(cp, dw2);
      ilo_cp_end(cp);
      return;
   }

   /* two attributes per read; round up, and read at least one unit */
   read_len = (vertex_attrib_count + 1) / 2;
   if (!read_len)
      read_len = 1;

   dw1 = SO_FUNCTION_ENABLE |
         0 << SO_RENDER_STREAM_SELECT_SHIFT |
         SO_STATISTICS_ENABLE |
         buffer_mask << 8;

   if (rasterizer_discard)
      dw1 |= SO_RENDERING_DISABLE;

   /* API_OPENGL */
   if (true)
      dw1 |= SO_REORDER_TRAILING;

   /* only stream 0 is used; streams 1-3 are left at zero */
   dw2 = 0 << SO_STREAM_3_VERTEX_READ_OFFSET_SHIFT |
         0 << SO_STREAM_3_VERTEX_READ_LENGTH_SHIFT |
         0 << SO_STREAM_2_VERTEX_READ_OFFSET_SHIFT |
         0 << SO_STREAM_2_VERTEX_READ_LENGTH_SHIFT |
         0 << SO_STREAM_1_VERTEX_READ_OFFSET_SHIFT |
         0 << SO_STREAM_1_VERTEX_READ_LENGTH_SHIFT |
         0 << SO_STREAM_0_VERTEX_READ_OFFSET_SHIFT |
         (read_len - 1) << SO_STREAM_0_VERTEX_READ_LENGTH_SHIFT;

   ilo_cp_begin(cp, cmd_len);
   ilo_cp_write(cp, cmd | (cmd_len - 2));
   ilo_cp_write(cp, dw1);
   ilo_cp_write(cp, dw2);
   ilo_cp_end(cp);
}
 
/*
 * Emit 3DSTATE_SBE.  The 13-dword payload is identical to the SBE half of
 * the gen6 3DSTATE_SF command and is filled by the shared gen6 helper.
 */
static void
gen7_emit_3DSTATE_SBE(const struct ilo_dev_info *dev,
                      const struct ilo_rasterizer_state *rasterizer,
                      const struct ilo_shader_state *fs,
                      const struct ilo_shader_state *last_sh,
                      struct ilo_cp *cp)
{
   const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x1f);
   const uint8_t cmd_len = 14;
   uint32_t body[13];

   ILO_GPE_VALID_GEN(dev, 7, 7);

   ilo_gpe_gen6_fill_3dstate_sf_sbe(dev, rasterizer,
                                    fs, last_sh, body, Elements(body));

   ilo_cp_begin(cp, cmd_len);
   ilo_cp_write(cp, cmd | (cmd_len - 2));
   ilo_cp_write_multi(cp, body, Elements(body));
   ilo_cp_end(cp);
}
 
/*
 * Emit 3DSTATE_PS.
 *
 * When \p fs is NULL, a null kernel is emitted; the dispatch mode is still
 * set because the GPU hangs when none of the dispatch enable bits is set.
 */
static void
gen7_emit_3DSTATE_PS(const struct ilo_dev_info *dev,
                     const struct ilo_shader_state *fs,
                     int num_samplers, bool dual_blend,
                     struct ilo_cp *cp)
{
   const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x20);
   const uint8_t cmd_len = 8;
   const struct ilo_shader_cso *cso;
   uint32_t dw2, dw4, dw5;

   ILO_GPE_VALID_GEN(dev, 7, 7);

   if (!fs) {
      /* see brwCreateContext() */
      const int max_threads = (dev->gt == 2) ? 172 : 48;

      ilo_cp_begin(cp, cmd_len);
      ilo_cp_write(cp, cmd | (cmd_len - 2));
      ilo_cp_write(cp, 0);
      ilo_cp_write(cp, 0);
      ilo_cp_write(cp, 0);
      /* GPU hangs if none of the dispatch enable bits is set */
      ilo_cp_write(cp, (max_threads - 1) << IVB_PS_MAX_THREADS_SHIFT |
                       GEN7_PS_8_DISPATCH_ENABLE);
      ilo_cp_write(cp, 0);
      ilo_cp_write(cp, 0);
      ilo_cp_write(cp, 0);
      ilo_cp_end(cp);

      return;
   }

   /* dw2/dw4/dw5 come pre-packed in the kernel CSO payload */
   cso = ilo_shader_get_kernel_cso(fs);
   dw2 = cso->payload[0];
   dw4 = cso->payload[1];
   dw5 = cso->payload[2];

   /* sampler count is programmed in multiples of 4, rounded up */
   dw2 |= (num_samplers + 3) / 4 << GEN7_PS_SAMPLER_COUNT_SHIFT;

   if (dual_blend)
      dw4 |= GEN7_PS_DUAL_SOURCE_BLEND_ENABLE;

   ilo_cp_begin(cp, cmd_len);
   ilo_cp_write(cp, cmd | (cmd_len - 2));
   ilo_cp_write(cp, ilo_shader_get_kernel_offset(fs));
   ilo_cp_write(cp, dw2);
   ilo_cp_write(cp, 0); /* scratch */
   ilo_cp_write(cp, dw4);
   ilo_cp_write(cp, dw5);
   ilo_cp_write(cp, 0); /* kernel 1 */
   ilo_cp_write(cp, 0); /* kernel 2 */
   ilo_cp_end(cp);
}
 
/* 3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP (subopcode 0x21) */
static void
gen7_emit_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP(const struct ilo_dev_info *dev,
                                                  uint32_t sf_clip_viewport,
                                                  struct ilo_cp *cp)
{
   const int subop = 0x21;

   gen7_emit_3dstate_pointer(dev, subop, sf_clip_viewport, cp);
}
 
/* 3DSTATE_VIEWPORT_STATE_POINTERS_CC (subopcode 0x23) */
static void
gen7_emit_3DSTATE_VIEWPORT_STATE_POINTERS_CC(const struct ilo_dev_info *dev,
                                             uint32_t cc_viewport,
                                             struct ilo_cp *cp)
{
   const int subop = 0x23;

   gen7_emit_3dstate_pointer(dev, subop, cc_viewport, cp);
}
 
/* 3DSTATE_BLEND_STATE_POINTERS (subopcode 0x24) */
static void
gen7_emit_3DSTATE_BLEND_STATE_POINTERS(const struct ilo_dev_info *dev,
                                       uint32_t blend_state,
                                       struct ilo_cp *cp)
{
   const int subop = 0x24;

   gen7_emit_3dstate_pointer(dev, subop, blend_state, cp);
}
 
/* 3DSTATE_DEPTH_STENCIL_STATE_POINTERS (subopcode 0x25) */
static void
gen7_emit_3DSTATE_DEPTH_STENCIL_STATE_POINTERS(const struct ilo_dev_info *dev,
                                               uint32_t depth_stencil_state,
                                               struct ilo_cp *cp)
{
   const int subop = 0x25;

   gen7_emit_3dstate_pointer(dev, subop, depth_stencil_state, cp);
}
 
/* 3DSTATE_BINDING_TABLE_POINTERS_VS (subopcode 0x26) */
static void
gen7_emit_3DSTATE_BINDING_TABLE_POINTERS_VS(const struct ilo_dev_info *dev,
                                            uint32_t binding_table,
                                            struct ilo_cp *cp)
{
   const int subop = 0x26;

   gen7_emit_3dstate_pointer(dev, subop, binding_table, cp);
}
 
/* 3DSTATE_BINDING_TABLE_POINTERS_HS (subopcode 0x27) */
static void
gen7_emit_3DSTATE_BINDING_TABLE_POINTERS_HS(const struct ilo_dev_info *dev,
                                            uint32_t binding_table,
                                            struct ilo_cp *cp)
{
   const int subop = 0x27;

   gen7_emit_3dstate_pointer(dev, subop, binding_table, cp);
}
 
/* 3DSTATE_BINDING_TABLE_POINTERS_DS (subopcode 0x28) */
static void
gen7_emit_3DSTATE_BINDING_TABLE_POINTERS_DS(const struct ilo_dev_info *dev,
                                            uint32_t binding_table,
                                            struct ilo_cp *cp)
{
   const int subop = 0x28;

   gen7_emit_3dstate_pointer(dev, subop, binding_table, cp);
}
 
/* 3DSTATE_BINDING_TABLE_POINTERS_GS (subopcode 0x29) */
static void
gen7_emit_3DSTATE_BINDING_TABLE_POINTERS_GS(const struct ilo_dev_info *dev,
                                            uint32_t binding_table,
                                            struct ilo_cp *cp)
{
   const int subop = 0x29;

   gen7_emit_3dstate_pointer(dev, subop, binding_table, cp);
}
 
/* 3DSTATE_BINDING_TABLE_POINTERS_PS (subopcode 0x2a) */
static void
gen7_emit_3DSTATE_BINDING_TABLE_POINTERS_PS(const struct ilo_dev_info *dev,
                                            uint32_t binding_table,
                                            struct ilo_cp *cp)
{
   const int subop = 0x2a;

   gen7_emit_3dstate_pointer(dev, subop, binding_table, cp);
}
 
/* 3DSTATE_SAMPLER_STATE_POINTERS_VS (subopcode 0x2b) */
static void
gen7_emit_3DSTATE_SAMPLER_STATE_POINTERS_VS(const struct ilo_dev_info *dev,
                                            uint32_t sampler_state,
                                            struct ilo_cp *cp)
{
   const int subop = 0x2b;

   gen7_emit_3dstate_pointer(dev, subop, sampler_state, cp);
}
 
/* 3DSTATE_SAMPLER_STATE_POINTERS_HS (subopcode 0x2c) */
static void
gen7_emit_3DSTATE_SAMPLER_STATE_POINTERS_HS(const struct ilo_dev_info *dev,
                                            uint32_t sampler_state,
                                            struct ilo_cp *cp)
{
   const int subop = 0x2c;

   gen7_emit_3dstate_pointer(dev, subop, sampler_state, cp);
}
 
/* 3DSTATE_SAMPLER_STATE_POINTERS_DS (subopcode 0x2d) */
static void
gen7_emit_3DSTATE_SAMPLER_STATE_POINTERS_DS(const struct ilo_dev_info *dev,
                                            uint32_t sampler_state,
                                            struct ilo_cp *cp)
{
   const int subop = 0x2d;

   gen7_emit_3dstate_pointer(dev, subop, sampler_state, cp);
}
 
/* 3DSTATE_SAMPLER_STATE_POINTERS_GS (subopcode 0x2e) */
static void
gen7_emit_3DSTATE_SAMPLER_STATE_POINTERS_GS(const struct ilo_dev_info *dev,
                                            uint32_t sampler_state,
                                            struct ilo_cp *cp)
{
   const int subop = 0x2e;

   gen7_emit_3dstate_pointer(dev, subop, sampler_state, cp);
}
 
/* 3DSTATE_SAMPLER_STATE_POINTERS_PS (subopcode 0x2f) */
static void
gen7_emit_3DSTATE_SAMPLER_STATE_POINTERS_PS(const struct ilo_dev_info *dev,
                                            uint32_t sampler_state,
                                            struct ilo_cp *cp)
{
   const int subop = 0x2f;

   gen7_emit_3dstate_pointer(dev, subop, sampler_state, cp);
}
 
/*
 * Emit one of 3DSTATE_URB_{VS,HS,DS,GS} (subops 0x30-0x33).
 *
 * \param offset      start of the stage's URB partition, in bytes; must be
 *                    a multiple of 8KB
 * \param size        size of the partition, in bytes
 * \param entry_size  size of one URB entry, in bytes; rounded up to 512-bit
 *                    rows
 */
static void
gen7_emit_3dstate_urb(const struct ilo_dev_info *dev,
                      int subop, int offset, int size,
                      int entry_size,
                      struct ilo_cp *cp)
{
   const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, subop);
   const uint8_t cmd_len = 2;
   const int row_size = 64; /* 512 bits */
   int alloc_size, num_entries, min_entries, max_entries;

   ILO_GPE_VALID_GEN(dev, 7, 7);

   /* VS, HS, DS, and GS variants */
   assert(subop >= 0x30 && subop <= 0x33);

   /* in multiples of 8KB */
   assert(offset % 8192 == 0);
   offset /= 8192;

   /* in multiple of 512-bit rows */
   alloc_size = (entry_size + row_size - 1) / row_size;
   if (!alloc_size)
      alloc_size = 1;

   /*
    * From the Ivy Bridge PRM, volume 2 part 1, page 34:
    *
    *     "VS URB Entry Allocation Size equal to 4(5 512-bit URB rows) may
    *      cause performance to decrease due to banking in the URB. Element
    *      sizes of 16 to 20 should be programmed with six 512-bit URB rows."
    */
   if (subop == 0x30 && alloc_size == 5)
      alloc_size = 6;

   /* in multiples of 8 */
   num_entries = (size / row_size / alloc_size) & ~7;

   /* clamp num_entries to the per-stage, per-GT limits */
   switch (subop) {
   case 0x30: /* 3DSTATE_URB_VS */
      min_entries = 32;
      max_entries = (dev->gt == 2) ? 704 : 512;

      assert(num_entries >= min_entries);
      if (num_entries > max_entries)
         num_entries = max_entries;
      break;
   case 0x31: /* 3DSTATE_URB_HS */
      max_entries = (dev->gt == 2) ? 64 : 32;
      if (num_entries > max_entries)
         num_entries = max_entries;
      break;
   case 0x32: /* 3DSTATE_URB_DS */
      /* DS entries, when present, must number at least 138 */
      if (num_entries)
         assert(num_entries >= 138);
      break;
   case 0x33: /* 3DSTATE_URB_GS */
      max_entries = (dev->gt == 2) ? 320 : 192;
      if (num_entries > max_entries)
         num_entries = max_entries;
      break;
   default:
      break;
   }

   ilo_cp_begin(cp, cmd_len);
   ilo_cp_write(cp, cmd | (cmd_len - 2));
   ilo_cp_write(cp, offset << GEN7_URB_STARTING_ADDRESS_SHIFT |
                    (alloc_size - 1) << GEN7_URB_ENTRY_SIZE_SHIFT |
                    num_entries);
   ilo_cp_end(cp);
}
 
/* 3DSTATE_URB_VS (subopcode 0x30) */
static void
gen7_emit_3DSTATE_URB_VS(const struct ilo_dev_info *dev,
                         int offset, int size, int entry_size,
                         struct ilo_cp *cp)
{
   const int subop = 0x30;

   gen7_emit_3dstate_urb(dev, subop, offset, size, entry_size, cp);
}
 
/* 3DSTATE_URB_HS (subopcode 0x31) */
static void
gen7_emit_3DSTATE_URB_HS(const struct ilo_dev_info *dev,
                         int offset, int size, int entry_size,
                         struct ilo_cp *cp)
{
   const int subop = 0x31;

   gen7_emit_3dstate_urb(dev, subop, offset, size, entry_size, cp);
}
 
/* 3DSTATE_URB_DS (subopcode 0x32) */
static void
gen7_emit_3DSTATE_URB_DS(const struct ilo_dev_info *dev,
                         int offset, int size, int entry_size,
                         struct ilo_cp *cp)
{
   const int subop = 0x32;

   gen7_emit_3dstate_urb(dev, subop, offset, size, entry_size, cp);
}
 
/* 3DSTATE_URB_GS (subopcode 0x33) */
static void
gen7_emit_3DSTATE_URB_GS(const struct ilo_dev_info *dev,
                         int offset, int size, int entry_size,
                         struct ilo_cp *cp)
{
   const int subop = 0x33;

   gen7_emit_3dstate_urb(dev, subop, offset, size, entry_size, cp);
}
 
/*
 * Emit one of 3DSTATE_PUSH_CONSTANT_ALLOC_{VS,HS,DS,GS,PS} (subops
 * 0x12-0x16).
 *
 * \param offset  start of the stage's push constant space, in bytes;
 *                rounded up to 1KB
 * \param size    size of the space, in bytes; clamped to the hardware limit
 */
static void
gen7_emit_3dstate_push_constant_alloc(const struct ilo_dev_info *dev,
                                      int subop, int offset, int size,
                                      struct ilo_cp *cp)
{
   const uint32_t cmd = ILO_GPE_CMD(0x3, 0x1, subop);
   const uint8_t cmd_len = 2;
   int end;

   ILO_GPE_VALID_GEN(dev, 7, 7);

   /* VS, HS, DS, GS, and PS variants */
   assert(subop >= 0x12 && subop <= 0x16);

   /*
    * From the Ivy Bridge PRM, volume 2 part 1, page 68:
    *
    *     "(A table that says the maximum size of each constant buffer is
    *      16KB")
    *
    * From the Ivy Bridge PRM, volume 2 part 1, page 115:
    *
    *     "The sum of the Constant Buffer Offset and the Constant Buffer Size
    *      may not exceed the maximum value of the Constant Buffer Size."
    *
    * Thus, the valid range of buffer end is [0KB, 16KB].
    */
   end = (offset + size) / 1024;
   if (end > 16) {
      assert(!"invalid constant buffer end");
      end = 16;
   }

   /* the valid range of buffer offset is [0KB, 15KB] */
   offset = (offset + 1023) / 1024;
   if (offset > 15) {
      assert(!"invalid constant buffer offset");
      offset = 15;
   }

   /* rounding offset up may push it past end when size is zero */
   if (offset > end) {
      assert(!size);
      offset = end;
   }

   /* the valid range of buffer size is [0KB, 15KB] */
   size = end - offset;
   if (size > 15) {
      assert(!"invalid constant buffer size");
      size = 15;
   }

   ilo_cp_begin(cp, cmd_len);
   ilo_cp_write(cp, cmd | (cmd_len - 2));
   ilo_cp_write(cp, offset << GEN7_PUSH_CONSTANT_BUFFER_OFFSET_SHIFT |
                    size);
   ilo_cp_end(cp);
}
 
/* 3DSTATE_PUSH_CONSTANT_ALLOC_VS (subopcode 0x12) */
static void
gen7_emit_3DSTATE_PUSH_CONSTANT_ALLOC_VS(const struct ilo_dev_info *dev,
                                         int offset, int size,
                                         struct ilo_cp *cp)
{
   const int subop = 0x12;

   gen7_emit_3dstate_push_constant_alloc(dev, subop, offset, size, cp);
}
 
/* 3DSTATE_PUSH_CONSTANT_ALLOC_HS (subopcode 0x13) */
static void
gen7_emit_3DSTATE_PUSH_CONSTANT_ALLOC_HS(const struct ilo_dev_info *dev,
                                         int offset, int size,
                                         struct ilo_cp *cp)
{
   const int subop = 0x13;

   gen7_emit_3dstate_push_constant_alloc(dev, subop, offset, size, cp);
}
 
/* 3DSTATE_PUSH_CONSTANT_ALLOC_DS (subopcode 0x14) */
static void
gen7_emit_3DSTATE_PUSH_CONSTANT_ALLOC_DS(const struct ilo_dev_info *dev,
                                         int offset, int size,
                                         struct ilo_cp *cp)
{
   const int subop = 0x14;

   gen7_emit_3dstate_push_constant_alloc(dev, subop, offset, size, cp);
}
 
/* 3DSTATE_PUSH_CONSTANT_ALLOC_GS (subopcode 0x15) */
static void
gen7_emit_3DSTATE_PUSH_CONSTANT_ALLOC_GS(const struct ilo_dev_info *dev,
                                         int offset, int size,
                                         struct ilo_cp *cp)
{
   const int subop = 0x15;

   gen7_emit_3dstate_push_constant_alloc(dev, subop, offset, size, cp);
}
 
/* 3DSTATE_PUSH_CONSTANT_ALLOC_PS (subopcode 0x16) */
static void
gen7_emit_3DSTATE_PUSH_CONSTANT_ALLOC_PS(const struct ilo_dev_info *dev,
                                         int offset, int size,
                                         struct ilo_cp *cp)
{
   const int subop = 0x16;

   gen7_emit_3dstate_push_constant_alloc(dev, subop, offset, size, cp);
}
 
/*
 * Emit 3DSTATE_SO_DECL_LIST, translating the Gallium stream output info
 * into hardware SO_DECLs.  Gaps between consecutive outputs in a buffer are
 * padded with hole decls.  Only stream 0 is programmed; streams 1-3 stay
 * empty.
 *
 * NOTE(review): so_decls[] is uint16_t, so each decl is assumed to fit in
 * the low 16 bits of the packed value -- verify the SO_DECL_* shifts.
 */
static void
gen7_emit_3DSTATE_SO_DECL_LIST(const struct ilo_dev_info *dev,
                               const struct pipe_stream_output_info *so_info,
                               struct ilo_cp *cp)
{
   const uint32_t cmd = ILO_GPE_CMD(0x3, 0x1, 0x17);
   uint16_t cmd_len;
   int buffer_selects, num_entries, i;
   uint16_t so_decls[128];

   ILO_GPE_VALID_GEN(dev, 7, 7);

   buffer_selects = 0;
   num_entries = 0;

   if (so_info) {
      /* next unwritten DWord offset within each SO buffer */
      int buffer_offsets[PIPE_MAX_SO_BUFFERS];

      memset(buffer_offsets, 0, sizeof(buffer_offsets));

      for (i = 0; i < so_info->num_outputs; i++) {
         unsigned decl, buf, reg, mask;

         buf = so_info->output[i].output_buffer;

         /* pad with holes */
         assert(buffer_offsets[buf] <= so_info->output[i].dst_offset);
         while (buffer_offsets[buf] < so_info->output[i].dst_offset) {
            int num_dwords;

            /* each hole decl can skip at most 4 DWords */
            num_dwords = so_info->output[i].dst_offset - buffer_offsets[buf];
            if (num_dwords > 4)
               num_dwords = 4;

            decl = buf << SO_DECL_OUTPUT_BUFFER_SLOT_SHIFT |
                   SO_DECL_HOLE_FLAG |
                   ((1 << num_dwords) - 1) << SO_DECL_COMPONENT_MASK_SHIFT;

            so_decls[num_entries++] = decl;
            buffer_offsets[buf] += num_dwords;
         }

         /* the actual output: register index plus component mask */
         reg = so_info->output[i].register_index;
         mask = ((1 << so_info->output[i].num_components) - 1) <<
            so_info->output[i].start_component;

         decl = buf << SO_DECL_OUTPUT_BUFFER_SLOT_SHIFT |
                reg << SO_DECL_REGISTER_INDEX_SHIFT |
                mask << SO_DECL_COMPONENT_MASK_SHIFT;

         so_decls[num_entries++] = decl;
         buffer_selects |= 1 << buf;
         buffer_offsets[buf] += so_info->output[i].num_components;
      }
   }

   /*
    * From the Ivy Bridge PRM, volume 2 part 1, page 201:
    *
    *     "Errata: All 128 decls for all four streams must be included
    *      whenever this command is issued. The "Num Entries [n]" fields still
    *      contain the actual numbers of valid decls."
    *
    * Also note that "DWord Length" has 9 bits for this command, and the type
    * of cmd_len is thus uint16_t.
    */
   cmd_len = 2 * 128 + 3;

   ilo_cp_begin(cp, cmd_len);
   ilo_cp_write(cp, cmd | (cmd_len - 2));
   ilo_cp_write(cp, 0 << SO_STREAM_TO_BUFFER_SELECTS_3_SHIFT |
                    0 << SO_STREAM_TO_BUFFER_SELECTS_2_SHIFT |
                    0 << SO_STREAM_TO_BUFFER_SELECTS_1_SHIFT |
                    buffer_selects << SO_STREAM_TO_BUFFER_SELECTS_0_SHIFT);
   ilo_cp_write(cp, 0 << SO_NUM_ENTRIES_3_SHIFT |
                    0 << SO_NUM_ENTRIES_2_SHIFT |
                    0 << SO_NUM_ENTRIES_1_SHIFT |
                    num_entries << SO_NUM_ENTRIES_0_SHIFT);

   /* each decl occupies two DWords; stream 0 in the low word */
   for (i = 0; i < num_entries; i++) {
      ilo_cp_write(cp, so_decls[i]);
      ilo_cp_write(cp, 0);
   }
   /* per the errata above, all 128 slots must be written */
   for (; i < 128; i++) {
      ilo_cp_write(cp, 0);
      ilo_cp_write(cp, 0);
   }

   ilo_cp_end(cp);
}
 
/*
 * Emit 3DSTATE_SO_BUFFER for SO buffer \p index.
 *
 * When \p so_target is NULL (or has no backing buffer), the slot is
 * disabled by programming zero base/end addresses.
 */
static void
gen7_emit_3DSTATE_SO_BUFFER(const struct ilo_dev_info *dev,
                            int index, int base, int stride,
                            const struct pipe_stream_output_target *so_target,
                            struct ilo_cp *cp)
{
   const uint32_t cmd = ILO_GPE_CMD(0x3, 0x1, 0x18);
   const uint8_t cmd_len = 4;
   struct ilo_buffer *buf;
   int end;

   ILO_GPE_VALID_GEN(dev, 7, 7);

   if (!so_target || !so_target->buffer) {
      /* disable the slot */
      ilo_cp_begin(cp, cmd_len);
      ilo_cp_write(cp, cmd | (cmd_len - 2));
      ilo_cp_write(cp, index << SO_BUFFER_INDEX_SHIFT);
      ilo_cp_write(cp, 0);
      ilo_cp_write(cp, 0);
      ilo_cp_end(cp);
      return;
   }

   buf = ilo_buffer(so_target->buffer);

   /* DWord-aligned */
   assert(stride % 4 == 0 && base % 4 == 0);
   assert(so_target->buffer_offset % 4 == 0);

   stride &= ~3;
   base = (base + so_target->buffer_offset) & ~3;
   end = (base + so_target->buffer_size) & ~3;

   ilo_cp_begin(cp, cmd_len);
   ilo_cp_write(cp, cmd | (cmd_len - 2));
   ilo_cp_write(cp, index << SO_BUFFER_INDEX_SHIFT |
                    stride);
   /* base and end addresses are relocations against the buffer BO */
   ilo_cp_write_bo(cp, base, buf->bo, INTEL_DOMAIN_RENDER, INTEL_DOMAIN_RENDER);
   ilo_cp_write_bo(cp, end, buf->bo, INTEL_DOMAIN_RENDER, INTEL_DOMAIN_RENDER);
   ilo_cp_end(cp);
}
 
/*
 * Emit 3DPRIMITIVE.  When \p rectlist is set, the topology is forced to
 * RECTLIST (used internally for blits/clears); otherwise it is translated
 * from the Gallium primitive mode.
 */
static void
gen7_emit_3DPRIMITIVE(const struct ilo_dev_info *dev,
                      const struct pipe_draw_info *info,
                      const struct ilo_ib_state *ib,
                      bool rectlist,
                      struct ilo_cp *cp)
{
   const uint32_t cmd = ILO_GPE_CMD(0x3, 0x3, 0x00);
   const uint8_t cmd_len = 7;
   int prim, vb_access;
   uint32_t vb_start;

   ILO_GPE_VALID_GEN(dev, 7, 7);

   prim = (rectlist) ?
      _3DPRIM_RECTLIST : ilo_gpe_gen6_translate_pipe_prim(info->mode);

   if (info->indexed) {
      vb_access = GEN7_3DPRIM_VERTEXBUFFER_ACCESS_RANDOM;
      vb_start = info->start + ib->draw_start_offset;
   }
   else {
      vb_access = GEN7_3DPRIM_VERTEXBUFFER_ACCESS_SEQUENTIAL;
      vb_start = info->start;
   }

   ilo_cp_begin(cp, cmd_len);
   ilo_cp_write(cp, cmd | (cmd_len - 2));
   ilo_cp_write(cp, vb_access | prim);
   ilo_cp_write(cp, info->count);
   ilo_cp_write(cp, vb_start);
   ilo_cp_write(cp, info->instance_count);
   ilo_cp_write(cp, info->start_instance);
   ilo_cp_write(cp, info->index_bias);
   ilo_cp_end(cp);
}
 
/*
 * Write an array of SF_CLIP_VIEWPORT elements into the state buffer and
 * return its offset.
 *
 * From the Ivy Bridge PRM, volume 2 part 1, page 270: the array holds up to
 * 16 elements, each 16 DWords apart, and the first element must be aligned
 * to a 64-byte boundary.
 */
static uint32_t
gen7_emit_SF_CLIP_VIEWPORT(const struct ilo_dev_info *dev,
                           const struct ilo_viewport_cso *viewports,
                           unsigned num_viewports,
                           struct ilo_cp *cp)
{
   const int state_align = 64 / 4;
   const int state_len = 16 * num_viewports;
   uint32_t state_offset, *dw;
   unsigned i;

   ILO_GPE_VALID_GEN(dev, 7, 7);

   assert(num_viewports && num_viewports <= 16);

   dw = ilo_cp_steal_ptr(cp, "SF_CLIP_VIEWPORT",
                         state_len, state_align, &state_offset);

   for (i = 0; i < num_viewports; i++, dw += 16) {
      const struct ilo_viewport_cso *vp = &viewports[i];

      /* viewport transform: scales first, then translations */
      dw[0] = fui(vp->m00);
      dw[1] = fui(vp->m11);
      dw[2] = fui(vp->m22);
      dw[3] = fui(vp->m30);
      dw[4] = fui(vp->m31);
      dw[5] = fui(vp->m32);
      dw[6] = 0;
      dw[7] = 0;
      /* guard band extents */
      dw[8] = fui(vp->min_gbx);
      dw[9] = fui(vp->max_gbx);
      dw[10] = fui(vp->min_gby);
      dw[11] = fui(vp->max_gby);
      dw[12] = 0;
      dw[13] = 0;
      dw[14] = 0;
      dw[15] = 0;
   }

   return state_offset;
}
 
/*
 * Initialize a null SURFACE_STATE.
 *
 * From the Ivy Bridge PRM, volume 4 part 1, page 62: reads from a null
 * surface return all zeros and writes are dropped.  Width, Height, Depth,
 * and LOD must still match the depth buffer when used as a render target,
 * which is why they are taken as parameters here.
 *
 * From the Ivy Bridge PRM, volume 4 part 1, page 65:
 *
 *     "If Surface Type is SURFTYPE_NULL, this field (Tiled Surface) must be
 *      true"
 */
void
ilo_gpe_init_view_surface_null_gen7(const struct ilo_dev_info *dev,
                                    unsigned width, unsigned height,
                                    unsigned depth, unsigned level,
                                    struct ilo_view_surface *surf)
{
   uint32_t *dw;
   int i;

   ILO_GPE_VALID_GEN(dev, 7, 7);

   STATIC_ASSERT(Elements(surf->payload) >= 8);
   dw = surf->payload;

   /* start from an all-zero payload, then fill in the non-zero dwords */
   for (i = 0; i < 8; i++)
      dw[i] = 0;

   dw[0] = BRW_SURFACE_NULL << BRW_SURFACE_TYPE_SHIFT |
           BRW_SURFACEFORMAT_B8G8R8A8_UNORM << BRW_SURFACE_FORMAT_SHIFT |
           BRW_SURFACE_TILED << 13;

   dw[2] = SET_FIELD(height - 1, GEN7_SURFACE_HEIGHT) |
           SET_FIELD(width - 1, GEN7_SURFACE_WIDTH);

   dw[3] = SET_FIELD(depth - 1, BRW_SURFACE_DEPTH);

   dw[5] = level;

   surf->bo = NULL;
}
 
/*
 * Initialize a SURFACE_STATE for a buffer view.
 *
 * \param elem_format     element format; PIPE_FORMAT_NONE selects a raw or
 *                        structured buffer
 * \param struct_size     structure stride in bytes; > 1 with no format
 *                        selects SURFTYPE_STRBUF
 * \param render_cache_rw set the render-cache read/write bit
 */
void
ilo_gpe_init_view_surface_for_buffer_gen7(const struct ilo_dev_info *dev,
                                          const struct ilo_buffer *buf,
                                          unsigned offset, unsigned size,
                                          unsigned struct_size,
                                          enum pipe_format elem_format,
                                          bool is_rt, bool render_cache_rw,
                                          struct ilo_view_surface *surf)
{
   const bool typed = (elem_format != PIPE_FORMAT_NONE);
   const bool structured = (!typed && struct_size > 1);
   const int elem_size = (typed) ?
      util_format_get_blocksize(elem_format) : 1;
   int width, height, depth, pitch;
   int surface_type, surface_format, num_entries;
   uint32_t *dw;

   ILO_GPE_VALID_GEN(dev, 7, 7);

   /* 5 is SURFTYPE_STRBUF; there is no BRW_SURFACE_* name for it here */
   surface_type = (structured) ? 5 : BRW_SURFACE_BUFFER;

   surface_format = (typed) ?
      ilo_translate_color_format(elem_format) : BRW_SURFACEFORMAT_RAW;

   num_entries = size / struct_size;
   /* see if there is enough space to fit another element */
   if (size % struct_size >= elem_size && !structured)
      num_entries++;

   /*
    * From the Ivy Bridge PRM, volume 4 part 1, page 67:
    *
    *     "For SURFTYPE_BUFFER render targets, this field (Surface Base
    *      Address) specifies the base address of first element of the
    *      surface. The surface is interpreted as a simple array of that
    *      single element type. The address must be naturally-aligned to the
    *      element size (e.g., a buffer containing R32G32B32A32_FLOAT elements
    *      must be 16-byte aligned)
    *
    *      For SURFTYPE_BUFFER non-rendertarget surfaces, this field specifies
    *      the base address of the first element of the surface, computed in
    *      software by adding the surface base address to the byte offset of
    *      the element in the buffer."
    */
   if (is_rt)
      assert(offset % elem_size == 0);

   /*
    * From the Ivy Bridge PRM, volume 4 part 1, page 68:
    *
    *     "For typed buffer and structured buffer surfaces, the number of
    *      entries in the buffer ranges from 1 to 2^27. For raw buffer
    *      surfaces, the number of entries in the buffer is the number of
    *      bytes which can range from 1 to 2^30."
    */
   assert(num_entries >= 1 &&
          num_entries <= 1 << ((typed || structured) ? 27 : 30));

   /*
    * From the Ivy Bridge PRM, volume 4 part 1, page 69:
    *
    *     "For SURFTYPE_BUFFER: The low two bits of this field (Width) must be
    *      11 if the Surface Format is RAW (the size of the buffer must be a
    *      multiple of 4 bytes)."
    *
    * From the Ivy Bridge PRM, volume 4 part 1, page 70:
    *
    *     "For surfaces of type SURFTYPE_BUFFER and SURFTYPE_STRBUF, this
    *      field (Surface Pitch) indicates the size of the structure."
    *
    *     "For linear surfaces with Surface Type of SURFTYPE_STRBUF, the pitch
    *      must be a multiple of 4 bytes."
    */
   if (structured)
      assert(struct_size % 4 == 0);
   else if (!typed)
      assert(num_entries % 4 == 0);

   pitch = struct_size;

   /* all three dimension fields and the pitch are programmed minus one */
   pitch--;
   num_entries--;
   /* bits [6:0] */
   width  = (num_entries & 0x0000007f);
   /* bits [20:7] */
   height = (num_entries & 0x001fff80) >> 7;
   /* bits [30:21] */
   depth  = (num_entries & 0x7fe00000) >> 21;
   /* limit to [26:21] */
   if (typed || structured)
      depth &= 0x3f;

   STATIC_ASSERT(Elements(surf->payload) >= 8);
   dw = surf->payload;

   dw[0] = surface_type << BRW_SURFACE_TYPE_SHIFT |
           surface_format << BRW_SURFACE_FORMAT_SHIFT;
   if (render_cache_rw)
      dw[0] |= BRW_SURFACE_RC_READ_WRITE;

   dw[1] = offset;

   dw[2] = SET_FIELD(height, GEN7_SURFACE_HEIGHT) |
           SET_FIELD(width, GEN7_SURFACE_WIDTH);

   dw[3] = SET_FIELD(depth, BRW_SURFACE_DEPTH) |
           pitch;

   dw[4] = 0;
   dw[5] = 0;

   dw[6] = 0;
   dw[7] = 0;

   /* do not increment reference count */
   surf->bo = buf->bo;
}
 
/*
 * Initialize a SURFACE_STATE for sampling from or rendering to a texture.
 *
 * \param format      view format; may differ from the resource format
 * \param first_level first mip level of the view
 * \param num_levels  number of levels; must be 1 when \p is_rt
 * \param first_layer first array layer or 3D slice of the view
 * \param num_layers  number of layers; must be 1 when \p is_rt (layer
 *                    offsets are folded into the base address instead)
 * \param is_rt       true when used as a render target
 */
void
ilo_gpe_init_view_surface_for_texture_gen7(const struct ilo_dev_info *dev,
                                           const struct ilo_texture *tex,
                                           enum pipe_format format,
                                           unsigned first_level,
                                           unsigned num_levels,
                                           unsigned first_layer,
                                           unsigned num_layers,
                                           bool is_rt, bool render_cache_rw,
                                           struct ilo_view_surface *surf)
{
   int surface_type, surface_format;
   int width, height, depth, pitch, lod;
   unsigned layer_offset, x_offset, y_offset;
   uint32_t *dw;

   ILO_GPE_VALID_GEN(dev, 7, 7);

   surface_type = ilo_gpe_gen6_translate_texture(tex->base.target);
   assert(surface_type != BRW_SURFACE_BUFFER);

   /* a separate stencil buffer means the view sees only the depth part */
   if (format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT && tex->separate_s8)
      format = PIPE_FORMAT_Z32_FLOAT;

   if (is_rt)
      surface_format = ilo_translate_render_format(format);
   else
      surface_format = ilo_translate_texture_format(format);
   assert(surface_format >= 0);

   width = tex->base.width0;
   height = tex->base.height0;
   depth = (tex->base.target == PIPE_TEXTURE_3D) ?
      tex->base.depth0 : num_layers;
   pitch = tex->bo_stride;

   if (surface_type == BRW_SURFACE_CUBE) {
      /*
       * From the Ivy Bridge PRM, volume 4 part 1, page 70:
       *
       *     "For SURFTYPE_CUBE:For Sampling Engine Surfaces, the range of
       *      this field is [0,340], indicating the number of cube array
       *      elements (equal to the number of underlying 2D array elements
       *      divided by 6). For other surfaces, this field must be zero."
       *
       * When is_rt is true, we treat the texture as a 2D one to avoid the
       * restriction.
       */
      if (is_rt) {
         surface_type = BRW_SURFACE_2D;
      }
      else {
         assert(num_layers % 6 == 0);
         depth = num_layers / 6;
      }
   }

   /* sanity check the size */
   assert(width >= 1 && height >= 1 && depth >= 1 && pitch >= 1);
   assert(first_layer < 2048 && num_layers <= 2048);
   switch (surface_type) {
   case BRW_SURFACE_1D:
      assert(width <= 16384 && height == 1 && depth <= 2048);
      break;
   case BRW_SURFACE_2D:
      assert(width <= 16384 && height <= 16384 && depth <= 2048);
      break;
   case BRW_SURFACE_3D:
      assert(width <= 2048 && height <= 2048 && depth <= 2048);
      if (!is_rt)
         assert(first_layer == 0);
      break;
   case BRW_SURFACE_CUBE:
      assert(width <= 16384 && height <= 16384 && depth <= 86);
      assert(width == height);
      if (is_rt)
         assert(first_layer == 0);
      break;
   default:
      assert(!"unexpected surface type");
      break;
   }

   if (is_rt) {
      /*
       * Compute the offset to the layer manually.
       *
       * For rendering, the hardware requires LOD to be the same for all
       * render targets and the depth buffer. We need to compute the offset
       * to the layer manually and always set LOD to 0.
       */
      if (true) {
         /* we lose the capability for layered rendering */
         assert(num_layers == 1);

         layer_offset = ilo_texture_get_slice_offset(tex,
               first_level, first_layer, &x_offset, &y_offset);

         /* intra-tile offsets are programmed in units of 4 (x) and 2 (y) */
         assert(x_offset % 4 == 0);
         assert(y_offset % 2 == 0);
         x_offset /= 4;
         y_offset /= 2;

         /* derive the size for the LOD */
         width = u_minify(width, first_level);
         height = u_minify(height, first_level);
         if (surface_type == BRW_SURFACE_3D)
            depth = u_minify(depth, first_level);
         else
            depth = 1;

         /* the view now starts at the computed offset; LOD becomes 0 */
         first_level = 0;
         first_layer = 0;
         lod = 0;
      }
      else {
         layer_offset = 0;
         x_offset = 0;
         y_offset = 0;
      }

      /* NOTE(review): first_level is 0 here, so this matches lod = 0 above */
      assert(num_levels == 1);
      lod = first_level;
   }
   else {
      layer_offset = 0;
      x_offset = 0;
      y_offset = 0;

      lod = num_levels - 1;
   }

   /*
    * From the Ivy Bridge PRM, volume 4 part 1, page 68:
    *
    *     "The Base Address for linear render target surfaces and surfaces
    *      accessed with the typed surface read/write data port messages must
    *      be element-size aligned, for non-YUV surface formats, or a multiple
    *      of 2 element-sizes for YUV surface formats. Other linear surfaces
    *      have no alignment requirements (byte alignment is sufficient)."
    *
    * From the Ivy Bridge PRM, volume 4 part 1, page 70:
    *
    *     "For linear render target surfaces and surfaces accessed with the
    *      typed data port messages, the pitch must be a multiple of the
    *      element size for non-YUV surface formats. Pitch must be a multiple
    *      of 2 * element size for YUV surface formats. For linear surfaces
    *      with Surface Type of SURFTYPE_STRBUF, the pitch must be a multiple
    *      of 4 bytes.For other linear surfaces, the pitch can be any multiple
    *      of bytes."
    *
    * From the Ivy Bridge PRM, volume 4 part 1, page 74:
    *
    *     "For linear surfaces, this field (X Offset) must be zero."
    */
   if (tex->tiling == INTEL_TILING_NONE) {
      if (is_rt) {
         const int elem_size = util_format_get_blocksize(format);
         assert(layer_offset % elem_size == 0);
         assert(pitch % elem_size == 0);
      }

      assert(!x_offset);
   }

   STATIC_ASSERT(Elements(surf->payload) >= 8);
   dw = surf->payload;

   dw[0] = surface_type << BRW_SURFACE_TYPE_SHIFT |
           surface_format << BRW_SURFACE_FORMAT_SHIFT |
           ilo_gpe_gen6_translate_winsys_tiling(tex->tiling) << 13;

   /*
    * From the Ivy Bridge PRM, volume 4 part 1, page 63:
    *
    *     "If this field (Surface Array) is enabled, the Surface Type must be
    *      SURFTYPE_1D, SURFTYPE_2D, or SURFTYPE_CUBE. If this field is
    *      disabled and Surface Type is SURFTYPE_1D, SURFTYPE_2D, or
    *      SURFTYPE_CUBE, the Depth field must be set to zero."
    *
    * For non-3D sampler surfaces, resinfo (the sampler message) always
    * returns zero for the number of layers when this field is not set.
    */
   if (surface_type != BRW_SURFACE_3D) {
      if (util_resource_is_array_texture(&tex->base))
         dw[0] |= GEN7_SURFACE_IS_ARRAY;
      else
         assert(depth == 1);
   }

   if (tex->valign_4)
      dw[0] |= GEN7_SURFACE_VALIGN_4;

   if (tex->halign_8)
      dw[0] |= GEN7_SURFACE_HALIGN_8;

   if (tex->array_spacing_full)
      dw[0] |= GEN7_SURFACE_ARYSPC_FULL;
   else
      dw[0] |= GEN7_SURFACE_ARYSPC_LOD0;

   if (render_cache_rw)
      dw[0] |= BRW_SURFACE_RC_READ_WRITE;

   if (surface_type == BRW_SURFACE_CUBE && !is_rt)
      dw[0] |= BRW_SURFACE_CUBEFACE_ENABLES;

   /* non-zero only for the manually-offset render target path above */
   dw[1] = layer_offset;

   dw[2] = SET_FIELD(height - 1, GEN7_SURFACE_HEIGHT) |
           SET_FIELD(width - 1, GEN7_SURFACE_WIDTH);

   dw[3] = SET_FIELD(depth - 1, BRW_SURFACE_DEPTH) |
           (pitch - 1);

   dw[4] = first_layer << 18 |
           (num_layers - 1) << 7;

   /*
    * MSFMT_MSS means the samples are not interleaved and MSFMT_DEPTH_STENCIL
    * means the samples are interleaved.  The layouts are the same when the
    * number of samples is 1.
    */
   if (tex->interleaved && tex->base.nr_samples > 1) {
      assert(!is_rt);
      dw[4] |= GEN7_SURFACE_MSFMT_DEPTH_STENCIL;
   }
   else {
      dw[4] |= GEN7_SURFACE_MSFMT_MSS;
   }

   if (tex->base.nr_samples > 4)
      dw[4] |= GEN7_SURFACE_MULTISAMPLECOUNT_8;
   else if (tex->base.nr_samples > 2)
      dw[4] |= GEN7_SURFACE_MULTISAMPLECOUNT_4;
   else
      dw[4] |= GEN7_SURFACE_MULTISAMPLECOUNT_1;

   dw[5] = x_offset << BRW_SURFACE_X_OFFSET_SHIFT |
           y_offset << BRW_SURFACE_Y_OFFSET_SHIFT |
           SET_FIELD(first_level, GEN7_SURFACE_MIN_LOD) |
           lod;

   dw[6] = 0;
   dw[7] = 0;

   /* do not increment reference count */
   surf->bo = tex->bo;
}
 
static int
gen7_estimate_command_size(const struct ilo_dev_info *dev,
enum ilo_gpe_gen7_command cmd,
int arg)
{
static const struct {
int header;
int body;
} gen7_command_size_table[ILO_GPE_GEN7_COMMAND_COUNT] = {
[ILO_GPE_GEN7_STATE_BASE_ADDRESS] = { 0, 10 },
[ILO_GPE_GEN7_STATE_SIP] = { 0, 2 },
[ILO_GPE_GEN7_3DSTATE_VF_STATISTICS] = { 0, 1 },
[ILO_GPE_GEN7_PIPELINE_SELECT] = { 0, 1 },
[ILO_GPE_GEN7_MEDIA_VFE_STATE] = { 0, 8 },
[ILO_GPE_GEN7_MEDIA_CURBE_LOAD] = { 0, 4 },
[ILO_GPE_GEN7_MEDIA_INTERFACE_DESCRIPTOR_LOAD] = { 0, 4 },
[ILO_GPE_GEN7_MEDIA_STATE_FLUSH] = { 0, 2 },
[ILO_GPE_GEN7_GPGPU_WALKER] = { 0, 11 },
[ILO_GPE_GEN7_3DSTATE_CLEAR_PARAMS] = { 0, 3 },
[ILO_GPE_GEN7_3DSTATE_DEPTH_BUFFER] = { 0, 7 },
[ILO_GPE_GEN7_3DSTATE_STENCIL_BUFFER] = { 0, 3 },
[ILO_GPE_GEN7_3DSTATE_HIER_DEPTH_BUFFER] = { 0, 3 },
[ILO_GPE_GEN7_3DSTATE_VERTEX_BUFFERS] = { 1, 4 },
[ILO_GPE_GEN7_3DSTATE_VERTEX_ELEMENTS] = { 1, 2 },
[ILO_GPE_GEN7_3DSTATE_INDEX_BUFFER] = { 0, 3 },
[ILO_GPE_GEN7_3DSTATE_CC_STATE_POINTERS] = { 0, 2 },
[ILO_GPE_GEN7_3DSTATE_SCISSOR_STATE_POINTERS] = { 0, 2 },
[ILO_GPE_GEN7_3DSTATE_VS] = { 0, 6 },
[ILO_GPE_GEN7_3DSTATE_GS] = { 0, 7 },
[ILO_GPE_GEN7_3DSTATE_CLIP] = { 0, 4 },
[ILO_GPE_GEN7_3DSTATE_SF] = { 0, 7 },
[ILO_GPE_GEN7_3DSTATE_WM] = { 0, 3 },
[ILO_GPE_GEN7_3DSTATE_CONSTANT_VS] = { 0, 7 },
[ILO_GPE_GEN7_3DSTATE_CONSTANT_GS] = { 0, 7 },
[ILO_GPE_GEN7_3DSTATE_CONSTANT_PS] = { 0, 7 },
[ILO_GPE_GEN7_3DSTATE_SAMPLE_MASK] = { 0, 2 },
[ILO_GPE_GEN7_3DSTATE_CONSTANT_HS] = { 0, 7 },
[ILO_GPE_GEN7_3DSTATE_CONSTANT_DS] = { 0, 7 },
[ILO_GPE_GEN7_3DSTATE_HS] = { 0, 7 },
[ILO_GPE_GEN7_3DSTATE_TE] = { 0, 4 },
[ILO_GPE_GEN7_3DSTATE_DS] = { 0, 6 },
[ILO_GPE_GEN7_3DSTATE_STREAMOUT] = { 0, 3 },
[ILO_GPE_GEN7_3DSTATE_SBE] = { 0, 14 },
[ILO_GPE_GEN7_3DSTATE_PS] = { 0, 8 },
[ILO_GPE_GEN7_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP] = { 0, 2 },
[ILO_GPE_GEN7_3DSTATE_VIEWPORT_STATE_POINTERS_CC] = { 0, 2 },
[ILO_GPE_GEN7_3DSTATE_BLEND_STATE_POINTERS] = { 0, 2 },
[ILO_GPE_GEN7_3DSTATE_DEPTH_STENCIL_STATE_POINTERS] = { 0, 2 },
[ILO_GPE_GEN7_3DSTATE_BINDING_TABLE_POINTERS_VS] = { 0, 2 },
[ILO_GPE_GEN7_3DSTATE_BINDING_TABLE_POINTERS_HS] = { 0, 2 },
[ILO_GPE_GEN7_3DSTATE_BINDING_TABLE_POINTERS_DS] = { 0, 2 },
[ILO_GPE_GEN7_3DSTATE_BINDING_TABLE_POINTERS_GS] = { 0, 2 },
[ILO_GPE_GEN7_3DSTATE_BINDING_TABLE_POINTERS_PS] = { 0, 2 },
[ILO_GPE_GEN7_3DSTATE_SAMPLER_STATE_POINTERS_VS] = { 0, 2 },
[ILO_GPE_GEN7_3DSTATE_SAMPLER_STATE_POINTERS_HS] = { 0, 2 },
[ILO_GPE_GEN7_3DSTATE_SAMPLER_STATE_POINTERS_DS] = { 0, 2 },
[ILO_GPE_GEN7_3DSTATE_SAMPLER_STATE_POINTERS_GS] = { 0, 2 },
[ILO_GPE_GEN7_3DSTATE_SAMPLER_STATE_POINTERS_PS] = { 0, 2 },
[ILO_GPE_GEN7_3DSTATE_URB_VS] = { 0, 2 },
[ILO_GPE_GEN7_3DSTATE_URB_HS] = { 0, 2 },
[ILO_GPE_GEN7_3DSTATE_URB_DS] = { 0, 2 },
[ILO_GPE_GEN7_3DSTATE_URB_GS] = { 0, 2 },
[ILO_GPE_GEN7_3DSTATE_DRAWING_RECTANGLE] = { 0, 4 },
[ILO_GPE_GEN7_3DSTATE_POLY_STIPPLE_OFFSET] = { 0, 2 },
[ILO_GPE_GEN7_3DSTATE_POLY_STIPPLE_PATTERN] = { 0, 33, },
[ILO_GPE_GEN7_3DSTATE_LINE_STIPPLE] = { 0, 3 },
[ILO_GPE_GEN7_3DSTATE_AA_LINE_PARAMETERS] = { 0, 3 },
[ILO_GPE_GEN7_3DSTATE_MULTISAMPLE] = { 0, 4 },
[ILO_GPE_GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_VS] = { 0, 2 },
[ILO_GPE_GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_HS] = { 0, 2 },
[ILO_GPE_GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_DS] = { 0, 2 },
[ILO_GPE_GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_GS] = { 0, 2 },
[ILO_GPE_GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_PS] = { 0, 2 },
[ILO_GPE_GEN7_3DSTATE_SO_DECL_LIST] = { 3, 2 },
[ILO_GPE_GEN7_3DSTATE_SO_BUFFER] = { 0, 4 },
[ILO_GPE_GEN7_PIPE_CONTROL] = { 0, 5 },
[ILO_GPE_GEN7_3DPRIMITIVE] = { 0, 7 },
};
const int header = gen7_command_size_table[cmd].header;
const int body = gen7_command_size_table[cmd].body;
const int count = arg;
 
ILO_GPE_VALID_GEN(dev, 7, 7);
assert(cmd < ILO_GPE_GEN7_COMMAND_COUNT);
 
return (likely(count)) ? header + body * count : 0;
}
 
/**
 * Estimate the space needed to emit the given indirect state \p arg
 * times, including the worst-case padding required to honor the state's
 * alignment requirement.
 */
static int
gen7_estimate_state_size(const struct ilo_dev_info *dev,
                         enum ilo_gpe_gen7_state state,
                         int arg)
{
   /* per-state { alignment, size of one element, elements packed back-to-back? } */
   static const struct {
      int align;
      int size;
      bool packed;
   } size_table[ILO_GPE_GEN7_STATE_COUNT] = {
      [ILO_GPE_GEN7_INTERFACE_DESCRIPTOR_DATA]          = { 8,  8,  true },
      [ILO_GPE_GEN7_SF_CLIP_VIEWPORT]                   = { 16, 16, true },
      [ILO_GPE_GEN7_CC_VIEWPORT]                        = { 8,  2,  true },
      [ILO_GPE_GEN7_COLOR_CALC_STATE]                   = { 16, 6,  false },
      [ILO_GPE_GEN7_BLEND_STATE]                        = { 16, 2,  true },
      [ILO_GPE_GEN7_DEPTH_STENCIL_STATE]                = { 16, 3,  false },
      [ILO_GPE_GEN7_SCISSOR_RECT]                       = { 8,  2,  true },
      [ILO_GPE_GEN7_BINDING_TABLE_STATE]                = { 8,  1,  true },
      [ILO_GPE_GEN7_SURFACE_STATE]                      = { 8,  8,  false },
      [ILO_GPE_GEN7_SAMPLER_STATE]                      = { 8,  4,  true },
      [ILO_GPE_GEN7_SAMPLER_BORDER_COLOR_STATE]         = { 8,  4,  false },
      [ILO_GPE_GEN7_PUSH_CONSTANT_BUFFER]               = { 8,  1,  true },
   };
   const int num = arg;
   int total;

   ILO_GPE_VALID_GEN(dev, 7, 7);
   assert(state < ILO_GPE_GEN7_STATE_COUNT);

   /* nothing to emit, nothing to reserve */
   if (!num)
      return 0;

   const int align = size_table[state].align;
   const int size = size_table[state].size;

   if (size_table[state].packed) {
      /* array states: one worst-case alignment pad, then packed elements */
      total = (align - 1) + size * num;
   }
   else {
      /* standalone states: every extra copy starts at the next aligned offset */
      total = (align - 1) + size;
      if (num > 1)
         total += util_align_npot(size, align) * (num - 1);
   }

   return total;
}
 
/**
 * Populate a GEN7 GPE function table.
 *
 * GEN7_USE() inherits an emitter from the GEN6 table when the command or
 * state is emitted the same way on both generations; GEN7_SET() installs
 * the GEN7-specific gen7_emit_*() implementation instead.
 */
static void
gen7_init(struct ilo_gpe_gen7 *gen7)
{
   const struct ilo_gpe_gen6 *gen6 = ilo_gpe_gen6_get();

   gen7->estimate_command_size = gen7_estimate_command_size;
   gen7->estimate_state_size = gen7_estimate_state_size;

/* GEN7_USE: reuse the GEN6 emitter; GEN7_SET: use the GEN7 emitter */
#define GEN7_USE(gen7, name, from) gen7->emit_ ## name = from->emit_ ## name
#define GEN7_SET(gen7, name) gen7->emit_ ## name = gen7_emit_ ## name
   GEN7_USE(gen7, STATE_BASE_ADDRESS, gen6);
   GEN7_USE(gen7, STATE_SIP, gen6);
   GEN7_USE(gen7, 3DSTATE_VF_STATISTICS, gen6);
   GEN7_USE(gen7, PIPELINE_SELECT, gen6);
   GEN7_USE(gen7, MEDIA_VFE_STATE, gen6);
   GEN7_USE(gen7, MEDIA_CURBE_LOAD, gen6);
   GEN7_USE(gen7, MEDIA_INTERFACE_DESCRIPTOR_LOAD, gen6);
   GEN7_USE(gen7, MEDIA_STATE_FLUSH, gen6);
   GEN7_SET(gen7, GPGPU_WALKER);
   GEN7_SET(gen7, 3DSTATE_CLEAR_PARAMS);
   GEN7_USE(gen7, 3DSTATE_DEPTH_BUFFER, gen6);
   GEN7_USE(gen7, 3DSTATE_STENCIL_BUFFER, gen6);
   GEN7_USE(gen7, 3DSTATE_HIER_DEPTH_BUFFER, gen6);
   GEN7_USE(gen7, 3DSTATE_VERTEX_BUFFERS, gen6);
   GEN7_USE(gen7, 3DSTATE_VERTEX_ELEMENTS, gen6);
   GEN7_USE(gen7, 3DSTATE_INDEX_BUFFER, gen6);
   GEN7_SET(gen7, 3DSTATE_CC_STATE_POINTERS);
   GEN7_USE(gen7, 3DSTATE_SCISSOR_STATE_POINTERS, gen6);
   GEN7_USE(gen7, 3DSTATE_VS, gen6);
   GEN7_SET(gen7, 3DSTATE_GS);
   GEN7_USE(gen7, 3DSTATE_CLIP, gen6);
   GEN7_SET(gen7, 3DSTATE_SF);
   GEN7_SET(gen7, 3DSTATE_WM);
   GEN7_SET(gen7, 3DSTATE_CONSTANT_VS);
   GEN7_SET(gen7, 3DSTATE_CONSTANT_GS);
   GEN7_SET(gen7, 3DSTATE_CONSTANT_PS);
   GEN7_SET(gen7, 3DSTATE_SAMPLE_MASK);
   GEN7_SET(gen7, 3DSTATE_CONSTANT_HS);
   GEN7_SET(gen7, 3DSTATE_CONSTANT_DS);
   GEN7_SET(gen7, 3DSTATE_HS);
   GEN7_SET(gen7, 3DSTATE_TE);
   GEN7_SET(gen7, 3DSTATE_DS);
   GEN7_SET(gen7, 3DSTATE_STREAMOUT);
   GEN7_SET(gen7, 3DSTATE_SBE);
   GEN7_SET(gen7, 3DSTATE_PS);
   GEN7_SET(gen7, 3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP);
   GEN7_SET(gen7, 3DSTATE_VIEWPORT_STATE_POINTERS_CC);
   GEN7_SET(gen7, 3DSTATE_BLEND_STATE_POINTERS);
   GEN7_SET(gen7, 3DSTATE_DEPTH_STENCIL_STATE_POINTERS);
   GEN7_SET(gen7, 3DSTATE_BINDING_TABLE_POINTERS_VS);
   GEN7_SET(gen7, 3DSTATE_BINDING_TABLE_POINTERS_HS);
   GEN7_SET(gen7, 3DSTATE_BINDING_TABLE_POINTERS_DS);
   GEN7_SET(gen7, 3DSTATE_BINDING_TABLE_POINTERS_GS);
   GEN7_SET(gen7, 3DSTATE_BINDING_TABLE_POINTERS_PS);
   GEN7_SET(gen7, 3DSTATE_SAMPLER_STATE_POINTERS_VS);
   GEN7_SET(gen7, 3DSTATE_SAMPLER_STATE_POINTERS_HS);
   GEN7_SET(gen7, 3DSTATE_SAMPLER_STATE_POINTERS_DS);
   GEN7_SET(gen7, 3DSTATE_SAMPLER_STATE_POINTERS_GS);
   GEN7_SET(gen7, 3DSTATE_SAMPLER_STATE_POINTERS_PS);
   GEN7_SET(gen7, 3DSTATE_URB_VS);
   GEN7_SET(gen7, 3DSTATE_URB_HS);
   GEN7_SET(gen7, 3DSTATE_URB_DS);
   GEN7_SET(gen7, 3DSTATE_URB_GS);
   GEN7_USE(gen7, 3DSTATE_DRAWING_RECTANGLE, gen6);
   GEN7_USE(gen7, 3DSTATE_POLY_STIPPLE_OFFSET, gen6);
   GEN7_USE(gen7, 3DSTATE_POLY_STIPPLE_PATTERN, gen6);
   GEN7_USE(gen7, 3DSTATE_LINE_STIPPLE, gen6);
   GEN7_USE(gen7, 3DSTATE_AA_LINE_PARAMETERS, gen6);
   GEN7_USE(gen7, 3DSTATE_MULTISAMPLE, gen6);
   GEN7_SET(gen7, 3DSTATE_PUSH_CONSTANT_ALLOC_VS);
   GEN7_SET(gen7, 3DSTATE_PUSH_CONSTANT_ALLOC_HS);
   GEN7_SET(gen7, 3DSTATE_PUSH_CONSTANT_ALLOC_DS);
   GEN7_SET(gen7, 3DSTATE_PUSH_CONSTANT_ALLOC_GS);
   GEN7_SET(gen7, 3DSTATE_PUSH_CONSTANT_ALLOC_PS);
   GEN7_SET(gen7, 3DSTATE_SO_DECL_LIST);
   GEN7_SET(gen7, 3DSTATE_SO_BUFFER);
   GEN7_USE(gen7, PIPE_CONTROL, gen6);
   GEN7_SET(gen7, 3DPRIMITIVE);
   GEN7_USE(gen7, INTERFACE_DESCRIPTOR_DATA, gen6);
   GEN7_SET(gen7, SF_CLIP_VIEWPORT);
   GEN7_USE(gen7, CC_VIEWPORT, gen6);
   GEN7_USE(gen7, COLOR_CALC_STATE, gen6);
   GEN7_USE(gen7, BLEND_STATE, gen6);
   GEN7_USE(gen7, DEPTH_STENCIL_STATE, gen6);
   GEN7_USE(gen7, SCISSOR_RECT, gen6);
   GEN7_USE(gen7, BINDING_TABLE_STATE, gen6);
   GEN7_USE(gen7, SURFACE_STATE, gen6);
   GEN7_USE(gen7, SAMPLER_STATE, gen6);
   GEN7_USE(gen7, SAMPLER_BORDER_COLOR_STATE, gen6);
   GEN7_USE(gen7, push_constant_buffer, gen6);
#undef GEN7_USE
#undef GEN7_SET
}
 
/* lazily-initialized singleton function table */
static struct ilo_gpe_gen7 gen7_gpe;

/**
 * Return the GEN7 GPE, initializing it on first use.
 *
 * NOTE(review): the lazy initialization is not guarded by any lock;
 * presumably the first call happens before multiple threads can race
 * here — confirm with the callers.
 */
const struct ilo_gpe_gen7 *
ilo_gpe_gen7_get(void)
{
   /* estimate_command_size is unconditionally set by gen7_init(), so a
    * NULL value doubles as the "not yet initialized" flag */
   if (!gen7_gpe.estimate_command_size)
      gen7_init(&gen7_gpe);

   return &gen7_gpe;
}
/contrib/sdk/sources/Mesa/src/gallium/drivers/ilo/ilo_gpe_gen7.h
0,0 → 1,493
/*
* Mesa 3-D graphics library
*
* Copyright (C) 2013 LunarG, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*
* Authors:
* Chia-I Wu <olv@lunarg.com>
*/
 
#ifndef ILO_GPE_GEN7_H
#define ILO_GPE_GEN7_H
 
#include "ilo_common.h"
#include "ilo_gpe_gen6.h"
 
/**
 * Commands that GEN7 GPE could emit.
 *
 * The trailing comment on each entry gives the command's encoding as a
 * (type, opcode, sub-opcode) triple.  The enum order is used to index the
 * size tables in ilo_gpe_gen7.c, so entries must not be reordered.
 */
enum ilo_gpe_gen7_command {
   ILO_GPE_GEN7_STATE_BASE_ADDRESS,                     /* (0x0, 0x1, 0x01) */
   ILO_GPE_GEN7_STATE_SIP,                              /* (0x0, 0x1, 0x02) */
   ILO_GPE_GEN7_3DSTATE_VF_STATISTICS,                  /* (0x1, 0x0, 0x0b) */
   ILO_GPE_GEN7_PIPELINE_SELECT,                        /* (0x1, 0x1, 0x04) */
   ILO_GPE_GEN7_MEDIA_VFE_STATE,                        /* (0x2, 0x0, 0x00) */
   ILO_GPE_GEN7_MEDIA_CURBE_LOAD,                       /* (0x2, 0x0, 0x01) */
   ILO_GPE_GEN7_MEDIA_INTERFACE_DESCRIPTOR_LOAD,        /* (0x2, 0x0, 0x02) */
   ILO_GPE_GEN7_MEDIA_STATE_FLUSH,                      /* (0x2, 0x0, 0x04) */
   ILO_GPE_GEN7_GPGPU_WALKER,                           /* (0x2, 0x1, 0x05) */
   ILO_GPE_GEN7_3DSTATE_CLEAR_PARAMS,                   /* (0x3, 0x0, 0x04) */
   ILO_GPE_GEN7_3DSTATE_DEPTH_BUFFER,                   /* (0x3, 0x0, 0x05) */
   ILO_GPE_GEN7_3DSTATE_STENCIL_BUFFER,                 /* (0x3, 0x0, 0x06) */
   ILO_GPE_GEN7_3DSTATE_HIER_DEPTH_BUFFER,              /* (0x3, 0x0, 0x07) */
   ILO_GPE_GEN7_3DSTATE_VERTEX_BUFFERS,                 /* (0x3, 0x0, 0x08) */
   ILO_GPE_GEN7_3DSTATE_VERTEX_ELEMENTS,                /* (0x3, 0x0, 0x09) */
   ILO_GPE_GEN7_3DSTATE_INDEX_BUFFER,                   /* (0x3, 0x0, 0x0a) */
   ILO_GPE_GEN7_3DSTATE_CC_STATE_POINTERS,              /* (0x3, 0x0, 0x0e) */
   ILO_GPE_GEN7_3DSTATE_SCISSOR_STATE_POINTERS,         /* (0x3, 0x0, 0x0f) */
   ILO_GPE_GEN7_3DSTATE_VS,                             /* (0x3, 0x0, 0x10) */
   ILO_GPE_GEN7_3DSTATE_GS,                             /* (0x3, 0x0, 0x11) */
   ILO_GPE_GEN7_3DSTATE_CLIP,                           /* (0x3, 0x0, 0x12) */
   ILO_GPE_GEN7_3DSTATE_SF,                             /* (0x3, 0x0, 0x13) */
   ILO_GPE_GEN7_3DSTATE_WM,                             /* (0x3, 0x0, 0x14) */
   ILO_GPE_GEN7_3DSTATE_CONSTANT_VS,                    /* (0x3, 0x0, 0x15) */
   ILO_GPE_GEN7_3DSTATE_CONSTANT_GS,                    /* (0x3, 0x0, 0x16) */
   ILO_GPE_GEN7_3DSTATE_CONSTANT_PS,                    /* (0x3, 0x0, 0x17) */
   ILO_GPE_GEN7_3DSTATE_SAMPLE_MASK,                    /* (0x3, 0x0, 0x18) */
   ILO_GPE_GEN7_3DSTATE_CONSTANT_HS,                    /* (0x3, 0x0, 0x19) */
   ILO_GPE_GEN7_3DSTATE_CONSTANT_DS,                    /* (0x3, 0x0, 0x1a) */
   ILO_GPE_GEN7_3DSTATE_HS,                             /* (0x3, 0x0, 0x1b) */
   ILO_GPE_GEN7_3DSTATE_TE,                             /* (0x3, 0x0, 0x1c) */
   ILO_GPE_GEN7_3DSTATE_DS,                             /* (0x3, 0x0, 0x1d) */
   ILO_GPE_GEN7_3DSTATE_STREAMOUT,                      /* (0x3, 0x0, 0x1e) */
   ILO_GPE_GEN7_3DSTATE_SBE,                            /* (0x3, 0x0, 0x1f) */
   ILO_GPE_GEN7_3DSTATE_PS,                             /* (0x3, 0x0, 0x20) */
   ILO_GPE_GEN7_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP, /* (0x3, 0x0, 0x21) */
   ILO_GPE_GEN7_3DSTATE_VIEWPORT_STATE_POINTERS_CC,     /* (0x3, 0x0, 0x23) */
   ILO_GPE_GEN7_3DSTATE_BLEND_STATE_POINTERS,           /* (0x3, 0x0, 0x24) */
   ILO_GPE_GEN7_3DSTATE_DEPTH_STENCIL_STATE_POINTERS,   /* (0x3, 0x0, 0x25) */
   ILO_GPE_GEN7_3DSTATE_BINDING_TABLE_POINTERS_VS,      /* (0x3, 0x0, 0x26) */
   ILO_GPE_GEN7_3DSTATE_BINDING_TABLE_POINTERS_HS,      /* (0x3, 0x0, 0x27) */
   ILO_GPE_GEN7_3DSTATE_BINDING_TABLE_POINTERS_DS,      /* (0x3, 0x0, 0x28) */
   ILO_GPE_GEN7_3DSTATE_BINDING_TABLE_POINTERS_GS,      /* (0x3, 0x0, 0x29) */
   ILO_GPE_GEN7_3DSTATE_BINDING_TABLE_POINTERS_PS,      /* (0x3, 0x0, 0x2a) */
   ILO_GPE_GEN7_3DSTATE_SAMPLER_STATE_POINTERS_VS,      /* (0x3, 0x0, 0x2b) */
   ILO_GPE_GEN7_3DSTATE_SAMPLER_STATE_POINTERS_HS,      /* (0x3, 0x0, 0x2c) */
   ILO_GPE_GEN7_3DSTATE_SAMPLER_STATE_POINTERS_DS,      /* (0x3, 0x0, 0x2d) */
   ILO_GPE_GEN7_3DSTATE_SAMPLER_STATE_POINTERS_GS,      /* (0x3, 0x0, 0x2e) */
   ILO_GPE_GEN7_3DSTATE_SAMPLER_STATE_POINTERS_PS,      /* (0x3, 0x0, 0x2f) */
   ILO_GPE_GEN7_3DSTATE_URB_VS,                         /* (0x3, 0x0, 0x30) */
   ILO_GPE_GEN7_3DSTATE_URB_HS,                         /* (0x3, 0x0, 0x31) */
   ILO_GPE_GEN7_3DSTATE_URB_DS,                         /* (0x3, 0x0, 0x32) */
   ILO_GPE_GEN7_3DSTATE_URB_GS,                         /* (0x3, 0x0, 0x33) */
   ILO_GPE_GEN7_3DSTATE_DRAWING_RECTANGLE,              /* (0x3, 0x1, 0x00) */
   ILO_GPE_GEN7_3DSTATE_POLY_STIPPLE_OFFSET,            /* (0x3, 0x1, 0x06) */
   ILO_GPE_GEN7_3DSTATE_POLY_STIPPLE_PATTERN,           /* (0x3, 0x1, 0x07) */
   ILO_GPE_GEN7_3DSTATE_LINE_STIPPLE,                   /* (0x3, 0x1, 0x08) */
   ILO_GPE_GEN7_3DSTATE_AA_LINE_PARAMETERS,             /* (0x3, 0x1, 0x0a) */
   ILO_GPE_GEN7_3DSTATE_MULTISAMPLE,                    /* (0x3, 0x1, 0x0d) */
   ILO_GPE_GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_VS,         /* (0x3, 0x1, 0x12) */
   ILO_GPE_GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_HS,         /* (0x3, 0x1, 0x13) */
   ILO_GPE_GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_DS,         /* (0x3, 0x1, 0x14) */
   ILO_GPE_GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_GS,         /* (0x3, 0x1, 0x15) */
   ILO_GPE_GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_PS,         /* (0x3, 0x1, 0x16) */
   ILO_GPE_GEN7_3DSTATE_SO_DECL_LIST,                   /* (0x3, 0x1, 0x17) */
   ILO_GPE_GEN7_3DSTATE_SO_BUFFER,                      /* (0x3, 0x1, 0x18) */
   ILO_GPE_GEN7_PIPE_CONTROL,                           /* (0x3, 0x2, 0x00) */
   ILO_GPE_GEN7_3DPRIMITIVE,                            /* (0x3, 0x3, 0x00) */

   ILO_GPE_GEN7_COMMAND_COUNT,
};
 
/**
 * Indirect states that GEN7 GPE could emit.
 *
 * These are states written to buffers and referenced by pointer from
 * commands, rather than emitted inline.  The enum order indexes the size
 * table in ilo_gpe_gen7.c, so entries must not be reordered.
 */
enum ilo_gpe_gen7_state {
   ILO_GPE_GEN7_INTERFACE_DESCRIPTOR_DATA,
   ILO_GPE_GEN7_SF_CLIP_VIEWPORT,
   ILO_GPE_GEN7_CC_VIEWPORT,
   ILO_GPE_GEN7_COLOR_CALC_STATE,
   ILO_GPE_GEN7_BLEND_STATE,
   ILO_GPE_GEN7_DEPTH_STENCIL_STATE,
   ILO_GPE_GEN7_SCISSOR_RECT,
   ILO_GPE_GEN7_BINDING_TABLE_STATE,
   ILO_GPE_GEN7_SURFACE_STATE,
   ILO_GPE_GEN7_SAMPLER_STATE,
   ILO_GPE_GEN7_SAMPLER_BORDER_COLOR_STATE,
   ILO_GPE_GEN7_PUSH_CONSTANT_BUFFER,

   ILO_GPE_GEN7_STATE_COUNT,
};
 
/* emitter types whose signatures are unchanged from GEN6 */
typedef ilo_gpe_gen6_STATE_BASE_ADDRESS ilo_gpe_gen7_STATE_BASE_ADDRESS;
typedef ilo_gpe_gen6_STATE_SIP ilo_gpe_gen7_STATE_SIP;
typedef ilo_gpe_gen6_3DSTATE_VF_STATISTICS ilo_gpe_gen7_3DSTATE_VF_STATISTICS;
typedef ilo_gpe_gen6_PIPELINE_SELECT ilo_gpe_gen7_PIPELINE_SELECT;
typedef ilo_gpe_gen6_MEDIA_VFE_STATE ilo_gpe_gen7_MEDIA_VFE_STATE;
typedef ilo_gpe_gen6_MEDIA_CURBE_LOAD ilo_gpe_gen7_MEDIA_CURBE_LOAD;
typedef ilo_gpe_gen6_MEDIA_INTERFACE_DESCRIPTOR_LOAD ilo_gpe_gen7_MEDIA_INTERFACE_DESCRIPTOR_LOAD;
typedef ilo_gpe_gen6_MEDIA_STATE_FLUSH ilo_gpe_gen7_MEDIA_STATE_FLUSH;

/* GPGPU_WALKER is new on GEN7 */
typedef void
(*ilo_gpe_gen7_GPGPU_WALKER)(const struct ilo_dev_info *dev,
                             struct ilo_cp *cp);

typedef ilo_gpe_gen6_3DSTATE_CLEAR_PARAMS ilo_gpe_gen7_3DSTATE_CLEAR_PARAMS;
typedef ilo_gpe_gen6_3DSTATE_DEPTH_BUFFER ilo_gpe_gen7_3DSTATE_DEPTH_BUFFER;
typedef ilo_gpe_gen6_3DSTATE_STENCIL_BUFFER ilo_gpe_gen7_3DSTATE_STENCIL_BUFFER;
typedef ilo_gpe_gen6_3DSTATE_HIER_DEPTH_BUFFER ilo_gpe_gen7_3DSTATE_HIER_DEPTH_BUFFER;
typedef ilo_gpe_gen6_3DSTATE_VERTEX_BUFFERS ilo_gpe_gen7_3DSTATE_VERTEX_BUFFERS;
typedef ilo_gpe_gen6_3DSTATE_VERTEX_ELEMENTS ilo_gpe_gen7_3DSTATE_VERTEX_ELEMENTS;
typedef ilo_gpe_gen6_3DSTATE_INDEX_BUFFER ilo_gpe_gen7_3DSTATE_INDEX_BUFFER;
 
/* emitter types for GEN7 fixed-function pipeline stage commands */
typedef void
(*ilo_gpe_gen7_3DSTATE_CC_STATE_POINTERS)(const struct ilo_dev_info *dev,
                                          uint32_t color_calc_state,
                                          struct ilo_cp *cp);

typedef ilo_gpe_gen6_3DSTATE_SCISSOR_STATE_POINTERS ilo_gpe_gen7_3DSTATE_SCISSOR_STATE_POINTERS;
typedef ilo_gpe_gen6_3DSTATE_VS ilo_gpe_gen7_3DSTATE_VS;

typedef void
(*ilo_gpe_gen7_3DSTATE_GS)(const struct ilo_dev_info *dev,
                           const struct ilo_shader_state *gs,
                           int num_samplers,
                           struct ilo_cp *cp);

typedef ilo_gpe_gen6_3DSTATE_CLIP ilo_gpe_gen7_3DSTATE_CLIP;

typedef void
(*ilo_gpe_gen7_3DSTATE_SF)(const struct ilo_dev_info *dev,
                           const struct ilo_rasterizer_state *rasterizer,
                           const struct pipe_surface *zs_surf,
                           struct ilo_cp *cp);

typedef void
(*ilo_gpe_gen7_3DSTATE_WM)(const struct ilo_dev_info *dev,
                           const struct ilo_shader_state *fs,
                           const struct ilo_rasterizer_state *rasterizer,
                           bool cc_may_kill,
                           struct ilo_cp *cp);

typedef ilo_gpe_gen6_3DSTATE_CONSTANT_VS ilo_gpe_gen7_3DSTATE_CONSTANT_VS;
typedef ilo_gpe_gen6_3DSTATE_CONSTANT_GS ilo_gpe_gen7_3DSTATE_CONSTANT_GS;
typedef ilo_gpe_gen6_3DSTATE_CONSTANT_PS ilo_gpe_gen7_3DSTATE_CONSTANT_PS;

typedef void
(*ilo_gpe_gen7_3DSTATE_SAMPLE_MASK)(const struct ilo_dev_info *dev,
                                    unsigned sample_mask,
                                    int num_samples,
                                    struct ilo_cp *cp);

/* tessellation stage constants — the HS/DS stages are new on GEN7 */
typedef void
(*ilo_gpe_gen7_3DSTATE_CONSTANT_HS)(const struct ilo_dev_info *dev,
                                    const uint32_t *bufs, const int *sizes,
                                    int num_bufs,
                                    struct ilo_cp *cp);

typedef void
(*ilo_gpe_gen7_3DSTATE_CONSTANT_DS)(const struct ilo_dev_info *dev,
                                    const uint32_t *bufs, const int *sizes,
                                    int num_bufs,
                                    struct ilo_cp *cp);

typedef void
(*ilo_gpe_gen7_3DSTATE_HS)(const struct ilo_dev_info *dev,
                           const struct ilo_shader_state *hs,
                           int num_samplers,
                           struct ilo_cp *cp);

typedef void
(*ilo_gpe_gen7_3DSTATE_TE)(const struct ilo_dev_info *dev,
                           struct ilo_cp *cp);

typedef void
(*ilo_gpe_gen7_3DSTATE_DS)(const struct ilo_dev_info *dev,
                           const struct ilo_shader_state *ds,
                           int num_samplers,
                           struct ilo_cp *cp);

typedef void
(*ilo_gpe_gen7_3DSTATE_STREAMOUT)(const struct ilo_dev_info *dev,
                                  unsigned buffer_mask,
                                  int vertex_attrib_count,
                                  bool rasterizer_discard,
                                  struct ilo_cp *cp);

typedef void
(*ilo_gpe_gen7_3DSTATE_SBE)(const struct ilo_dev_info *dev,
                            const struct ilo_rasterizer_state *rasterizer,
                            const struct ilo_shader_state *fs,
                            const struct ilo_shader_state *last_sh,
                            struct ilo_cp *cp);

typedef void
(*ilo_gpe_gen7_3DSTATE_PS)(const struct ilo_dev_info *dev,
                           const struct ilo_shader_state *fs,
                           int num_samplers, bool dual_blend,
                           struct ilo_cp *cp);
 
/*
 * Emitter types for GEN7 state-pointer and URB-allocation commands.  Each
 * *_POINTERS emitter takes the offset of the corresponding indirect state.
 */
typedef void
(*ilo_gpe_gen7_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP)(const struct ilo_dev_info *dev,
                                                        uint32_t viewport,
                                                        struct ilo_cp *cp);

typedef void
(*ilo_gpe_gen7_3DSTATE_VIEWPORT_STATE_POINTERS_CC)(const struct ilo_dev_info *dev,
                                                   uint32_t viewport,
                                                   struct ilo_cp *cp);

typedef void
(*ilo_gpe_gen7_3DSTATE_BLEND_STATE_POINTERS)(const struct ilo_dev_info *dev,
                                             uint32_t blend,
                                             struct ilo_cp *cp);

typedef void
(*ilo_gpe_gen7_3DSTATE_DEPTH_STENCIL_STATE_POINTERS)(const struct ilo_dev_info *dev,
                                                     uint32_t depth_stencil,
                                                     struct ilo_cp *cp);

typedef void
(*ilo_gpe_gen7_3DSTATE_BINDING_TABLE_POINTERS_VS)(const struct ilo_dev_info *dev,
                                                  uint32_t binding_table,
                                                  struct ilo_cp *cp);

typedef void
(*ilo_gpe_gen7_3DSTATE_BINDING_TABLE_POINTERS_HS)(const struct ilo_dev_info *dev,
                                                  uint32_t binding_table,
                                                  struct ilo_cp *cp);

typedef void
(*ilo_gpe_gen7_3DSTATE_BINDING_TABLE_POINTERS_DS)(const struct ilo_dev_info *dev,
                                                  uint32_t binding_table,
                                                  struct ilo_cp *cp);

typedef void
(*ilo_gpe_gen7_3DSTATE_BINDING_TABLE_POINTERS_GS)(const struct ilo_dev_info *dev,
                                                  uint32_t binding_table,
                                                  struct ilo_cp *cp);

typedef void
(*ilo_gpe_gen7_3DSTATE_BINDING_TABLE_POINTERS_PS)(const struct ilo_dev_info *dev,
                                                  uint32_t binding_table,
                                                  struct ilo_cp *cp);

typedef void
(*ilo_gpe_gen7_3DSTATE_SAMPLER_STATE_POINTERS_VS)(const struct ilo_dev_info *dev,
                                                  uint32_t sampler_state,
                                                  struct ilo_cp *cp);

typedef void
(*ilo_gpe_gen7_3DSTATE_SAMPLER_STATE_POINTERS_HS)(const struct ilo_dev_info *dev,
                                                  uint32_t sampler_state,
                                                  struct ilo_cp *cp);

typedef void
(*ilo_gpe_gen7_3DSTATE_SAMPLER_STATE_POINTERS_DS)(const struct ilo_dev_info *dev,
                                                  uint32_t sampler_state,
                                                  struct ilo_cp *cp);

typedef void
(*ilo_gpe_gen7_3DSTATE_SAMPLER_STATE_POINTERS_GS)(const struct ilo_dev_info *dev,
                                                  uint32_t sampler_state,
                                                  struct ilo_cp *cp);

typedef void
(*ilo_gpe_gen7_3DSTATE_SAMPLER_STATE_POINTERS_PS)(const struct ilo_dev_info *dev,
                                                  uint32_t sampler_state,
                                                  struct ilo_cp *cp);

typedef void
(*ilo_gpe_gen7_3DSTATE_URB_VS)(const struct ilo_dev_info *dev,
                               int offset, int size, int entry_size,
                               struct ilo_cp *cp);

typedef void
(*ilo_gpe_gen7_3DSTATE_URB_HS)(const struct ilo_dev_info *dev,
                               int offset, int size, int entry_size,
                               struct ilo_cp *cp);

typedef void
(*ilo_gpe_gen7_3DSTATE_URB_DS)(const struct ilo_dev_info *dev,
                               int offset, int size, int entry_size,
                               struct ilo_cp *cp);

typedef void
(*ilo_gpe_gen7_3DSTATE_URB_GS)(const struct ilo_dev_info *dev,
                               int offset, int size, int entry_size,
                               struct ilo_cp *cp);
 
/* remaining command emitter types, shared with or extended from GEN6 */
typedef ilo_gpe_gen6_3DSTATE_DRAWING_RECTANGLE ilo_gpe_gen7_3DSTATE_DRAWING_RECTANGLE;
typedef ilo_gpe_gen6_3DSTATE_POLY_STIPPLE_OFFSET ilo_gpe_gen7_3DSTATE_POLY_STIPPLE_OFFSET;
typedef ilo_gpe_gen6_3DSTATE_POLY_STIPPLE_PATTERN ilo_gpe_gen7_3DSTATE_POLY_STIPPLE_PATTERN;
typedef ilo_gpe_gen6_3DSTATE_LINE_STIPPLE ilo_gpe_gen7_3DSTATE_LINE_STIPPLE;
typedef ilo_gpe_gen6_3DSTATE_AA_LINE_PARAMETERS ilo_gpe_gen7_3DSTATE_AA_LINE_PARAMETERS;
typedef ilo_gpe_gen6_3DSTATE_MULTISAMPLE ilo_gpe_gen7_3DSTATE_MULTISAMPLE;

typedef void
(*ilo_gpe_gen7_3DSTATE_PUSH_CONSTANT_ALLOC_VS)(const struct ilo_dev_info *dev,
                                               int offset, int size,
                                               struct ilo_cp *cp);

typedef void
(*ilo_gpe_gen7_3DSTATE_PUSH_CONSTANT_ALLOC_HS)(const struct ilo_dev_info *dev,
                                               int offset, int size,
                                               struct ilo_cp *cp);

typedef void
(*ilo_gpe_gen7_3DSTATE_PUSH_CONSTANT_ALLOC_DS)(const struct ilo_dev_info *dev,
                                               int offset, int size,
                                               struct ilo_cp *cp);

typedef void
(*ilo_gpe_gen7_3DSTATE_PUSH_CONSTANT_ALLOC_GS)(const struct ilo_dev_info *dev,
                                               int offset, int size,
                                               struct ilo_cp *cp);

typedef void
(*ilo_gpe_gen7_3DSTATE_PUSH_CONSTANT_ALLOC_PS)(const struct ilo_dev_info *dev,
                                               int offset, int size,
                                               struct ilo_cp *cp);

/* stream-output declaration list and buffer bindings */
typedef void
(*ilo_gpe_gen7_3DSTATE_SO_DECL_LIST)(const struct ilo_dev_info *dev,
                                     const struct pipe_stream_output_info *so_info,
                                     struct ilo_cp *cp);

typedef void
(*ilo_gpe_gen7_3DSTATE_SO_BUFFER)(const struct ilo_dev_info *dev,
                                  int index, int base, int stride,
                                  const struct pipe_stream_output_target *so_target,
                                  struct ilo_cp *cp);

typedef ilo_gpe_gen6_PIPE_CONTROL ilo_gpe_gen7_PIPE_CONTROL;
typedef ilo_gpe_gen6_3DPRIMITIVE ilo_gpe_gen7_3DPRIMITIVE;
typedef ilo_gpe_gen6_INTERFACE_DESCRIPTOR_DATA ilo_gpe_gen7_INTERFACE_DESCRIPTOR_DATA;

/* returns the offset of the emitted viewport state */
typedef uint32_t
(*ilo_gpe_gen7_SF_CLIP_VIEWPORT)(const struct ilo_dev_info *dev,
                                 const struct ilo_viewport_cso *viewports,
                                 unsigned num_viewports,
                                 struct ilo_cp *cp);

typedef ilo_gpe_gen6_CC_VIEWPORT ilo_gpe_gen7_CC_VIEWPORT;
typedef ilo_gpe_gen6_COLOR_CALC_STATE ilo_gpe_gen7_COLOR_CALC_STATE;
typedef ilo_gpe_gen6_BLEND_STATE ilo_gpe_gen7_BLEND_STATE;
typedef ilo_gpe_gen6_DEPTH_STENCIL_STATE ilo_gpe_gen7_DEPTH_STENCIL_STATE;
typedef ilo_gpe_gen6_SCISSOR_RECT ilo_gpe_gen7_SCISSOR_RECT;
typedef ilo_gpe_gen6_BINDING_TABLE_STATE ilo_gpe_gen7_BINDING_TABLE_STATE;
typedef ilo_gpe_gen6_SURFACE_STATE ilo_gpe_gen7_SURFACE_STATE;
typedef ilo_gpe_gen6_SAMPLER_STATE ilo_gpe_gen7_SAMPLER_STATE;
typedef ilo_gpe_gen6_SAMPLER_BORDER_COLOR_STATE ilo_gpe_gen7_SAMPLER_BORDER_COLOR_STATE;
typedef ilo_gpe_gen6_push_constant_buffer ilo_gpe_gen7_push_constant_buffer;
 
/**
 * GEN7 graphics processing engine
 *
 * A table of size estimators and one emit_<name> function pointer per
 * command and indirect state, filled in by gen7_init().
 *
 * \see ilo_gpe_gen6
 */
struct ilo_gpe_gen7 {
   /* estimate batch space for a command/state before emitting it */
   int (*estimate_command_size)(const struct ilo_dev_info *dev,
                                enum ilo_gpe_gen7_command cmd,
                                int arg);

   int (*estimate_state_size)(const struct ilo_dev_info *dev,
                              enum ilo_gpe_gen7_state state,
                              int arg);

/* declare one emit_<name> member of the matching emitter type */
#define GEN7_EMIT(name) ilo_gpe_gen7_ ## name emit_ ## name
   GEN7_EMIT(STATE_BASE_ADDRESS);
   GEN7_EMIT(STATE_SIP);
   GEN7_EMIT(3DSTATE_VF_STATISTICS);
   GEN7_EMIT(PIPELINE_SELECT);
   GEN7_EMIT(MEDIA_VFE_STATE);
   GEN7_EMIT(MEDIA_CURBE_LOAD);
   GEN7_EMIT(MEDIA_INTERFACE_DESCRIPTOR_LOAD);
   GEN7_EMIT(MEDIA_STATE_FLUSH);
   GEN7_EMIT(GPGPU_WALKER);
   GEN7_EMIT(3DSTATE_CLEAR_PARAMS);
   GEN7_EMIT(3DSTATE_DEPTH_BUFFER);
   GEN7_EMIT(3DSTATE_STENCIL_BUFFER);
   GEN7_EMIT(3DSTATE_HIER_DEPTH_BUFFER);
   GEN7_EMIT(3DSTATE_VERTEX_BUFFERS);
   GEN7_EMIT(3DSTATE_VERTEX_ELEMENTS);
   GEN7_EMIT(3DSTATE_INDEX_BUFFER);
   GEN7_EMIT(3DSTATE_CC_STATE_POINTERS);
   GEN7_EMIT(3DSTATE_SCISSOR_STATE_POINTERS);
   GEN7_EMIT(3DSTATE_VS);
   GEN7_EMIT(3DSTATE_GS);
   GEN7_EMIT(3DSTATE_CLIP);
   GEN7_EMIT(3DSTATE_SF);
   GEN7_EMIT(3DSTATE_WM);
   GEN7_EMIT(3DSTATE_CONSTANT_VS);
   GEN7_EMIT(3DSTATE_CONSTANT_GS);
   GEN7_EMIT(3DSTATE_CONSTANT_PS);
   GEN7_EMIT(3DSTATE_SAMPLE_MASK);
   GEN7_EMIT(3DSTATE_CONSTANT_HS);
   GEN7_EMIT(3DSTATE_CONSTANT_DS);
   GEN7_EMIT(3DSTATE_HS);
   GEN7_EMIT(3DSTATE_TE);
   GEN7_EMIT(3DSTATE_DS);
   GEN7_EMIT(3DSTATE_STREAMOUT);
   GEN7_EMIT(3DSTATE_SBE);
   GEN7_EMIT(3DSTATE_PS);
   GEN7_EMIT(3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP);
   GEN7_EMIT(3DSTATE_VIEWPORT_STATE_POINTERS_CC);
   GEN7_EMIT(3DSTATE_BLEND_STATE_POINTERS);
   GEN7_EMIT(3DSTATE_DEPTH_STENCIL_STATE_POINTERS);
   GEN7_EMIT(3DSTATE_BINDING_TABLE_POINTERS_VS);
   GEN7_EMIT(3DSTATE_BINDING_TABLE_POINTERS_HS);
   GEN7_EMIT(3DSTATE_BINDING_TABLE_POINTERS_DS);
   GEN7_EMIT(3DSTATE_BINDING_TABLE_POINTERS_GS);
   GEN7_EMIT(3DSTATE_BINDING_TABLE_POINTERS_PS);
   GEN7_EMIT(3DSTATE_SAMPLER_STATE_POINTERS_VS);
   GEN7_EMIT(3DSTATE_SAMPLER_STATE_POINTERS_HS);
   GEN7_EMIT(3DSTATE_SAMPLER_STATE_POINTERS_DS);
   GEN7_EMIT(3DSTATE_SAMPLER_STATE_POINTERS_GS);
   GEN7_EMIT(3DSTATE_SAMPLER_STATE_POINTERS_PS);
   GEN7_EMIT(3DSTATE_URB_VS);
   GEN7_EMIT(3DSTATE_URB_HS);
   GEN7_EMIT(3DSTATE_URB_DS);
   GEN7_EMIT(3DSTATE_URB_GS);
   GEN7_EMIT(3DSTATE_DRAWING_RECTANGLE);
   GEN7_EMIT(3DSTATE_POLY_STIPPLE_OFFSET);
   GEN7_EMIT(3DSTATE_POLY_STIPPLE_PATTERN);
   GEN7_EMIT(3DSTATE_LINE_STIPPLE);
   GEN7_EMIT(3DSTATE_AA_LINE_PARAMETERS);
   GEN7_EMIT(3DSTATE_MULTISAMPLE);
   GEN7_EMIT(3DSTATE_PUSH_CONSTANT_ALLOC_VS);
   GEN7_EMIT(3DSTATE_PUSH_CONSTANT_ALLOC_HS);
   GEN7_EMIT(3DSTATE_PUSH_CONSTANT_ALLOC_DS);
   GEN7_EMIT(3DSTATE_PUSH_CONSTANT_ALLOC_GS);
   GEN7_EMIT(3DSTATE_PUSH_CONSTANT_ALLOC_PS);
   GEN7_EMIT(3DSTATE_SO_DECL_LIST);
   GEN7_EMIT(3DSTATE_SO_BUFFER);
   GEN7_EMIT(PIPE_CONTROL);
   GEN7_EMIT(3DPRIMITIVE);
   GEN7_EMIT(INTERFACE_DESCRIPTOR_DATA);
   GEN7_EMIT(SF_CLIP_VIEWPORT);
   GEN7_EMIT(CC_VIEWPORT);
   GEN7_EMIT(COLOR_CALC_STATE);
   GEN7_EMIT(BLEND_STATE);
   GEN7_EMIT(DEPTH_STENCIL_STATE);
   GEN7_EMIT(SCISSOR_RECT);
   GEN7_EMIT(BINDING_TABLE_STATE);
   GEN7_EMIT(SURFACE_STATE);
   GEN7_EMIT(SAMPLER_STATE);
   GEN7_EMIT(SAMPLER_BORDER_COLOR_STATE);
   GEN7_EMIT(push_constant_buffer);
#undef GEN7_EMIT
};
 
const struct ilo_gpe_gen7 *
ilo_gpe_gen7_get(void);
 
#endif /* ILO_GPE_GEN7_H */
/contrib/sdk/sources/Mesa/src/gallium/drivers/ilo/ilo_gpgpu.c
0,0 → 1,49
/*
* Mesa 3-D graphics library
*
* Copyright (C) 2012-2013 LunarG, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*
* Authors:
* Chia-I Wu <olv@lunarg.com>
*/
 
#include "ilo_context.h"
#include "ilo_gpgpu.h"
 
/*
* This is a placeholder. We will need something similar to ilo_3d_pipeline.
*/
 
/**
 * pipe_context::launch_grid stub: GPGPU dispatch is not implemented yet,
 * so all parameters are accepted and ignored.
 */
static void
ilo_launch_grid(struct pipe_context *pipe,
                const uint *block_layout, const uint *grid_layout,
                uint32_t pc, const void *input)
{
}
 
/**
 * Initialize GPGPU-related functions.
 *
 * Only pipe_context::launch_grid is hooked up, and the hook is currently
 * a no-op placeholder (see ilo_launch_grid above).
 */
void
ilo_init_gpgpu_functions(struct ilo_context *ilo)
{
   ilo->base.launch_grid = ilo_launch_grid;
}
/contrib/sdk/sources/Mesa/src/gallium/drivers/ilo/ilo_gpgpu.h
0,0 → 1,38
/*
* Mesa 3-D graphics library
*
* Copyright (C) 2012-2013 LunarG, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*
* Authors:
* Chia-I Wu <olv@lunarg.com>
*/
 
#ifndef ILO_GPGPU_H
#define ILO_GPGPU_H
 
#include "ilo_common.h"
 
struct ilo_context;
 
void
ilo_init_gpgpu_functions(struct ilo_context *ilo);
 
#endif /* ILO_GPGPU_H */
/contrib/sdk/sources/Mesa/src/gallium/drivers/ilo/ilo_public.h
0,0 → 1,37
/*
* Mesa 3-D graphics library
*
* Copyright (C) 2012-2013 LunarG, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*
* Authors:
* Chia-I Wu <olv@lunarg.com>
*/
 
#ifndef ILO_PUBLIC_H
#define ILO_PUBLIC_H
 
struct intel_winsys;
struct pipe_screen;
 
struct pipe_screen *
ilo_screen_create(struct intel_winsys *ws);
 
#endif /* ILO_PUBLIC_H */
/contrib/sdk/sources/Mesa/src/gallium/drivers/ilo/ilo_query.c
0,0 → 1,238
/*
* Mesa 3-D graphics library
*
* Copyright (C) 2012-2013 LunarG, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*
* Authors:
* Chia-I Wu <olv@lunarg.com>
*/
 
#include "intel_winsys.h"
 
#include "ilo_3d.h"
#include "ilo_context.h"
#include "ilo_cp.h"
#include "ilo_query.h"
 
/*
 * Per-query-type dispatch table, indexed by PIPE_QUERY_*.
 *
 * INFO() wires a type to the <prefix>_{begin,end,process}_query hooks;
 * INFOX() records only a human-readable name and leaves the hooks NULL
 * for types this driver does not support (ilo_create_query rejects them).
 */
static const struct {
   const char *name;

   void (*begin)(struct ilo_context *ilo, struct ilo_query *q);
   void (*end)(struct ilo_context *ilo, struct ilo_query *q);
   void (*process)(struct ilo_context *ilo, struct ilo_query *q);
} query_info[PIPE_QUERY_TYPES] = {
#define INFO(prefix, desc) { \
   .name = desc, \
   .begin = prefix ## _begin_query, \
   .end = prefix ## _end_query, \
   .process = prefix ## _process_query, \
}
#define INFOX(prefix, desc) { desc, NULL, NULL, NULL, }

   [PIPE_QUERY_OCCLUSION_COUNTER] = INFO(ilo_3d, "occlusion counter"),
   [PIPE_QUERY_OCCLUSION_PREDICATE] = INFOX(ilo_3d, "occlusion pred."),
   [PIPE_QUERY_TIMESTAMP] = INFO(ilo_3d, "timestamp"),
   [PIPE_QUERY_TIMESTAMP_DISJOINT] = INFOX(ilo_3d, "timestamp disjoint"),
   [PIPE_QUERY_TIME_ELAPSED] = INFO(ilo_3d, "time elapsed"),
   [PIPE_QUERY_PRIMITIVES_GENERATED] = INFO(ilo_3d, "primitives generated"),
   [PIPE_QUERY_PRIMITIVES_EMITTED] = INFO(ilo_3d, "primitives emitted"),
   [PIPE_QUERY_SO_STATISTICS] = INFOX(ilo_3d, "so statistics"),
   [PIPE_QUERY_SO_OVERFLOW_PREDICATE] = INFOX(ilo_3d, "so overflow pred."),
   [PIPE_QUERY_GPU_FINISHED] = INFOX(ilo_3d, "gpu finished"),
   [PIPE_QUERY_PIPELINE_STATISTICS] = INFOX(ilo_3d, "pipeline statistics"),

#undef INFO
#undef INFOX
};
 
/* downcast a gallium pipe_query to the driver's ilo_query */
static inline struct ilo_query *
ilo_query(struct pipe_query *query)
{
   return (struct ilo_query *) query;
}
 
/**
 * Create a query object of the given type.
 *
 * Only the query types this driver implements are accepted; any other
 * type yields NULL, as does allocation failure.
 */
static struct pipe_query *
ilo_create_query(struct pipe_context *pipe, unsigned query_type)
{
   struct ilo_query *query;

   /* reject query types the driver does not implement */
   if (query_type != PIPE_QUERY_OCCLUSION_COUNTER &&
       query_type != PIPE_QUERY_TIMESTAMP &&
       query_type != PIPE_QUERY_TIME_ELAPSED &&
       query_type != PIPE_QUERY_PRIMITIVES_GENERATED &&
       query_type != PIPE_QUERY_PRIMITIVES_EMITTED)
      return NULL;

   query = CALLOC_STRUCT(ilo_query);
   if (!query)
      return NULL;

   query->type = query_type;
   list_inithead(&query->list);

   return (struct pipe_query *) query;
}
 
/**
 * Destroy a query, releasing the hardware bo if one was ever allocated.
 */
static void
ilo_destroy_query(struct pipe_context *pipe, struct pipe_query *query)
{
   struct ilo_query *ilo_q = ilo_query(query);

   /* a bo exists only if the query was ever begun/ended */
   if (ilo_q->bo)
      intel_bo_unreference(ilo_q->bo);

   FREE(ilo_q);
}
 
/**
 * Start a query: mark it active and let the per-type hook record the
 * initial counter values.
 */
static void
ilo_begin_query(struct pipe_context *pipe, struct pipe_query *query)
{
   struct ilo_query *ilo_q = ilo_query(query);

   ilo_q->active = true;

   query_info[ilo_q->type].begin(ilo_context(pipe), ilo_q);
}
 
/**
 * Finish a query and mark it inactive so its result may be fetched.
 */
static void
ilo_end_query(struct pipe_context *pipe, struct pipe_query *query)
{
   struct ilo_query *ilo_q = ilo_query(query);

   query_info[ilo_q->type].end(ilo_context(pipe), ilo_q);

   /*
    * Clear the flag unconditionally: queries such as timestamp need no
    * begin_query() call, so active may already be false here.
    */
   ilo_q->active = false;
}
 
/**
 * Serialize the query data into the caller-provided buffer.  The result type
 * (union pipe_query_result) indicates only the size of the buffer; callers
 * expect the result to be "serialized".
 */
static void
serialize_query_data(unsigned type, const union pipe_query_result *data,
                     void *buf)
{
   uint64_t *dst = buf;

   switch (type) {
   case PIPE_QUERY_OCCLUSION_COUNTER:
   case PIPE_QUERY_TIMESTAMP:
   case PIPE_QUERY_TIME_ELAPSED:
   case PIPE_QUERY_PRIMITIVES_GENERATED:
   case PIPE_QUERY_PRIMITIVES_EMITTED:
      /* these types all produce a single 64-bit value */
      dst[0] = data->u64;
      break;
   default:
      /* unknown type: zero the whole buffer */
      memset(buf, 0, sizeof(union pipe_query_result));
      break;
   }
}
 
/**
 * Return the query result, serialized into \p result.  Return false when the
 * result is not yet available (query still active, or the bo is busy and
 * \p wait is false).
 */
static boolean
ilo_get_query_result(struct pipe_context *pipe, struct pipe_query *query,
                     boolean wait, union pipe_query_result *result)
{
   struct ilo_context *ilo = ilo_context(pipe);
   struct ilo_query *q = ilo_query(query);

   /* no result until ilo_end_query() */
   if (q->active)
      return false;

   if (q->bo) {
      /* commands still reference the bo; flush so it can become idle */
      if (intel_bo_references(ilo->cp->bo, q->bo))
         ilo_cp_flush(ilo->cp);

      if (!wait && intel_bo_is_busy(q->bo))
         return false;

      /* let the type-specific function process the bo contents */
      query_info[q->type].process(ilo, q);
   }

   if (result)
      serialize_query_data(q->type, &q->data, (void *) result);

   return true;
}
 
/**
 * Allocate a query bo for reading hardware statistics.
 *
 * \param reg_count specifies how many registers need to be read.
 * \param repeat_count specifies how many times the registers are read.  If
 *        zero or negative, a 4KB bo is allocated.
 *
 * \return true if q->bo is usable on return (the old bo may be reused when
 *         it is already large enough).
 */
bool
ilo_query_alloc_bo(struct ilo_query *q, int reg_count, int repeat_count,
                   struct intel_winsys *winsys)
{
   const char *name;
   int reg_total;

   name = query_info[q->type].name;

   reg_total = reg_count * repeat_count;
   if (reg_total <= 0)
      reg_total = 4096 / sizeof(uint64_t);

   /* (re-)allocate the bo only when the current one is too small */
   if (q->reg_total < reg_total) {
      /* registers are 64-bit */
      const int size = reg_total * sizeof(uint64_t);

      if (q->bo)
         intel_bo_unreference(q->bo);

      q->bo = intel_winsys_alloc_buffer(winsys, name, size, 0);
      /* on failure, record zero capacity so a later call retries */
      q->reg_total = (q->bo) ? reg_total : 0;
   }

   /* round capacity down to a multiple of reg_count to avoid partial reads */
   if (reg_count)
      q->reg_total -= q->reg_total % reg_count;

   q->reg_read = 0;

   return (q->bo != NULL);
}
 
/**
* Initialize query-related functions.
*/
void
ilo_init_query_functions(struct ilo_context *ilo)
{
ilo->base.create_query = ilo_create_query;
ilo->base.destroy_query = ilo_destroy_query;
ilo->base.begin_query = ilo_begin_query;
ilo->base.end_query = ilo_end_query;
ilo->base.get_query_result = ilo_get_query_result;
}
/contrib/sdk/sources/Mesa/src/gallium/drivers/ilo/ilo_query.h
0,0 → 1,62
/*
* Mesa 3-D graphics library
*
* Copyright (C) 2012-2013 LunarG, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*
* Authors:
* Chia-I Wu <olv@lunarg.com>
*/
 
#ifndef ILO_QUERY_H
#define ILO_QUERY_H
 
#include "ilo_common.h"
 
struct intel_bo;
struct ilo_context;
 
/**
* Queries can be bound to various places in the driver. While bound, it tells
* the driver to collect the data indicated by the type of the query.
*/
struct ilo_query {
unsigned type;
bool active;
 
struct list_head list;
 
/* storage for the collected data */
union pipe_query_result data;
 
/* for queries that need to read hardware statistics */
struct intel_bo *bo;
int reg_read, reg_total;
int reg_cmd_size; /* in dwords, as expected by ilo_cp */
};
 
void
ilo_init_query_functions(struct ilo_context *ilo);
 
bool
ilo_query_alloc_bo(struct ilo_query *q, int reg_count, int repeat_count,
struct intel_winsys *winsys);
 
#endif /* ILO_QUERY_H */
/contrib/sdk/sources/Mesa/src/gallium/drivers/ilo/ilo_resource.c
0,0 → 1,1371
/*
* Mesa 3-D graphics library
*
* Copyright (C) 2012-2013 LunarG, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*
* Authors:
* Chia-I Wu <olv@lunarg.com>
*/
 
#include "ilo_screen.h"
#include "ilo_resource.h"
 
/* use PIPE_BIND_CUSTOM to indicate MCS */
#define ILO_BIND_MCS PIPE_BIND_CUSTOM
 
/*
 * Scratch state for computing the layout of a texture.  It is filled in by
 * the tex_layout_init*() helpers and finally copied into an ilo_texture by
 * tex_layout_apply().
 */
struct tex_layout {
   const struct ilo_dev_info *dev;     /* for GPU generation checks */
   const struct pipe_resource *templ;  /* the resource template */

   enum pipe_format format;            /* format actually used for the bo */
   unsigned block_width, block_height, block_size; /* format block geometry */
   bool compressed;
   bool has_depth, has_stencil, separate_stencil;

   enum intel_tiling_mode tiling;      /* chosen tiling mode */
   bool can_be_linear;                 /* linear is also an allowed tiling */

   bool array_spacing_full;            /* full vs. LOD0-only array spacing */
   bool interleaved;                   /* samples are interleaved in a slice */

   struct {
      int w, h, d;                     /* padded dimensions of the level */
      struct ilo_texture_slice *slices; /* where slice offsets go; may be NULL */
   } levels[PIPE_MAX_TEXTURE_LEVELS];

   int align_i, align_j;               /* horizontal/vertical alignment units */
   int qpitch;                         /* distance between slices, in texel rows */

   int width, height;                  /* total bo size, in texels */
};
 
/**
 * Compute the qpitch: the vertical distance, in texel rows, between two
 * array slices.  Requires levels[] and align_j to be initialized.
 */
static void
tex_layout_init_qpitch(struct tex_layout *layout)
{
   const struct pipe_resource *templ = layout->templ;
   int h0, h1;

   /* qpitch is only meaningful for array textures */
   if (templ->array_size <= 1)
      return;

   h0 = align(layout->levels[0].h, layout->align_j);

   /* compact spacing: slices are only h0 apart */
   if (!layout->array_spacing_full) {
      layout->qpitch = h0;
      return;
   }

   h1 = align(layout->levels[1].h, layout->align_j);

   /*
    * From the Sandy Bridge PRM, volume 1 part 1, page 115:
    *
    *     "The following equation is used for surface formats other than
    *      compressed textures:
    *
    *        QPitch = (h0 + h1 + 11j)"
    *
    *     "The equation for compressed textures (BC* and FXT1 surface formats)
    *      follows:
    *
    *        QPitch = (h0 + h1 + 11j) / 4"
    *
    *     "[DevSNB] Errata: Sampler MSAA Qpitch will be 4 greater than the
    *      value calculated in the equation above, for every other odd Surface
    *      Height starting from 1 i.e. 1,5,9,13"
    *
    * From the Ivy Bridge PRM, volume 1 part 1, page 111-112:
    *
    *     "If Surface Array Spacing is set to ARYSPC_FULL (note that the depth
    *      buffer and stencil buffer have an implied value of ARYSPC_FULL):
    *
    *        QPitch = (h0 + h1 + 12j)
    *        QPitch = (h0 + h1 + 12j) / 4 (compressed)
    *
    *      (There are many typos or missing words here...)"
    *
    * To access the N-th slice, an offset of (Stride * QPitch * N) is added to
    * the base address.  The PRM divides QPitch by 4 for compressed formats
    * because the block height for those formats are 4, and it wants QPitch to
    * mean the number of memory rows, as opposed to texel rows, between
    * slices.  Since we use texel rows in tex->slice_offsets, we do not need
    * to divide QPitch by 4.
    */
   layout->qpitch = h0 + h1 +
      ((layout->dev->gen >= ILO_GEN(7)) ? 12 : 11) * layout->align_j;

   /* apply the DevSNB MSAA errata quoted above */
   if (layout->dev->gen == ILO_GEN(6) && templ->nr_samples > 1 &&
       templ->height0 % 4 == 1)
      layout->qpitch += 4;
}
 
/**
 * Pick align_i and align_j, the horizontal and vertical alignment units for
 * mip level placement.  Requires format, tiling, and levels to be
 * initialized.
 */
static void
tex_layout_init_alignments(struct tex_layout *layout)
{
   const struct pipe_resource *templ = layout->templ;

   /*
    * From the Sandy Bridge PRM, volume 1 part 1, page 113:
    *
    *     "surface format           align_i     align_j
    *      YUV 4:2:2 formats        4           *see below
    *      BC1-5                    4           4
    *      FXT1                     8           4
    *      all other formats        4           *see below"
    *
    *     "- align_j = 4 for any depth buffer
    *      - align_j = 2 for separate stencil buffer
    *      - align_j = 4 for any render target surface is multisampled (4x)
    *      - align_j = 4 for any render target surface with Surface Vertical
    *        Alignment = VALIGN_4
    *      - align_j = 2 for any render target surface with Surface Vertical
    *        Alignment = VALIGN_2
    *      - align_j = 2 for all other render target surface
    *      - align_j = 2 for any sampling engine surface with Surface Vertical
    *        Alignment = VALIGN_2
    *      - align_j = 4 for any sampling engine surface with Surface Vertical
    *        Alignment = VALIGN_4"
    *
    * From the Sandy Bridge PRM, volume 4 part 1, page 86:
    *
    *     "This field (Surface Vertical Alignment) must be set to VALIGN_2 if
    *      the Surface Format is 96 bits per element (BPE)."
    *
    * They can be rephrased as
    *
    *                                  align_i        align_j
    *   compressed formats             block width    block height
    *   PIPE_FORMAT_S8_UINT            4              2
    *   other depth/stencil formats    4              4
    *   4x multisampled                4              4
    *   bpp 96                         4              2
    *   others                         4              2 or 4
    */

   /*
    * From the Ivy Bridge PRM, volume 1 part 1, page 110:
    *
    *     "surface defined by      surface format     align_i     align_j
    *      3DSTATE_DEPTH_BUFFER    D16_UNORM          8           4
    *                              not D16_UNORM      4           4
    *      3DSTATE_STENCIL_BUFFER  N/A                8           8
    *      SURFACE_STATE           BC*, ETC*, EAC*    4           4
    *                              FXT1               8           4
    *                              all others         (set by SURFACE_STATE)"
    *
    * From the Ivy Bridge PRM, volume 4 part 1, page 63:
    *
    *     "- This field (Surface Vertical Aligment) is intended to be set to
    *        VALIGN_4 if the surface was rendered as a depth buffer, for a
    *        multisampled (4x) render target, or for a multisampled (8x)
    *        render target, since these surfaces support only alignment of 4.
    *      - Use of VALIGN_4 for other surfaces is supported, but uses more
    *        memory.
    *      - This field must be set to VALIGN_4 for all tiled Y Render Target
    *        surfaces.
    *      - Value of 1 is not supported for format YCRCB_NORMAL (0x182),
    *        YCRCB_SWAPUVY (0x183), YCRCB_SWAPUV (0x18f), YCRCB_SWAPY (0x190)
    *      - If Number of Multisamples is not MULTISAMPLECOUNT_1, this field
    *        must be set to VALIGN_4."
    *      - VALIGN_4 is not supported for surface format R32G32B32_FLOAT."
    *
    *     "- This field (Surface Horizontal Aligment) is intended to be set to
    *        HALIGN_8 only if the surface was rendered as a depth buffer with
    *        Z16 format or a stencil buffer, since these surfaces support only
    *        alignment of 8.
    *      - Use of HALIGN_8 for other surfaces is supported, but uses more
    *        memory.
    *      - This field must be set to HALIGN_4 if the Surface Format is BC*.
    *      - This field must be set to HALIGN_8 if the Surface Format is
    *        FXT1."
    *
    * They can be rephrased as
    *
    *                                  align_i        align_j
    *  compressed formats              block width    block height
    *  PIPE_FORMAT_Z16_UNORM           8              4
    *  PIPE_FORMAT_S8_UINT             8              8
    *  other depth/stencil formats     4 or 8         4
    *  2x or 4x multisampled           4 or 8         4
    *  tiled Y                         4 or 8         4 (if rt)
    *  PIPE_FORMAT_R32G32B32_FLOAT     4 or 8         2
    *  others                          4 or 8         2 or 4
    */

   if (layout->compressed) {
      /* this happens to be the case */
      layout->align_i = layout->block_width;
      layout->align_j = layout->block_height;
   }
   else if (layout->has_depth || layout->has_stencil) {
      if (layout->dev->gen >= ILO_GEN(7)) {
         switch (layout->format) {
         case PIPE_FORMAT_Z16_UNORM:
            layout->align_i = 8;
            layout->align_j = 4;
            break;
         case PIPE_FORMAT_S8_UINT:
            layout->align_i = 8;
            layout->align_j = 8;
            break;
         default:
            /*
             * From the Ivy Bridge PRM, volume 2 part 1, page 319:
             *
             *     "The 3 LSBs of both offsets (Depth Coordinate Offset Y and
             *      Depth Coordinate Offset X) must be zero to ensure correct
             *      alignment"
             *
             * We will make use of them and setting align_i to 8 help us meet
             * the requirement.
             */
            layout->align_i = (templ->last_level > 0) ? 8 : 4;
            layout->align_j = 4;
            break;
         }
      }
      else {
         switch (layout->format) {
         case PIPE_FORMAT_S8_UINT:
            layout->align_i = 4;
            layout->align_j = 2;
            break;
         default:
            layout->align_i = 4;
            layout->align_j = 4;
            break;
         }
      }
   }
   else {
      /* VALIGN_4 is mandatory for MSAA, and for GEN7+ Y-tiled render targets */
      const bool valign_4 = (templ->nr_samples > 1) ||
         (layout->dev->gen >= ILO_GEN(7) &&
          layout->tiling == INTEL_TILING_Y &&
          (templ->bind & PIPE_BIND_RENDER_TARGET));

      /* VALIGN_4 is not supported for 96-bpp (R32G32B32) formats */
      if (valign_4)
         assert(layout->block_size != 12);

      layout->align_i = 4;
      layout->align_j = (valign_4) ? 4 : 2;
   }

   /*
    * the fact that align i and j are multiples of block width and height
    * respectively is what makes the size of the bo a multiple of the block
    * size, slices start at block boundaries, and many of the computations
    * work.
    */
   assert(layout->align_i % layout->block_width == 0);
   assert(layout->align_j % layout->block_height == 0);

   /* make sure align() works */
   assert(util_is_power_of_two(layout->align_i) &&
          util_is_power_of_two(layout->align_j));
   assert(util_is_power_of_two(layout->block_width) &&
          util_is_power_of_two(layout->block_height));
}
 
/**
 * Compute the padded dimensions of every mip level.  Requires format and
 * spacing (interleaved/array_spacing_full) to be initialized.
 */
static void
tex_layout_init_levels(struct tex_layout *layout)
{
   const struct pipe_resource *templ = layout->templ;
   int last_level, lv;

   last_level = templ->last_level;

   /* need at least 2 levels to compute full qpitch */
   if (last_level == 0 && templ->array_size > 1 && layout->array_spacing_full)
      last_level++;

   /* compute mip level sizes */
   for (lv = 0; lv <= last_level; lv++) {
      int w, h, d;

      w = u_minify(templ->width0, lv);
      h = u_minify(templ->height0, lv);
      d = u_minify(templ->depth0, lv);

      /*
       * From the Sandy Bridge PRM, volume 1 part 1, page 114:
       *
       *     "The dimensions of the mip maps are first determined by applying
       *      the sizing algorithm presented in Non-Power-of-Two Mipmaps
       *      above.  Then, if necessary, they are padded out to compression
       *      block boundaries."
       */
      w = align(w, layout->block_width);
      h = align(h, layout->block_height);

      /*
       * From the Sandy Bridge PRM, volume 1 part 1, page 111:
       *
       *     "If the surface is multisampled (4x), these values must be
       *      adjusted as follows before proceeding:
       *
       *        W_L = ceiling(W_L / 2) * 4
       *        H_L = ceiling(H_L / 2) * 4"
       *
       * From the Ivy Bridge PRM, volume 1 part 1, page 108:
       *
       *     "If the surface is multisampled and it is a depth or stencil
       *      surface or Multisampled Surface StorageFormat in SURFACE_STATE
       *      is MSFMT_DEPTH_STENCIL, W_L and H_L must be adjusted as follows
       *      before proceeding:
       *
       *        #samples  W_L =                    H_L =
       *        2         ceiling(W_L / 2) * 4     HL [no adjustment]
       *        4         ceiling(W_L / 2) * 4     ceiling(H_L / 2) * 4
       *        8         ceiling(W_L / 2) * 8     ceiling(H_L / 2) * 4
       *        16        ceiling(W_L / 2) * 8     ceiling(H_L / 2) * 8"
       *
       * For interleaved samples (4x), where pixels
       *
       *   (x, y  ) (x+1, y  )
       *   (x, y+1) (x+1, y+1)
       *
       * would be is occupied by
       *
       *   (x, y  , si0) (x+1, y  , si0) (x, y  , si1) (x+1, y  , si1)
       *   (x, y+1, si0) (x+1, y+1, si0) (x, y+1, si1) (x+1, y+1, si1)
       *   (x, y  , si2) (x+1, y  , si2) (x, y  , si3) (x+1, y  , si3)
       *   (x, y+1, si2) (x+1, y+1, si2) (x, y+1, si3) (x+1, y+1, si3)
       *
       * Thus the need to
       *
       *   w = align(w, 2) * 2;
       *   y = align(y, 2) * 2;
       */
      if (layout->interleaved) {
         switch (templ->nr_samples) {
         case 0:
         case 1:
            /* single-sampled; no adjustment */
            break;
         case 2:
            w = align(w, 2) * 2;
            break;
         case 4:
            w = align(w, 2) * 2;
            h = align(h, 2) * 2;
            break;
         case 8:
            w = align(w, 2) * 4;
            h = align(h, 2) * 2;
            break;
         case 16:
            w = align(w, 2) * 4;
            h = align(h, 2) * 4;
            break;
         default:
            assert(!"unsupported sample count");
            break;
         }
      }

      layout->levels[lv].w = w;
      layout->levels[lv].h = h;
      layout->levels[lv].d = d;
   }
}
 
/**
 * Decide whether samples are interleaved within a slice and whether array
 * slices use full spacing.  Requires format to be initialized.
 */
static void
tex_layout_init_spacing(struct tex_layout *layout)
{
   const struct pipe_resource *templ = layout->templ;

   if (layout->dev->gen >= ILO_GEN(7)) {
      /*
       * It is not explicitly states, but render targets are expected to be
       * UMS/CMS (samples non-interleaved) and depth/stencil buffers are
       * expected to be IMS (samples interleaved).
       *
       * See "Multisampled Surface Storage Format" field of SURFACE_STATE.
       */
      if (layout->has_depth || layout->has_stencil) {
         layout->interleaved = true;

         /*
          * From the Ivy Bridge PRM, volume 1 part 1, page 111:
          *
          *     "note that the depth buffer and stencil buffer have an implied
          *      value of ARYSPC_FULL"
          */
         layout->array_spacing_full = true;
      }
      else {
         layout->interleaved = false;

         /*
          * From the Ivy Bridge PRM, volume 4 part 1, page 66:
          *
          *     "If Multisampled Surface Storage Format is MSFMT_MSS and
          *      Number of Multisamples is not MULTISAMPLECOUNT_1, this field
          *      (Surface Array Spacing) must be set to ARYSPC_LOD0."
          *
          * As multisampled resources are not mipmapped, we never use
          * ARYSPC_FULL for them.
          */
         if (templ->nr_samples > 1)
            assert(templ->last_level == 0);
         layout->array_spacing_full = (templ->last_level > 0);
      }
   }
   else {
      /* GEN6 supports only interleaved samples */
      layout->interleaved = true;

      /*
       * From the Sandy Bridge PRM, volume 1 part 1, page 115:
       *
       *     "The separate stencil buffer does not support mip mapping, thus
       *      the storage for LODs other than LOD 0 is not needed.  The
       *      following QPitch equation applies only to the separate stencil
       *      buffer:
       *
       *        QPitch = h_0"
       *
       * GEN6 does not support compact spacing otherwise.
       */
      layout->array_spacing_full = (layout->format != PIPE_FORMAT_S8_UINT);
   }
}
 
/**
 * Choose the tiling mode.  Binding flags and format restrict the set of
 * valid tilings; a tiled mode is preferred over linear when still allowed.
 * Requires format to be initialized.
 */
static void
tex_layout_init_tiling(struct tex_layout *layout)
{
   const struct pipe_resource *templ = layout->templ;
   const enum pipe_format format = layout->format;
   const unsigned tile_none = 1 << INTEL_TILING_NONE;
   const unsigned tile_x = 1 << INTEL_TILING_X;
   const unsigned tile_y = 1 << INTEL_TILING_Y;
   /* bitmask of tilings still allowed; constraints below only remove bits */
   unsigned valid_tilings = tile_none | tile_x | tile_y;

   /*
    * From the Sandy Bridge PRM, volume 1 part 2, page 32:
    *
    *     "Display/Overlay   Y-Major not supported.
    *                        X-Major required for Async Flips"
    */
   if (unlikely(templ->bind & PIPE_BIND_SCANOUT))
      valid_tilings &= tile_x;

   /*
    * From the Sandy Bridge PRM, volume 3 part 2, page 158:
    *
    *     "The cursor surface address must be 4K byte aligned.  The cursor must
    *      be in linear memory, it cannot be tiled."
    */
   if (unlikely(templ->bind & PIPE_BIND_CURSOR))
      valid_tilings &= tile_none;

   /*
    * From the Ivy Bridge PRM, volume 4 part 1, page 76:
    *
    *     "The MCS surface must be stored as Tile Y."
    */
   if (templ->bind & ILO_BIND_MCS)
      valid_tilings &= tile_y;

   /*
    * From the Sandy Bridge PRM, volume 2 part 1, page 318:
    *
    *     "[DevSNB+]: This field (Tiled Surface) must be set to TRUE.  Linear
    *      Depth Buffer is not supported."
    *
    *     "The Depth Buffer, if tiled, must use Y-Major tiling."
    *
    * From the Sandy Bridge PRM, volume 1 part 2, page 22:
    *
    *     "W-Major Tile Format is used for separate stencil."
    *
    * Since the HW does not support W-tiled fencing, we have to do it in the
    * driver.
    */
   if (templ->bind & PIPE_BIND_DEPTH_STENCIL) {
      switch (format) {
      case PIPE_FORMAT_S8_UINT:
         valid_tilings &= tile_none;
         break;
      default:
         valid_tilings &= tile_y;
         break;
      }
   }

   if (templ->bind & (PIPE_BIND_RENDER_TARGET | PIPE_BIND_SAMPLER_VIEW)) {
      if (templ->bind & PIPE_BIND_RENDER_TARGET) {
         /*
          * From the Sandy Bridge PRM, volume 1 part 2, page 32:
          *
          *     "NOTE: 128BPE Format Color buffer ( render target ) MUST be
          *      either TileX or Linear."
          */
         if (layout->block_size == 16)
            valid_tilings &= ~tile_y;

         /*
          * From the Ivy Bridge PRM, volume 4 part 1, page 63:
          *
          *     "This field (Surface Vertical Aligment) must be set to
          *      VALIGN_4 for all tiled Y Render Target surfaces."
          *
          *     "VALIGN_4 is not supported for surface format
          *      R32G32B32_FLOAT."
          */
         if (layout->dev->gen >= ILO_GEN(7) && layout->block_size == 12)
            valid_tilings &= ~tile_y;
      }

      /*
       * Also, heuristically set a minimum width/height for enabling tiling.
       */
      if (templ->width0 < 64 && (valid_tilings & ~tile_x))
         valid_tilings &= ~tile_x;

      if ((templ->width0 < 32 || templ->height0 < 16) &&
          (templ->width0 < 16 || templ->height0 < 32) &&
          (valid_tilings & ~tile_y))
         valid_tilings &= ~tile_y;
   }
   else {
      /* force linear if we are not sure where the texture is bound to */
      if (valid_tilings & tile_none)
         valid_tilings &= tile_none;
   }

   /* no conflicting binding flags */
   assert(valid_tilings);

   /* prefer tiled than linear */
   if (valid_tilings & tile_y)
      layout->tiling = INTEL_TILING_Y;
   else if (valid_tilings & tile_x)
      layout->tiling = INTEL_TILING_X;
   else
      layout->tiling = INTEL_TILING_NONE;

   layout->can_be_linear = valid_tilings & tile_none;
}
 
static void
tex_layout_init_format(struct tex_layout *layout)
{
const struct pipe_resource *templ = layout->templ;
enum pipe_format format;
const struct util_format_description *desc;
bool separate_stencil;
 
/* GEN7+ requires separate stencil buffers */
separate_stencil = (layout->dev->gen >= ILO_GEN(7));
 
switch (templ->format) {
case PIPE_FORMAT_ETC1_RGB8:
format = PIPE_FORMAT_R8G8B8X8_UNORM;
break;
case PIPE_FORMAT_Z24_UNORM_S8_UINT:
if (separate_stencil) {
format = PIPE_FORMAT_Z24X8_UNORM;
layout->separate_stencil = true;
}
else {
format = templ->format;
}
break;
case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
if (separate_stencil) {
format = PIPE_FORMAT_Z32_FLOAT;
layout->separate_stencil = true;
}
else {
format = templ->format;
}
break;
default:
format = templ->format;
break;
}
 
layout->format = format;
 
layout->block_width = util_format_get_blockwidth(format);
layout->block_height = util_format_get_blockheight(format);
layout->block_size = util_format_get_blocksize(format);
layout->compressed = util_format_is_compressed(format);
 
desc = util_format_description(format);
layout->has_depth = util_format_has_depth(desc);
layout->has_stencil = util_format_has_stencil(desc);
}
 
/**
 * Initialize the layout from the resource template.  When \p slices is
 * non-NULL, it supplies the per-level arrays that will receive the slice
 * offsets computed by tex_layout_2d()/tex_layout_3d().
 */
static void
tex_layout_init(struct tex_layout *layout,
                struct pipe_screen *screen,
                const struct pipe_resource *templ,
                struct ilo_texture_slice **slices)
{
   struct ilo_screen *is = ilo_screen(screen);

   memset(layout, 0, sizeof(*layout));

   layout->dev = &is->dev;
   layout->templ = templ;

   /* note that there are dependencies between these functions */
   tex_layout_init_format(layout);
   tex_layout_init_tiling(layout);
   tex_layout_init_spacing(layout);
   tex_layout_init_levels(layout);
   tex_layout_init_alignments(layout);
   tex_layout_init_qpitch(layout);

   if (slices) {
      int lv;

      for (lv = 0; lv <= templ->last_level; lv++)
         layout->levels[lv].slices = slices[lv];
   }
}
 
/**
 * Switch the layout to linear, when linear is among the valid tilings.
 * Return false when the layout cannot be linear.
 */
static bool
tex_layout_force_linear(struct tex_layout *layout)
{
   const bool ok = layout->can_be_linear;

   if (ok) {
      /*
       * we may be able to switch from VALIGN_4 to VALIGN_2 when the layout
       * was Y-tiled, but let's keep it simple
       */
      layout->tiling = INTEL_TILING_NONE;
   }

   return ok;
}
 
/**
 * Layout a 2D texture: place the mip levels within a single slice, then
 * account for the array slices, which are qpitch apart vertically.
 */
static void
tex_layout_2d(struct tex_layout *layout)
{
   const struct pipe_resource *templ = layout->templ;
   unsigned int level_x, level_y, num_slices;
   int lv;

   level_x = 0;
   level_y = 0;
   for (lv = 0; lv <= templ->last_level; lv++) {
      const unsigned int level_w = layout->levels[lv].w;
      const unsigned int level_h = layout->levels[lv].h;
      int slice;

      /* set slice offsets */
      if (layout->levels[lv].slices) {
         for (slice = 0; slice < templ->array_size; slice++) {
            layout->levels[lv].slices[slice].x = level_x;
            /* slices are qpitch apart in Y-direction */
            layout->levels[lv].slices[slice].y =
               level_y + layout->qpitch * slice;
         }
      }

      /* extend the size of the monolithic bo to cover this mip level */
      if (layout->width < level_x + level_w)
         layout->width = level_x + level_w;
      if (layout->height < level_y + level_h)
         layout->height = level_y + level_h;

      /*
       * MIPLAYOUT_BELOW: level 1 goes below level 0, and levels 2 and up
       * are stacked vertically to the right of level 1
       */
      if (lv == 1)
         level_x += align(level_w, layout->align_i);
      else
         level_y += align(level_h, layout->align_j);
   }

   num_slices = templ->array_size;
   /* samples of the same index are stored in a slice */
   if (templ->nr_samples > 1 && !layout->interleaved)
      num_slices *= templ->nr_samples;

   /* we did not take slices into consideration in the computation above */
   layout->height += layout->qpitch * (num_slices - 1);
}
 
/**
 * Layout a 3D texture: each level packs its depth slices in rows of
 * (1 << level) slices, with the rows stacked vertically.
 */
static void
tex_layout_3d(struct tex_layout *layout)
{
   const struct pipe_resource *templ = layout->templ;
   unsigned int level_y;
   int lv;

   level_y = 0;
   for (lv = 0; lv <= templ->last_level; lv++) {
      const unsigned int level_w = layout->levels[lv].w;
      const unsigned int level_h = layout->levels[lv].h;
      const unsigned int level_d = layout->levels[lv].d;
      /* horizontal/vertical distance between slices within this level */
      const unsigned int slice_pitch = align(level_w, layout->align_i);
      const unsigned int slice_qpitch = align(level_h, layout->align_j);
      const unsigned int num_slices_per_row = 1 << lv;
      int slice;

      for (slice = 0; slice < level_d; slice += num_slices_per_row) {
         int i;

         /* set slice offsets for this row of slices */
         if (layout->levels[lv].slices) {
            for (i = 0; i < num_slices_per_row && slice + i < level_d; i++) {
               layout->levels[lv].slices[slice + i].x = slice_pitch * i;
               layout->levels[lv].slices[slice + i].y = level_y;
            }
         }

         /* move on to the next slice row */
         level_y += slice_qpitch;
      }

      /* rightmost slice */
      slice = MIN2(num_slices_per_row, level_d) - 1;

      /* extend the size of the monolithic bo to cover this slice */
      if (layout->width < slice_pitch * slice + level_w)
         layout->width = slice_pitch * slice + level_w;
      if (lv == templ->last_level)
         layout->height = (level_y - slice_qpitch) + level_h;
   }
}
 
/**
 * Apply the final padding that the hardware requires on the total width and
 * height, and sanity-check the result against the block geometry.
 */
static void
tex_layout_validate(struct tex_layout *layout)
{
   /*
    * From the Sandy Bridge PRM, volume 1 part 1, page 118:
    *
    *     "To determine the necessary padding on the bottom and right side of
    *      the surface, refer to the table in Section 7.18.3.4 for the i and j
    *      parameters for the surface format in use.  The surface must then be
    *      extended to the next multiple of the alignment unit size in each
    *      dimension, and all texels contained in this extended surface must
    *      have valid GTT entries."
    *
    *     "For cube surfaces, an additional two rows of padding are required
    *      at the bottom of the surface.  This must be ensured regardless of
    *      whether the surface is stored tiled or linear.  This is due to the
    *      potential rotation of cache line orientation from memory to cache."
    *
    *     "For compressed textures (BC* and FXT1 surface formats), padding at
    *      the bottom of the surface is to an even compressed row, which is
    *      equal to a multiple of 8 uncompressed texel rows.  Thus, for padding
    *      purposes, these surfaces behave as if j = 8 only for surface
    *      padding purposes.  The value of 4 for j still applies for mip level
    *      alignment and QPitch calculation."
    */
   if (layout->templ->bind & PIPE_BIND_SAMPLER_VIEW) {
      layout->width = align(layout->width, layout->align_i);
      layout->height = align(layout->height, layout->align_j);

      if (layout->templ->target == PIPE_TEXTURE_CUBE)
         layout->height += 2;

      if (layout->compressed)
         layout->height = align(layout->height, layout->align_j * 2);
   }

   /*
    * From the Sandy Bridge PRM, volume 1 part 1, page 118:
    *
    *     "If the surface contains an odd number of rows of data, a final row
    *      below the surface must be allocated."
    */
   if (layout->templ->bind & PIPE_BIND_RENDER_TARGET)
      layout->height = align(layout->height, 2);

   /*
    * From the Sandy Bridge PRM, volume 1 part 2, page 22:
    *
    *     "A 4KB tile is subdivided into 8-high by 8-wide array of Blocks for
    *      W-Major Tiles (W Tiles).  Each Block is 8 rows by 8 bytes."
    *
    * Since we ask for INTEL_TILING_NONE instead of the non-existent
    * INTEL_TILING_W, we need to manually align the width and height to the
    * tile boundaries.
    */
   if (layout->templ->format == PIPE_FORMAT_S8_UINT) {
      layout->width = align(layout->width, 64);
      layout->height = align(layout->height, 64);
   }

   assert(layout->width % layout->block_width == 0);
   assert(layout->height % layout->block_height == 0);
   assert(layout->qpitch % layout->block_height == 0);
}
 
/**
 * Estimate the bo size, in bytes, needed to hold the layout.
 */
static size_t
tex_layout_estimate_size(const struct tex_layout *layout)
{
   /* express the surface in rows of blocks */
   unsigned row_bytes = (layout->width / layout->block_width) *
      layout->block_size;
   unsigned row_count = layout->height / layout->block_height;

   /* round up to whole tiles */
   switch (layout->tiling) {
   case INTEL_TILING_X:
      /* X tile: 512 bytes by 8 rows */
      row_bytes = align(row_bytes, 512);
      row_count = align(row_count, 8);
      break;
   case INTEL_TILING_Y:
      /* Y tile: 128 bytes by 32 rows */
      row_bytes = align(row_bytes, 128);
      row_count = align(row_count, 32);
      break;
   default:
      row_count = align(row_count, 2);
      break;
   }

   return row_bytes * row_count;
}
 
/**
 * Copy the computed layout into the texture.
 */
static void
tex_layout_apply(const struct tex_layout *layout, struct ilo_texture *tex)
{
   tex->bo_format = layout->format;
   tex->tiling = layout->tiling;

   /* bo dimensions are measured in blocks */
   tex->bo_width = layout->width / layout->block_width;
   tex->bo_height = layout->height / layout->block_height;
   tex->bo_cpp = layout->block_size;

   tex->compressed = layout->compressed;
   tex->block_width = layout->block_width;
   tex->block_height = layout->block_height;

   /* alignment and spacing flags, as expected by surface state setup */
   tex->halign_8 = (layout->align_i == 8);
   tex->valign_4 = (layout->align_j == 4);
   tex->array_spacing_full = layout->array_spacing_full;
   tex->interleaved = layout->interleaved;
}
 
/**
 * Free the slice offsets.  All levels share a single allocation (see
 * tex_alloc_slices()), so freeing the first level's pointer frees them all.
 */
static void
tex_free_slices(struct ilo_texture *tex)
{
   FREE(tex->slice_offsets[0]);
}
 
/**
 * Allocate the slice offset arrays for all mip levels in one allocation and
 * make tex->slice_offsets[lv] point at each level's portion.
 *
 * \return false on allocation failure.
 */
static bool
tex_alloc_slices(struct ilo_texture *tex)
{
   const struct pipe_resource *templ = &tex->base;
   struct ilo_texture_slice *slices;
   int depth, lv;

   /* sum the depths of all levels */
   depth = 0;
   for (lv = 0; lv <= templ->last_level; lv++)
      depth += u_minify(templ->depth0, lv);

   /*
    * There are (depth * tex->base.array_size) slices in total.  Either depth
    * is one (non-3D) or templ->array_size is one (non-array), but it does
    * not matter.
    */
   slices = CALLOC(depth * templ->array_size, sizeof(*slices));
   if (!slices)
      return false;

   tex->slice_offsets[0] = slices;

   /* point to the respective positions in the buffer */
   for (lv = 1; lv <= templ->last_level; lv++) {
      tex->slice_offsets[lv] = tex->slice_offsets[lv - 1] +
         u_minify(templ->depth0, lv - 1) * templ->array_size;
   }

   return true;
}
 
static bool
tex_create_bo(struct ilo_texture *tex,
const struct winsys_handle *handle)
{
struct ilo_screen *is = ilo_screen(tex->base.screen);
const char *name;
struct intel_bo *bo;
enum intel_tiling_mode tiling;
unsigned long pitch;
 
switch (tex->base.target) {
case PIPE_TEXTURE_1D:
name = "1D texture";
break;
case PIPE_TEXTURE_2D:
name = "2D texture";
break;
case PIPE_TEXTURE_3D:
name = "3D texture";
break;
case PIPE_TEXTURE_CUBE:
name = "cube texture";
break;
case PIPE_TEXTURE_RECT:
name = "rectangle texture";
break;
case PIPE_TEXTURE_1D_ARRAY:
name = "1D array texture";
break;
case PIPE_TEXTURE_2D_ARRAY:
name = "2D array texture";
break;
case PIPE_TEXTURE_CUBE_ARRAY:
name = "cube array texture";
break;
default:
name ="unknown texture";
break;
}
 
if (handle) {
bo = intel_winsys_import_handle(is->winsys, name, handle,
tex->bo_width, tex->bo_height, tex->bo_cpp,
&tiling, &pitch);
}
else {
bo = intel_winsys_alloc_texture(is->winsys, name,
tex->bo_width, tex->bo_height, tex->bo_cpp,
tex->tiling, tex->bo_flags, &pitch);
 
tiling = tex->tiling;
}
 
if (!bo)
return false;
 
if (tex->bo)
intel_bo_unreference(tex->bo);
 
tex->bo = bo;
tex->tiling = tiling;
tex->bo_stride = pitch;
 
return true;
}
 
/**
 * Destroy the texture, recursively destroying the separate stencil texture
 * it may own.
 */
static void
tex_destroy(struct ilo_texture *tex)
{
   if (tex->separate_s8)
      tex_destroy(tex->separate_s8);

   tex_free_slices(tex);
   intel_bo_unreference(tex->bo);
   FREE(tex);
}
 
/**
 * Create a texture from the template, optionally importing the bo from
 * \p handle.  Compute the layout, allocate (or import) the bo, and allocate
 * a separate stencil resource when the layout asks for one.
 *
 * \return the new resource, or NULL on failure.
 */
static struct pipe_resource *
tex_create(struct pipe_screen *screen,
           const struct pipe_resource *templ,
           const struct winsys_handle *handle)
{
   struct tex_layout layout;
   struct ilo_texture *tex;

   tex = CALLOC_STRUCT(ilo_texture);
   if (!tex)
      return NULL;

   tex->base = *templ;
   tex->base.screen = screen;
   pipe_reference_init(&tex->base.reference, 1);

   if (!tex_alloc_slices(tex)) {
      FREE(tex);
      return NULL;
   }

   tex->imported = (handle != NULL);

   if (tex->base.bind & (PIPE_BIND_DEPTH_STENCIL |
                         PIPE_BIND_RENDER_TARGET))
      tex->bo_flags |= INTEL_ALLOC_FOR_RENDER;

   /* compute the layout; slice offsets are written into tex->slice_offsets */
   tex_layout_init(&layout, screen, templ, tex->slice_offsets);

   switch (templ->target) {
   case PIPE_TEXTURE_1D:
   case PIPE_TEXTURE_2D:
   case PIPE_TEXTURE_CUBE:
   case PIPE_TEXTURE_RECT:
   case PIPE_TEXTURE_1D_ARRAY:
   case PIPE_TEXTURE_2D_ARRAY:
   case PIPE_TEXTURE_CUBE_ARRAY:
      tex_layout_2d(&layout);
      break;
   case PIPE_TEXTURE_3D:
      tex_layout_3d(&layout);
      break;
   default:
      assert(!"unknown resource target");
      break;
   }

   tex_layout_validate(&layout);

   /* make sure the bo can be mapped through GTT if tiled */
   if (layout.tiling != INTEL_TILING_NONE) {
      /*
       * Usually only the first 256MB of the GTT is mappable.
       *
       * See also how intel_context::max_gtt_map_object_size is calculated.
       */
      const size_t mappable_gtt_size = 256 * 1024 * 1024;
      const size_t size = tex_layout_estimate_size(&layout);

      /* be conservative */
      if (size > mappable_gtt_size / 4)
         tex_layout_force_linear(&layout);
   }

   tex_layout_apply(&layout, tex);

   if (!tex_create_bo(tex, handle)) {
      tex_free_slices(tex);
      FREE(tex);
      return NULL;
   }

   /* allocate separate stencil resource */
   if (layout.separate_stencil) {
      struct pipe_resource s8_templ = *layout.templ;
      struct pipe_resource *s8;

      /*
       * Unless PIPE_BIND_DEPTH_STENCIL is set, the resource may have other
       * tilings.  But that should be fine since it will never be bound as the
       * stencil buffer, and our transfer code can handle all tilings.
       */
      s8_templ.format = PIPE_FORMAT_S8_UINT;

      s8 = screen->resource_create(screen, &s8_templ);
      if (!s8) {
         tex_destroy(tex);
         return NULL;
      }

      tex->separate_s8 = ilo_texture(s8);

      assert(tex->separate_s8->bo_format == PIPE_FORMAT_S8_UINT);
   }

   return &tex->base;
}
 
/**
 * Export the texture's bo through the winsys into \p handle.
 *
 * Returns true on success.
 */
static bool
tex_get_handle(struct ilo_texture *tex, struct winsys_handle *handle)
{
   struct ilo_screen *is = ilo_screen(tex->base.screen);

   /* the winsys returns zero on success */
   return (intel_winsys_export_handle(is->winsys, tex->bo, tex->tiling,
                                      tex->bo_stride, handle) == 0);
}
 
/**
 * Estimate the texture size.  For large textures, the errors should be
 * pretty small.
 */
static size_t
tex_estimate_size(struct pipe_screen *screen,
                  const struct pipe_resource *templ)
{
   struct tex_layout layout;

   tex_layout_init(&layout, screen, templ, NULL);

   /* only 3D targets use the 3D layout; everything else is laid out as 2D */
   if (templ->target == PIPE_TEXTURE_3D)
      tex_layout_3d(&layout);
   else
      tex_layout_2d(&layout);

   tex_layout_validate(&layout);

   return tex_layout_estimate_size(&layout);
}
 
/**
 * (Re-)allocate the bo of a buffer.  Any previously held bo is released
 * only after the new one has been allocated successfully.
 *
 * Returns true on success.
 */
static bool
buf_create_bo(struct ilo_buffer *buf)
{
   struct ilo_screen *is = ilo_screen(buf->base.screen);
   struct intel_bo *new_bo;
   const char *name;

   /* derive a debug name from the (single) bind flag */
   switch (buf->base.bind) {
   case PIPE_BIND_VERTEX_BUFFER:
      name = "vertex buffer";
      break;
   case PIPE_BIND_INDEX_BUFFER:
      name = "index buffer";
      break;
   case PIPE_BIND_CONSTANT_BUFFER:
      name = "constant buffer";
      break;
   case PIPE_BIND_STREAM_OUTPUT:
      name = "stream output";
      break;
   default:
      name = "unknown buffer";
      break;
   }

   new_bo = intel_winsys_alloc_buffer(is->winsys, name,
                                      buf->bo_size, buf->bo_flags);
   if (!new_bo)
      return false;

   /* drop the old bo, if any, before installing the new one */
   if (buf->bo)
      intel_bo_unreference(buf->bo);
   buf->bo = new_bo;

   return true;
}
 
/**
 * Release the bo of a buffer resource and free the resource itself.
 */
static void
buf_destroy(struct ilo_buffer *buf)
{
   intel_bo_unreference(buf->bo);
   FREE(buf);
}
 
/**
 * Create a buffer resource described by \p templ.
 *
 * Returns the new pipe_resource, or NULL on failure.
 */
static struct pipe_resource *
buf_create(struct pipe_screen *screen, const struct pipe_resource *templ)
{
   struct ilo_buffer *buf = CALLOC_STRUCT(ilo_buffer);

   if (!buf)
      return NULL;

   /* copy the template and take a reference on behalf of the caller */
   buf->base = *templ;
   buf->base.screen = screen;
   pipe_reference_init(&buf->base.reference, 1);

   buf->bo_flags = 0;
   buf->bo_size = templ->width0;

   /*
    * From the Sandy Bridge PRM, volume 1 part 1, page 118:
    *
    *   "For buffers, which have no inherent "height," padding requirements
    *    are different.  A buffer must be padded to the next multiple of 256
    *    array elements, with an additional 16 bytes added beyond that to
    *    account for the L1 cache line."
    */
   if (templ->bind & PIPE_BIND_SAMPLER_VIEW)
      buf->bo_size = align(buf->bo_size, 256) + 16;

   if (!buf_create_bo(buf)) {
      FREE(buf);
      return NULL;
   }

   return &buf->base;
}
 
static boolean
ilo_can_create_resource(struct pipe_screen *screen,
                        const struct pipe_resource *templ)
{
   /*
    * We do not know if we will fail until we try to allocate the bo.
    * So just set a limit on the texture size.
    */
   const size_t max_size = 1 * 1024 * 1024 * 1024;
   const size_t size = (templ->target == PIPE_BUFFER) ?
      templ->width0 : tex_estimate_size(screen, templ);

   return (size <= max_size);
}
 
/**
 * pipe_screen::resource_create() — dispatch on the resource kind.
 */
static struct pipe_resource *
ilo_resource_create(struct pipe_screen *screen,
                    const struct pipe_resource *templ)
{
   return (templ->target == PIPE_BUFFER) ?
      buf_create(screen, templ) : tex_create(screen, templ, NULL);
}
 
/**
 * pipe_screen::resource_from_handle() — only textures can be imported.
 */
static struct pipe_resource *
ilo_resource_from_handle(struct pipe_screen *screen,
                         const struct pipe_resource *templ,
                         struct winsys_handle *handle)
{
   return (templ->target == PIPE_BUFFER) ?
      NULL : tex_create(screen, templ, handle);
}
 
/**
 * pipe_screen::resource_get_handle() — only textures can be exported.
 */
static boolean
ilo_resource_get_handle(struct pipe_screen *screen,
                        struct pipe_resource *res,
                        struct winsys_handle *handle)
{
   return (res->target == PIPE_BUFFER) ?
      false : tex_get_handle(ilo_texture(res), handle);
}
 
/**
 * pipe_screen::resource_destroy() — dispatch on the resource kind.
 */
static void
ilo_resource_destroy(struct pipe_screen *screen,
                     struct pipe_resource *res)
{
   if (res->target == PIPE_BUFFER)
      buf_destroy(ilo_buffer(res));
   else
      tex_destroy(ilo_texture(res));
}
 
/**
* Initialize resource-related functions.
*/
void
ilo_init_resource_functions(struct ilo_screen *is)
{
is->base.can_create_resource = ilo_can_create_resource;
is->base.resource_create = ilo_resource_create;
is->base.resource_from_handle = ilo_resource_from_handle;
is->base.resource_get_handle = ilo_resource_get_handle;
is->base.resource_destroy = ilo_resource_destroy;
}
 
/**
 * (Re-)allocate the bo of a buffer.  Returns true on success.
 */
bool
ilo_buffer_alloc_bo(struct ilo_buffer *buf)
{
   return buf_create_bo(buf);
}
 
/**
 * (Re-)allocate the bo of a texture.  Returns true on success, false when
 * the texture is imported (a shared bo cannot be reallocated) or when the
 * allocation fails.
 */
bool
ilo_texture_alloc_bo(struct ilo_texture *tex)
{
   /* a shared bo cannot be reallocated */
   return (tex->imported) ? false : tex_create_bo(tex, NULL);
}
 
/**
 * Return the offset (in bytes) to a slice within the bo.
 *
 * The returned offset is aligned to tile size.  Since slices are not
 * guaranteed to start at tile boundaries, the X and Y offsets (in pixels)
 * from the tile origin to the slice are also returned.  X offset is always a
 * multiple of 4 and Y offset is always a multiple of 2.
 */
unsigned
ilo_texture_get_slice_offset(const struct ilo_texture *tex,
                             int level, int slice,
                             unsigned *x_offset, unsigned *y_offset)
{
   unsigned tile_w, tile_h, tile_size, row_size;
   unsigned x, y, slice_offset;

   /* see the Sandy Bridge PRM, volume 1 part 2, page 24 */

   /* determine the tile footprint in bytes (width) and rows (height) */
   switch (tex->tiling) {
   case INTEL_TILING_NONE:
      /* W-tiled */
      if (tex->bo_format == PIPE_FORMAT_S8_UINT) {
         tile_w = 64;
         tile_h = 64;
      }
      else {
         /* linear: treat each byte as its own "tile" */
         tile_w = 1;
         tile_h = 1;
      }
      break;
   case INTEL_TILING_X:
      tile_w = 512;
      tile_h = 8;
      break;
   case INTEL_TILING_Y:
      tile_w = 128;
      tile_h = 32;
      break;
   default:
      assert(!"unknown tiling");
      tile_w = 1;
      tile_h = 1;
      break;
   }

   tile_size = tile_w * tile_h;
   row_size = tex->bo_stride * tile_h;

   /* in bytes */
   x = tex->slice_offsets[level][slice].x / tex->block_width * tex->bo_cpp;
   y = tex->slice_offsets[level][slice].y / tex->block_height;
   /* round the slice position down to a whole tile */
   slice_offset = row_size * (y / tile_h) + tile_size * (x / tile_w);

   /*
    * Since tex->bo_stride is a multiple of tile_w, slice_offset should be
    * aligned at this point.
    */
   assert(slice_offset % tile_size == 0);

   /*
    * because of the possible values of align_i and align_j in
    * tex_layout_init_alignments(), x_offset is guaranteed to be a multiple of
    * 4 and y_offset is guaranteed to be a multiple of 2.
    */
   if (x_offset) {
      /* in pixels */
      x = (x % tile_w) / tex->bo_cpp * tex->block_width;
      assert(x % 4 == 0);

      *x_offset = x;
   }

   if (y_offset) {
      /* in pixels */
      y = (y % tile_h) * tex->block_height;
      assert(y % 2 == 0);

      *y_offset = y;
   }

   return slice_offset;
}
/contrib/sdk/sources/Mesa/src/gallium/drivers/ilo/ilo_resource.h
0,0 → 1,113
/*
* Mesa 3-D graphics library
*
* Copyright (C) 2012-2013 LunarG, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*
* Authors:
* Chia-I Wu <olv@lunarg.com>
*/
 
#ifndef ILO_RESOURCE_H
#define ILO_RESOURCE_H
 
#include "intel_winsys.h"
 
#include "ilo_common.h"
 
struct ilo_screen;
 
/* a PIPE_BUFFER resource backed by a single winsys bo */
struct ilo_buffer {
   struct pipe_resource base;

   struct intel_bo *bo;   /* backing buffer object */
   unsigned bo_size;      /* bo size in bytes, including any padding */
   unsigned bo_flags;     /* flags passed to the winsys at allocation */
};
 
/* a non-buffer resource (1D/2D/3D/cube texture) backed by a winsys bo */
struct ilo_texture {
   struct pipe_resource base;

   /* true when the bo was imported via a winsys handle */
   bool imported;
   unsigned bo_flags;

   enum pipe_format bo_format;
   struct intel_bo *bo;

   /*
    * These are the values passed to or returned from winsys for bo
    * allocation.  As such,
    *
    *  - width and height are in blocks,
    *  - cpp is the block size in bytes, and
    *  - stride is the distance in bytes between two block rows.
    */
   int bo_width, bo_height, bo_cpp, bo_stride;
   enum intel_tiling_mode tiling;

   /* format block dimensions (1x1 unless compressed) */
   bool compressed;
   unsigned block_width;
   unsigned block_height;

   /* true if the mip level alignments are stricter */
   bool halign_8, valign_4;
   /* true if space is reserved between layers */
   bool array_spacing_full;
   /* true if samples are interleaved */
   bool interleaved;

   /* 2D offsets into a layer/slice/face, one array per mip level */
   struct ilo_texture_slice {
      unsigned x;
      unsigned y;
   } *slice_offsets[PIPE_MAX_TEXTURE_LEVELS];

   /* separate S8 texture for depth/stencil formats split by the driver */
   struct ilo_texture *separate_s8;
};
 
/* downcast; yields NULL when res is NULL or not a PIPE_BUFFER */
static inline struct ilo_buffer *
ilo_buffer(struct pipe_resource *res)
{
   if (!res || res->target != PIPE_BUFFER)
      return NULL;

   return (struct ilo_buffer *) res;
}
 
/* downcast; yields NULL when res is NULL or is a PIPE_BUFFER */
static inline struct ilo_texture *
ilo_texture(struct pipe_resource *res)
{
   if (!res || res->target == PIPE_BUFFER)
      return NULL;

   return (struct ilo_texture *) res;
}
 
void
ilo_init_resource_functions(struct ilo_screen *is);
 
bool
ilo_buffer_alloc_bo(struct ilo_buffer *buf);
 
bool
ilo_texture_alloc_bo(struct ilo_texture *tex);
 
unsigned
ilo_texture_get_slice_offset(const struct ilo_texture *tex,
int level, int slice,
unsigned *x_offset, unsigned *y_offset);
 
#endif /* ILO_RESOURCE_H */
/contrib/sdk/sources/Mesa/src/gallium/drivers/ilo/ilo_screen.c
0,0 → 1,752
/*
* Mesa 3-D graphics library
*
* Copyright (C) 2012-2013 LunarG, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*
* Authors:
* Chia-I Wu <olv@lunarg.com>
*/
 
#include "util/u_format_s3tc.h"
#include "vl/vl_decoder.h"
#include "vl/vl_video_buffer.h"
#include "intel_chipset.h"
#include "intel_reg.h" /* for TIMESTAMP */
#include "intel_winsys.h"
 
#include "ilo_context.h"
#include "ilo_format.h"
#include "ilo_resource.h"
#include "ilo_public.h"
#include "ilo_screen.h"
 
/* bitmask of ILO_DEBUG_* flags, parsed from the ILO_DEBUG env var at
 * screen creation */
int ilo_debug;

/* mapping of ILO_DEBUG env-var tokens to debug flags */
static const struct debug_named_value ilo_debug_flags[] = {
   { "3d", ILO_DEBUG_3D, "Dump 3D commands and states" },
   { "vs", ILO_DEBUG_VS, "Dump vertex shaders" },
   { "gs", ILO_DEBUG_GS, "Dump geometry shaders" },
   { "fs", ILO_DEBUG_FS, "Dump fragment shaders" },
   { "cs", ILO_DEBUG_CS, "Dump compute shaders" },
   { "nohw", ILO_DEBUG_NOHW, "Do not send commands to HW" },
   { "nocache", ILO_DEBUG_NOCACHE, "Always invalidate HW caches" },
   DEBUG_NAMED_VALUE_END
};
 
/**
 * pipe_screen::get_paramf() — float capabilities, mostly hardware limits
 * taken from the 3DSTATE_SF and SAMPLER_STATE command definitions.
 */
static float
ilo_get_paramf(struct pipe_screen *screen, enum pipe_capf param)
{
   switch (param) {
   case PIPE_CAPF_MAX_LINE_WIDTH:
      /* in U3.7, defined in 3DSTATE_SF */
      return 7.0f;
   case PIPE_CAPF_MAX_LINE_WIDTH_AA:
      /* line width minus one, which is reserved for AA region */
      return 6.0f;
   case PIPE_CAPF_MAX_POINT_WIDTH:
   case PIPE_CAPF_MAX_POINT_WIDTH_AA:
      /* in U8.3, defined in 3DSTATE_SF; the AA limit is the same as the
       * plain one because rasterizer->point_smooth is ignored */
      return 255.0f;
   case PIPE_CAPF_MAX_TEXTURE_ANISOTROPY:
      /* [2.0, 16.0], defined in SAMPLER_STATE */
      return 16.0f;
   case PIPE_CAPF_MAX_TEXTURE_LOD_BIAS:
      /* [-16.0, 16.0), defined in SAMPLER_STATE */
      return 15.0f;
   case PIPE_CAPF_GUARD_BAND_LEFT:
   case PIPE_CAPF_GUARD_BAND_TOP:
   case PIPE_CAPF_GUARD_BAND_RIGHT:
   case PIPE_CAPF_GUARD_BAND_BOTTOM:
      /* what are these for? */
      return 0.0f;
   default:
      return 0.0f;
   }
}
 
/**
 * pipe_screen::get_shader_param() — per-stage shader capabilities.  Only
 * the VS, GS, and FS stages are supported; all other stages report 0.
 */
static int
ilo_get_shader_param(struct pipe_screen *screen, unsigned shader,
                     enum pipe_shader_cap param)
{
   switch (shader) {
   case PIPE_SHADER_FRAGMENT:
   case PIPE_SHADER_VERTEX:
   case PIPE_SHADER_GEOMETRY:
      break;
   default:
      return 0;
   }

   switch (param) {
   /* the limits are copied from the classic driver */
   case PIPE_SHADER_CAP_MAX_INSTRUCTIONS:
      return (shader == PIPE_SHADER_FRAGMENT) ? 1024 : 16384;
   case PIPE_SHADER_CAP_MAX_ALU_INSTRUCTIONS:
      return (shader == PIPE_SHADER_FRAGMENT) ? 1024 : 0;
   case PIPE_SHADER_CAP_MAX_TEX_INSTRUCTIONS:
      return (shader == PIPE_SHADER_FRAGMENT) ? 1024 : 0;
   case PIPE_SHADER_CAP_MAX_TEX_INDIRECTIONS:
      return (shader == PIPE_SHADER_FRAGMENT) ? 1024 : 0;
   case PIPE_SHADER_CAP_MAX_CONTROL_FLOW_DEPTH:
      return UINT_MAX;
   case PIPE_SHADER_CAP_MAX_INPUTS:
      /* this is limited by how many attributes SF can remap */
      return 16;
   case PIPE_SHADER_CAP_MAX_CONSTS:
      return 1024;
   case PIPE_SHADER_CAP_MAX_CONST_BUFFERS:
      return ILO_MAX_CONST_BUFFERS;
   case PIPE_SHADER_CAP_MAX_TEMPS:
      return 256;
   case PIPE_SHADER_CAP_MAX_ADDRS:
      return (shader == PIPE_SHADER_FRAGMENT) ? 0 : 1;
   case PIPE_SHADER_CAP_MAX_PREDS:
      return 0;
   case PIPE_SHADER_CAP_TGSI_CONT_SUPPORTED:
      return 1;
   case PIPE_SHADER_CAP_INDIRECT_INPUT_ADDR:
      return 0;
   case PIPE_SHADER_CAP_INDIRECT_OUTPUT_ADDR:
      return 0;
   case PIPE_SHADER_CAP_INDIRECT_TEMP_ADDR:
      return (shader == PIPE_SHADER_FRAGMENT) ? 0 : 1;
   case PIPE_SHADER_CAP_INDIRECT_CONST_ADDR:
      return 1;
   case PIPE_SHADER_CAP_SUBROUTINES:
      return 0;
   case PIPE_SHADER_CAP_INTEGERS:
      return 1;
   case PIPE_SHADER_CAP_MAX_TEXTURE_SAMPLERS:
      return ILO_MAX_SAMPLERS;
   case PIPE_SHADER_CAP_PREFERRED_IR:
      return PIPE_SHADER_IR_TGSI;
   case PIPE_SHADER_CAP_TGSI_SQRT_SUPPORTED:
      return 1;

   default:
      return 0;
   }
}
 
/**
 * pipe_screen::get_video_param() — video decoding capabilities, mostly
 * delegated to the generic vl helpers.
 */
static int
ilo_get_video_param(struct pipe_screen *screen,
                    enum pipe_video_profile profile,
                    enum pipe_video_cap param)
{
   switch (param) {
   case PIPE_VIDEO_CAP_SUPPORTED:
      return vl_profile_supported(screen, profile);
   case PIPE_VIDEO_CAP_NPOT_TEXTURES:
   case PIPE_VIDEO_CAP_PREFERS_INTERLACED:
   case PIPE_VIDEO_CAP_SUPPORTS_PROGRESSIVE:
      return 1;
   case PIPE_VIDEO_CAP_MAX_WIDTH:
   case PIPE_VIDEO_CAP_MAX_HEIGHT:
      return vl_video_buffer_max_size(screen);
   case PIPE_VIDEO_CAP_PREFERED_FORMAT:
      return PIPE_FORMAT_NV12;
   case PIPE_VIDEO_CAP_SUPPORTS_INTERLACED:
   default:
      return 0;
   }
}
 
/**
 * pipe_screen::get_compute_param() — compute capabilities.
 *
 * Writes the value into \p ret when non-NULL and returns the value size in
 * bytes; returns 0 for unknown params.
 *
 * Fix: the original called memcpy(ret, NULL, 0) for unknown params when
 * \p ret was non-NULL; passing a null pointer to memcpy is undefined
 * behavior even with a zero size, so guard on the size as well.
 */
static int
ilo_get_compute_param(struct pipe_screen *screen,
                      enum pipe_compute_cap param,
                      void *ret)
{
   /* scratch space for whichever value the param selects */
   union {
      const char *ir_target;
      uint64_t grid_dimension;
      uint64_t max_grid_size[3];
      uint64_t max_block_size[3];
      uint64_t max_threads_per_block;
      uint64_t max_global_size;
      uint64_t max_local_size;
      uint64_t max_private_size;
      uint64_t max_input_size;
      uint64_t max_mem_alloc_size;
   } val;
   const void *ptr;
   int size;

   /* XXX some randomly chosen values */
   switch (param) {
   case PIPE_COMPUTE_CAP_IR_TARGET:
      val.ir_target = "ilog";

      ptr = val.ir_target;
      size = strlen(val.ir_target) + 1;
      break;
   case PIPE_COMPUTE_CAP_GRID_DIMENSION:
      val.grid_dimension = Elements(val.max_grid_size);

      ptr = &val.grid_dimension;
      size = sizeof(val.grid_dimension);
      break;
   case PIPE_COMPUTE_CAP_MAX_GRID_SIZE:
      val.max_grid_size[0] = 65535;
      val.max_grid_size[1] = 65535;
      val.max_grid_size[2] = 1;

      ptr = &val.max_grid_size;
      size = sizeof(val.max_grid_size);
      break;
   case PIPE_COMPUTE_CAP_MAX_BLOCK_SIZE:
      val.max_block_size[0] = 512;
      val.max_block_size[1] = 512;
      val.max_block_size[2] = 512;

      ptr = &val.max_block_size;
      size = sizeof(val.max_block_size);
      break;
   case PIPE_COMPUTE_CAP_MAX_THREADS_PER_BLOCK:
      val.max_threads_per_block = 512;

      ptr = &val.max_threads_per_block;
      size = sizeof(val.max_threads_per_block);
      break;
   case PIPE_COMPUTE_CAP_MAX_GLOBAL_SIZE:
      val.max_global_size = 4;

      ptr = &val.max_global_size;
      size = sizeof(val.max_global_size);
      break;
   case PIPE_COMPUTE_CAP_MAX_LOCAL_SIZE:
      val.max_local_size = 64 * 1024;

      ptr = &val.max_local_size;
      size = sizeof(val.max_local_size);
      break;
   case PIPE_COMPUTE_CAP_MAX_PRIVATE_SIZE:
      val.max_private_size = 32768;

      ptr = &val.max_private_size;
      size = sizeof(val.max_private_size);
      break;
   case PIPE_COMPUTE_CAP_MAX_INPUT_SIZE:
      val.max_input_size = 256;

      ptr = &val.max_input_size;
      size = sizeof(val.max_input_size);
      break;
   case PIPE_COMPUTE_CAP_MAX_MEM_ALLOC_SIZE:
      val.max_mem_alloc_size = 128 * 1024 * 1024;

      ptr = &val.max_mem_alloc_size;
      size = sizeof(val.max_mem_alloc_size);
      break;
   default:
      ptr = NULL;
      size = 0;
      break;
   }

   /* ptr is NULL exactly when size is 0; never hand NULL to memcpy */
   if (ret && size)
      memcpy(ret, ptr, size);

   return size;
}
 
/**
 * pipe_screen::get_param() — integer/boolean capabilities.  The inline
 * comments cite the hardware constraints that motivate each value; caps
 * marked TODO are disabled pending driver support.
 */
static int
ilo_get_param(struct pipe_screen *screen, enum pipe_cap param)
{
   struct ilo_screen *is = ilo_screen(screen);

   switch (param) {
   case PIPE_CAP_NPOT_TEXTURES:
   case PIPE_CAP_TWO_SIDED_STENCIL:
      return true;
   case PIPE_CAP_MAX_DUAL_SOURCE_RENDER_TARGETS:
      return 0; /* TODO */
   case PIPE_CAP_ANISOTROPIC_FILTER:
   case PIPE_CAP_POINT_SPRITE:
      return true;
   case PIPE_CAP_MAX_RENDER_TARGETS:
      return ILO_MAX_DRAW_BUFFERS;
   case PIPE_CAP_OCCLUSION_QUERY:
   case PIPE_CAP_QUERY_TIME_ELAPSED:
   case PIPE_CAP_TEXTURE_SHADOW_MAP:
   case PIPE_CAP_TEXTURE_SWIZZLE: /* must be supported for shadow map */
      return true;
   case PIPE_CAP_MAX_TEXTURE_2D_LEVELS:
      /*
       * As defined in SURFACE_STATE, we have
       *
       *         Max WxHxD for 2D and CUBE   Max WxHxD for 3D
       *  GEN6        8192x8192x512           2048x2048x2048
       *  GEN7      16384x16384x2048          2048x2048x2048
       *
       * However, when the texutre size is large, things become unstable.  We
       * require the maximum texture size to be 2^30 bytes in
       * screen->can_create_resource().  Since the maximum pixel size is 2^4
       * bytes (PIPE_FORMAT_R32G32B32A32_FLOAT), textures should not have more
       * than 2^26 pixels.
       *
       * For 3D textures, we have to set the maximum number of levels to 9,
       * which has at most 2^24 pixels.  For 2D textures, we set it to 14,
       * which has at most 2^26 pixels.  And for cube textures, we has to set
       * it to 12.
       */
      return 14;
   case PIPE_CAP_MAX_TEXTURE_3D_LEVELS:
      return 9;
   case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS:
      return 12;
   case PIPE_CAP_TEXTURE_MIRROR_CLAMP:
      return false;
   case PIPE_CAP_BLEND_EQUATION_SEPARATE:
   case PIPE_CAP_SM3:
      return true;
   case PIPE_CAP_MAX_STREAM_OUTPUT_BUFFERS:
      /* GEN7 needs SOL_RESET support in the kernel for stream output */
      if (is->dev.gen >= ILO_GEN(7) && !is->dev.has_gen7_sol_reset)
         return 0;
      return ILO_MAX_SO_BUFFERS;
   case PIPE_CAP_PRIMITIVE_RESTART:
      return true;
   case PIPE_CAP_MAX_COMBINED_SAMPLERS:
      return ILO_MAX_SAMPLERS * 2;
   case PIPE_CAP_INDEP_BLEND_ENABLE:
   case PIPE_CAP_INDEP_BLEND_FUNC:
      return true;
   case PIPE_CAP_MAX_TEXTURE_ARRAY_LAYERS:
      return (is->dev.gen >= ILO_GEN(7)) ? 2048 : 512;
   case PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT:
   case PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT:
   case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER:
   case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER:
   case PIPE_CAP_DEPTH_CLIP_DISABLE:
      return true;
   case PIPE_CAP_SHADER_STENCIL_EXPORT:
      return false;
   case PIPE_CAP_TGSI_INSTANCEID:
   case PIPE_CAP_VERTEX_ELEMENT_INSTANCE_DIVISOR:
      return true;
   case PIPE_CAP_FRAGMENT_COLOR_CLAMPED:
      return false;
   case PIPE_CAP_MIXED_COLORBUFFER_FORMATS:
      return true;
   case PIPE_CAP_SEAMLESS_CUBE_MAP:
   case PIPE_CAP_SEAMLESS_CUBE_MAP_PER_TEXTURE:
   case PIPE_CAP_SCALED_RESOLVE:
      return true;
   case PIPE_CAP_MIN_TEXEL_OFFSET:
      return -8;
   case PIPE_CAP_MAX_TEXEL_OFFSET:
      return 7;
   case PIPE_CAP_CONDITIONAL_RENDER:
   case PIPE_CAP_TEXTURE_BARRIER:
      return true;
   case PIPE_CAP_MAX_STREAM_OUTPUT_SEPARATE_COMPONENTS:
      return ILO_MAX_SO_BINDINGS / ILO_MAX_SO_BUFFERS;
   case PIPE_CAP_MAX_STREAM_OUTPUT_INTERLEAVED_COMPONENTS:
      return ILO_MAX_SO_BINDINGS;
   case PIPE_CAP_STREAM_OUTPUT_PAUSE_RESUME:
      if (is->dev.gen >= ILO_GEN(7))
         return is->dev.has_gen7_sol_reset;
      else
         return false; /* TODO */
   case PIPE_CAP_TGSI_CAN_COMPACT_CONSTANTS:
      return false;
   case PIPE_CAP_VERTEX_COLOR_UNCLAMPED:
      return true;
   case PIPE_CAP_VERTEX_COLOR_CLAMPED:
      return false;
   case PIPE_CAP_GLSL_FEATURE_LEVEL:
      return 140;
   case PIPE_CAP_QUADS_FOLLOW_PROVOKING_VERTEX_CONVENTION:
   case PIPE_CAP_USER_VERTEX_BUFFERS:
      return false;
   case PIPE_CAP_VERTEX_BUFFER_OFFSET_4BYTE_ALIGNED_ONLY:
   case PIPE_CAP_VERTEX_BUFFER_STRIDE_4BYTE_ALIGNED_ONLY:
   case PIPE_CAP_VERTEX_ELEMENT_SRC_OFFSET_4BYTE_ALIGNED_ONLY:
      return false;
   case PIPE_CAP_COMPUTE:
      return false; /* TODO */
   case PIPE_CAP_USER_INDEX_BUFFERS:
   case PIPE_CAP_USER_CONSTANT_BUFFERS:
      return true;
   case PIPE_CAP_CONSTANT_BUFFER_OFFSET_ALIGNMENT:
      /* imposed by OWord (Dual) Block Read */
      return 16;
   case PIPE_CAP_START_INSTANCE:
   case PIPE_CAP_QUERY_TIMESTAMP:
      return true;
   case PIPE_CAP_TEXTURE_MULTISAMPLE:
      return false; /* TODO */
   case PIPE_CAP_MIN_MAP_BUFFER_ALIGNMENT:
      return 0;
   case PIPE_CAP_CUBE_MAP_ARRAY:
   case PIPE_CAP_TEXTURE_BUFFER_OBJECTS:
      return true;
   case PIPE_CAP_TEXTURE_BUFFER_OFFSET_ALIGNMENT:
      return 1;
   case PIPE_CAP_TGSI_TEXCOORD:
      return false;
   case PIPE_CAP_PREFER_BLIT_BASED_TEXTURE_TRANSFER:
      return true;
   case PIPE_CAP_QUERY_PIPELINE_STATISTICS:
      return false; /* TODO */
   case PIPE_CAP_TEXTURE_BORDER_COLOR_QUIRK:
      return 0;
   case PIPE_CAP_MAX_TEXTURE_BUFFER_SIZE:
      /* a BRW_SURFACE_BUFFER can have up to 2^27 elements */
      return 1 << 27;
   case PIPE_CAP_MAX_VIEWPORTS:
      return ILO_MAX_VIEWPORTS;
   case PIPE_CAP_ENDIANNESS:
      return PIPE_ENDIAN_LITTLE;

   default:
      return 0;
   }
}
 
/**
 * pipe_screen::get_vendor() — the driver vendor, not the GPU vendor.
 */
static const char *
ilo_get_vendor(struct pipe_screen *screen)
{
   return "LunarG, Inc.";
}
 
/**
 * pipe_screen::get_name() — map the PCI device id to a human-readable
 * chipset name.  The id-to-name table is stolen from the classic i965
 * driver; keep it in sync with intel_chipset.h.
 */
static const char *
ilo_get_name(struct pipe_screen *screen)
{
   struct ilo_screen *is = ilo_screen(screen);
   const char *chipset;

   /* stolen from classic i965 */
   switch (is->dev.devid) {
   case PCI_CHIP_SANDYBRIDGE_GT1:
   case PCI_CHIP_SANDYBRIDGE_GT2:
   case PCI_CHIP_SANDYBRIDGE_GT2_PLUS:
      chipset = "Intel(R) Sandybridge Desktop";
      break;
   case PCI_CHIP_SANDYBRIDGE_M_GT1:
   case PCI_CHIP_SANDYBRIDGE_M_GT2:
   case PCI_CHIP_SANDYBRIDGE_M_GT2_PLUS:
      chipset = "Intel(R) Sandybridge Mobile";
      break;
   case PCI_CHIP_SANDYBRIDGE_S:
      chipset = "Intel(R) Sandybridge Server";
      break;
   case PCI_CHIP_IVYBRIDGE_GT1:
   case PCI_CHIP_IVYBRIDGE_GT2:
      chipset = "Intel(R) Ivybridge Desktop";
      break;
   case PCI_CHIP_IVYBRIDGE_M_GT1:
   case PCI_CHIP_IVYBRIDGE_M_GT2:
      chipset = "Intel(R) Ivybridge Mobile";
      break;
   case PCI_CHIP_IVYBRIDGE_S_GT1:
   case PCI_CHIP_IVYBRIDGE_S_GT2:
      chipset = "Intel(R) Ivybridge Server";
      break;
   case PCI_CHIP_BAYTRAIL_M_1:
   case PCI_CHIP_BAYTRAIL_M_2:
   case PCI_CHIP_BAYTRAIL_M_3:
   case PCI_CHIP_BAYTRAIL_M_4:
   case PCI_CHIP_BAYTRAIL_D:
      chipset = "Intel(R) Bay Trail";
      break;
   case PCI_CHIP_HASWELL_GT1:
   case PCI_CHIP_HASWELL_GT2:
   case PCI_CHIP_HASWELL_GT3:
   case PCI_CHIP_HASWELL_SDV_GT1:
   case PCI_CHIP_HASWELL_SDV_GT2:
   case PCI_CHIP_HASWELL_SDV_GT3:
   case PCI_CHIP_HASWELL_ULT_GT1:
   case PCI_CHIP_HASWELL_ULT_GT2:
   case PCI_CHIP_HASWELL_ULT_GT3:
   case PCI_CHIP_HASWELL_CRW_GT1:
   case PCI_CHIP_HASWELL_CRW_GT2:
   case PCI_CHIP_HASWELL_CRW_GT3:
      chipset = "Intel(R) Haswell Desktop";
      break;
   case PCI_CHIP_HASWELL_M_GT1:
   case PCI_CHIP_HASWELL_M_GT2:
   case PCI_CHIP_HASWELL_M_GT3:
   case PCI_CHIP_HASWELL_SDV_M_GT1:
   case PCI_CHIP_HASWELL_SDV_M_GT2:
   case PCI_CHIP_HASWELL_SDV_M_GT3:
   case PCI_CHIP_HASWELL_ULT_M_GT1:
   case PCI_CHIP_HASWELL_ULT_M_GT2:
   case PCI_CHIP_HASWELL_ULT_M_GT3:
   case PCI_CHIP_HASWELL_CRW_M_GT1:
   case PCI_CHIP_HASWELL_CRW_M_GT2:
   case PCI_CHIP_HASWELL_CRW_M_GT3:
      chipset = "Intel(R) Haswell Mobile";
      break;
   case PCI_CHIP_HASWELL_S_GT1:
   case PCI_CHIP_HASWELL_S_GT2:
   case PCI_CHIP_HASWELL_S_GT3:
   case PCI_CHIP_HASWELL_SDV_S_GT1:
   case PCI_CHIP_HASWELL_SDV_S_GT2:
   case PCI_CHIP_HASWELL_SDV_S_GT3:
   case PCI_CHIP_HASWELL_ULT_S_GT1:
   case PCI_CHIP_HASWELL_ULT_S_GT2:
   case PCI_CHIP_HASWELL_ULT_S_GT3:
   case PCI_CHIP_HASWELL_CRW_S_GT1:
   case PCI_CHIP_HASWELL_CRW_S_GT2:
   case PCI_CHIP_HASWELL_CRW_S_GT3:
      chipset = "Intel(R) Haswell Server";
      break;
   default:
      chipset = "Unknown Intel Chipset";
      break;
   }

   return chipset;
}
 
/**
 * pipe_screen::get_timestamp() — read the GPU TIMESTAMP register and
 * convert it to nanoseconds.
 */
static uint64_t
ilo_get_timestamp(struct pipe_screen *screen)
{
   struct ilo_screen *is = ilo_screen(screen);
   /* view the 64-bit register as two 32-bit dwords */
   union {
      uint64_t val;
      uint32_t dw[2];
   } timestamp;

   intel_winsys_read_reg(is->winsys, TIMESTAMP, &timestamp.val);

   /*
    * From the Ivy Bridge PRM, volume 1 part 3, page 107:
    *
    *   "Note: This timestamp register reflects the value of the PCU TSC.
    *    The PCU TSC counts 10ns increments; this timestamp reflects bits
    *    38:3 of the TSC (i.e. 80ns granularity, rolling over every 1.5
    *    hours)."
    *
    * However, it seems dw[0] is garbage and dw[1] contains the lower 32 bits
    * of the timestamp.  We will have to live with a timestamp that rolls over
    * every ~343 seconds.
    *
    * See also brw_get_timestamp().
    */
   return (uint64_t) timestamp.dw[1] * 80;
}
 
/**
 * pipe_screen::fence_reference() — make *p point at f, adjusting the
 * reference counts of both fences and destroying the old fence when its
 * count drops to zero.
 *
 * Fix: the original only called pipe_reference() when *ptr was non-NULL,
 * so assigning a fence into a NULL slot never referenced the new fence,
 * leading to a premature destruction later.  pipe_reference() accepts NULL
 * for either argument, so pass NULL-safe arguments unconditionally.
 */
static void
ilo_fence_reference(struct pipe_screen *screen,
                    struct pipe_fence_handle **p,
                    struct pipe_fence_handle *f)
{
   struct ilo_fence **ptr = (struct ilo_fence **) p;
   struct ilo_fence *fence = ilo_fence(f);

   if (!ptr) {
      /* still need to reference fence */
      if (fence)
         pipe_reference(NULL, &fence->reference);
      return;
   }

   /*
    * reference fence and dereference the one pointed to by ptr; a true
    * return means the old fence must be destroyed
    */
   if (pipe_reference(*ptr ? &(*ptr)->reference : NULL,
                      fence ? &fence->reference : NULL)) {
      struct ilo_fence *old = *ptr;

      if (old->bo)
         intel_bo_unreference(old->bo);
      FREE(old);
   }

   *ptr = fence;
}
 
static boolean
ilo_fence_signalled(struct pipe_screen *screen,
struct pipe_fence_handle *f)
{
struct ilo_fence *fence = ilo_fence(f);
 
/* mark signalled if the bo is idle */
if (fence->bo && !intel_bo_is_busy(fence->bo)) {
intel_bo_unreference(fence->bo);
fence->bo = NULL;
}
 
return (fence->bo == NULL);
}
 
/**
 * pipe_screen::fence_finish() — wait for the fence with a timeout.
 * Returns true when the fence signalled within the timeout.
 */
static boolean
ilo_fence_finish(struct pipe_screen *screen,
                 struct pipe_fence_handle *f,
                 uint64_t timeout)
{
   struct ilo_fence *fence = ilo_fence(f);
   /* the winsys takes a signed timeout; negative means wait forever */
   const int64_t wait_timeout = (timeout > INT64_MAX) ? -1 : timeout;

   /* already signalled */
   if (!fence->bo)
      return true;

   /* a non-zero return from the wait means error or timeout */
   if (intel_bo_wait(fence->bo, wait_timeout) != 0)
      return false;

   /* the wait succeeded; mark signalled */
   intel_bo_unreference(fence->bo);
   fence->bo = NULL;

   return true;
}
 
/**
 * pipe_screen::destroy() — tear down the screen and its winsys.
 */
static void
ilo_screen_destroy(struct pipe_screen *screen)
{
   struct ilo_screen *is = ilo_screen(screen);

   /* as it seems, winsys is owned by the screen */
   intel_winsys_destroy(is->winsys);

   FREE(is);
}
 
/**
 * Fill \p dev from the winsys device info: generation, GT level, and URB
 * size.  Returns false for unrecognized (pre-GEN6 or future) devices.
 */
static bool
init_dev(struct ilo_dev_info *dev, const struct intel_winsys_info *info)
{
   dev->devid = info->devid;
   dev->has_llc = info->has_llc;
   dev->has_gen7_sol_reset = info->has_gen7_sol_reset;
   dev->has_address_swizzling = info->has_address_swizzling;

   /*
    * From the Sandy Bridge PRM, volume 4 part 2, page 18:
    *
    *   "[DevSNB]: The GT1 product's URB provides 32KB of storage, arranged
    *    as 1024 256-bit rows.  The GT2 product's URB provides 64KB of
    *    storage, arranged as 2048 256-bit rows.  A row corresponds in size
    *    to an EU GRF register.  Read/write access to the URB is generally
    *    supported on a row-granular basis."
    *
    * From the Ivy Bridge PRM, volume 4 part 2, page 17:
    *
    *   "URB Size    URB Rows    URB Rows when SLM Enabled
    *    128k        4096        2048
    *    256k        8096        4096"
    */

   if (IS_HASWELL(info->devid)) {
      dev->gen = ILO_GEN(7.5);

      if (IS_HSW_GT3(info->devid)) {
         dev->gt = 3;
         dev->urb_size = 512 * 1024;
      }
      else if (IS_HSW_GT2(info->devid)) {
         dev->gt = 2;
         dev->urb_size = 256 * 1024;
      }
      else {
         dev->gt = 1;
         dev->urb_size = 128 * 1024;
      }
   }
   else if (IS_GEN7(info->devid)) {
      dev->gen = ILO_GEN(7);

      if (IS_IVB_GT2(info->devid)) {
         dev->gt = 2;
         dev->urb_size = 256 * 1024;
      }
      else {
         dev->gt = 1;
         dev->urb_size = 128 * 1024;
      }
   }
   else if (IS_GEN6(info->devid)) {
      dev->gen = ILO_GEN(6);

      if (IS_SNB_GT2(info->devid)) {
         dev->gt = 2;
         dev->urb_size = 64 * 1024;
      }
      else {
         dev->gt = 1;
         dev->urb_size = 32 * 1024;
      }
   }
   else {
      /* only GEN6 (Sandy Bridge) and later are supported */
      ilo_err("unknown GPU generation\n");
      return false;
   }

   return true;
}
 
/**
 * Create an ilo screen on top of the given winsys.  The screen takes
 * ownership of \p ws (it is destroyed in ilo_screen_destroy()).
 *
 * Returns NULL on allocation failure or unsupported hardware.
 */
struct pipe_screen *
ilo_screen_create(struct intel_winsys *ws)
{
   struct ilo_screen *is;
   const struct intel_winsys_info *info;

   /* parse ILO_DEBUG once, at first screen creation */
   ilo_debug = debug_get_flags_option("ILO_DEBUG", ilo_debug_flags, 0);

   is = CALLOC_STRUCT(ilo_screen);
   if (!is)
      return NULL;

   is->winsys = ws;

   intel_winsys_enable_reuse(is->winsys);

   info = intel_winsys_get_info(is->winsys);
   if (!init_dev(&is->dev, info)) {
      FREE(is);
      return NULL;
   }

   util_format_s3tc_init();

   is->base.destroy = ilo_screen_destroy;
   is->base.get_name = ilo_get_name;
   is->base.get_vendor = ilo_get_vendor;
   is->base.get_param = ilo_get_param;
   is->base.get_paramf = ilo_get_paramf;
   is->base.get_shader_param = ilo_get_shader_param;
   is->base.get_video_param = ilo_get_video_param;
   is->base.get_compute_param = ilo_get_compute_param;

   is->base.get_timestamp = ilo_get_timestamp;

   is->base.flush_frontbuffer = NULL;

   is->base.fence_reference = ilo_fence_reference;
   is->base.fence_signalled = ilo_fence_signalled;
   is->base.fence_finish = ilo_fence_finish;

   is->base.get_driver_query_info = NULL;

   ilo_init_format_functions(is);
   ilo_init_context_functions(is);
   ilo_init_resource_functions(is);

   return &is->base;
}
/contrib/sdk/sources/Mesa/src/gallium/drivers/ilo/ilo_screen.h
0,0 → 1,63
/*
* Mesa 3-D graphics library
*
* Copyright (C) 2012-2013 LunarG, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*
* Authors:
* Chia-I Wu <olv@lunarg.com>
*/
 
#ifndef ILO_SCREEN_H
#define ILO_SCREEN_H
 
#include "pipe/p_screen.h"
#include "pipe/p_state.h"
 
#include "ilo_common.h"
 
struct intel_winsys;
struct intel_bo;
 
/* a fence wraps a bo; a NULL bo means the fence has signalled */
struct ilo_fence {
   struct pipe_reference reference;
   struct intel_bo *bo;
};
 
/* the driver's pipe_screen implementation */
struct ilo_screen {
   struct pipe_screen base;

   struct intel_winsys *winsys;   /* owned by the screen */
   struct ilo_dev_info dev;       /* probed device capabilities */
};
 
/* downcast; valid because ilo_screen embeds pipe_screen as first member */
static inline struct ilo_screen *
ilo_screen(struct pipe_screen *screen)
{
   return (struct ilo_screen *) screen;
}
 
/* downcast from the opaque gallium fence handle */
static inline struct ilo_fence *
ilo_fence(struct pipe_fence_handle *fence)
{
   return (struct ilo_fence *) fence;
}
 
#endif /* ILO_SCREEN_H */
/contrib/sdk/sources/Mesa/src/gallium/drivers/ilo/ilo_shader.c
0,0 → 1,1169
/*
* Mesa 3-D graphics library
*
* Copyright (C) 2012-2013 LunarG, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*
* Authors:
* Chia-I Wu <olv@lunarg.com>
*/
 
#include "tgsi/tgsi_parse.h"
#include "intel_winsys.h"
#include "brw_defines.h" /* for SBE setup */
 
#include "shader/ilo_shader_internal.h"
#include "ilo_state.h"
#include "ilo_shader.h"
 
/*
 * A shader cache tracks the shader states it manages on two lists:
 * "shaders" holds those whose variants are all uploaded, "changed" holds
 * those with variants still to be written to the kernel bo.
 */
struct ilo_shader_cache {
   struct list_head shaders;
   struct list_head changed;
};
 
/**
 * Create an empty shader cache. A shader cache manages shader states and
 * uploads their kernels to a single bo.
 *
 * Returns NULL on allocation failure.
 */
struct ilo_shader_cache *
ilo_shader_cache_create(void)
{
   struct ilo_shader_cache *shc = CALLOC_STRUCT(ilo_shader_cache);

   if (shc) {
      list_inithead(&shc->shaders);
      list_inithead(&shc->changed);
   }

   return shc;
}
 
/**
 * Destroy a shader cache.
 *
 * Only the cache itself is freed; the managed shader states are owned by
 * their creators and must be destroyed separately.
 */
void
ilo_shader_cache_destroy(struct ilo_shader_cache *shc)
{
   FREE(shc);
}
 
/**
* Add a shader to the cache.
*/
void
ilo_shader_cache_add(struct ilo_shader_cache *shc,
struct ilo_shader_state *shader)
{
struct ilo_shader *sh;
 
shader->cache = shc;
LIST_FOR_EACH_ENTRY(sh, &shader->variants, list)
sh->uploaded = false;
 
list_add(&shader->list, &shc->changed);
}
 
/**
 * Detach a shader state from the cache that manages it.
 */
void
ilo_shader_cache_remove(struct ilo_shader_cache *shc,
                        struct ilo_shader_state *shader)
{
   shader->cache = NULL;
   list_del(&shader->list);
}
 
/**
 * Notify the cache that a managed shader has changed.
 *
 * The shader is moved to the changed list so the next upload rewrites it.
 */
static void
ilo_shader_cache_notify_change(struct ilo_shader_cache *shc,
                               struct ilo_shader_state *shader)
{
   /* ignore notifications for shaders managed by another (or no) cache */
   if (shader->cache != shc)
      return;

   list_del(&shader->list);
   list_add(&shader->list, &shc->changed);
}
 
/**
 * Write the kernels of \p shader to \p bo, starting at \p offset. With
 * \p incremental set, variants already uploaded are skipped.
 *
 * Returns the number of bytes consumed (including alignment padding), or
 * -1 on write failure.
 */
static int
ilo_shader_cache_upload_shader(struct ilo_shader_cache *shc,
                               struct ilo_shader_state *shader,
                               struct intel_bo *bo, unsigned offset,
                               bool incremental)
{
   const unsigned start = offset;
   struct ilo_shader *variant;

   LIST_FOR_EACH_ENTRY(variant, &shader->variants, list) {
      if (incremental && variant->uploaded)
         continue;

      /* kernels must be aligned to 64-byte */
      offset = align(offset, 64);

      if (unlikely(intel_bo_pwrite(bo, offset, variant->kernel_size,
                                   variant->kernel)))
         return -1;

      variant->uploaded = true;
      variant->cache_offset = offset;

      offset += variant->kernel_size;
   }

   return (int) (offset - start);
}
 
/**
 * Accumulate the aligned kernel sizes of the variants of \p shader that
 * would be uploaded, mirroring ilo_shader_cache_upload_shader().
 */
static unsigned
ilo_shader_cache_get_shader_upload_size(struct ilo_shader_state *shader,
                                        unsigned offset,
                                        bool incremental)
{
   struct ilo_shader *sh;

   /* see ilo_shader_cache_upload_shader() */
   LIST_FOR_EACH_ENTRY(sh, &shader->variants, list) {
      if (!incremental || !sh->uploaded)
         offset = align(offset, 64) + sh->kernel_size;
   }

   return offset;
}

/**
 * Similar to ilo_shader_cache_upload(), except no upload happens. Returns
 * the number of bytes an upload starting at \p offset would need.
 */
static int
ilo_shader_cache_get_upload_size(struct ilo_shader_cache *shc,
                                 unsigned offset,
                                 bool incremental)
{
   const unsigned base = offset;
   struct ilo_shader_state *shader;

   /* a full upload also rewrites the already-uploaded shaders */
   if (!incremental) {
      LIST_FOR_EACH_ENTRY(shader, &shc->shaders, list) {
         offset = ilo_shader_cache_get_shader_upload_size(shader,
               offset, incremental);
      }
   }

   LIST_FOR_EACH_ENTRY(shader, &shc->changed, list) {
      offset = ilo_shader_cache_get_shader_upload_size(shader,
            offset, incremental);
   }

   /*
    * From the Sandy Bridge PRM, volume 4 part 2, page 112:
    *
    *     "Due to prefetch of the instruction stream, the EUs may attempt to
    *      access up to 8 instructions (128 bytes) beyond the end of the
    *      kernel program - possibly into the next memory page.  Although
    *      these instructions will not be executed, software must account for
    *      the prefetch in order to avoid invalid page access faults."
    */
   if (offset > base)
      offset += 128;

   return (int) (offset - base);
}
 
/**
 * Upload managed shaders to the bo. When incremental is true, only shaders
 * that are changed or added after the last upload are uploaded.
 *
 * When \p bo is NULL, nothing is written and the required upload size is
 * returned instead.
 *
 * Returns the number of bytes written (or needed), or a negative value on
 * write failure.
 */
int
ilo_shader_cache_upload(struct ilo_shader_cache *shc,
                        struct intel_bo *bo, unsigned offset,
                        bool incremental)
{
   struct ilo_shader_state *shader, *next;
   int size = 0, s;

   /* size-query mode */
   if (!bo)
      return ilo_shader_cache_get_upload_size(shc, offset, incremental);

   /* a full upload rewrites even the shaders that did not change */
   if (!incremental) {
      LIST_FOR_EACH_ENTRY(shader, &shc->shaders, list) {
         s = ilo_shader_cache_upload_shader(shc, shader,
               bo, offset, incremental);
         if (unlikely(s < 0))
            return s;

         size += s;
         offset += s;
      }
   }

   /* _SAFE because each uploaded shader is moved off the changed list */
   LIST_FOR_EACH_ENTRY_SAFE(shader, next, &shc->changed, list) {
      s = ilo_shader_cache_upload_shader(shc, shader,
            bo, offset, incremental);
      if (unlikely(s < 0))
         return s;

      size += s;
      offset += s;

      /* now clean: move to the uploaded list */
      list_del(&shader->list);
      list_add(&shader->list, &shc->shaders);
   }

   return size;
}
 
/**
 * Initialize a shader variant, capturing the context states that affect
 * code generation for this shader type (the "non-orthogonal" states).
 */
void
ilo_shader_variant_init(struct ilo_shader_variant *variant,
                        const struct ilo_shader_info *info,
                        const struct ilo_context *ilo)
{
   int num_views, i;

   /* zero-fill so variants can be compared with memcmp() */
   memset(variant, 0, sizeof(*variant));

   switch (info->type) {
   case PIPE_SHADER_VERTEX:
      variant->u.vs.rasterizer_discard =
         ilo->rasterizer->state.rasterizer_discard;
      variant->u.vs.num_ucps =
         util_last_bit(ilo->rasterizer->state.clip_plane_enable);
      break;
   case PIPE_SHADER_GEOMETRY:
      variant->u.gs.rasterizer_discard =
         ilo->rasterizer->state.rasterizer_discard;
      /* the GS consumes whatever the currently bound VS outputs */
      variant->u.gs.num_inputs = ilo->vs->shader->out.count;
      for (i = 0; i < ilo->vs->shader->out.count; i++) {
         variant->u.gs.semantic_names[i] =
            ilo->vs->shader->out.semantic_names[i];
         variant->u.gs.semantic_indices[i] =
            ilo->vs->shader->out.semantic_indices[i];
      }
      break;
   case PIPE_SHADER_FRAGMENT:
      variant->u.fs.flatshade =
         (info->has_color_interp && ilo->rasterizer->state.flatshade);
      /* fb height only matters when the FS reads the position;
       * NOTE(review): presumably used to flip window-space Y — confirm in
       * the FS compiler */
      variant->u.fs.fb_height = (info->has_pos) ?
         ilo->fb.state.height : 1;
      variant->u.fs.num_cbufs = ilo->fb.state.nr_cbufs;
      break;
   default:
      assert(!"unknown shader type");
      break;
   }

   num_views = ilo->view[info->type].count;
   assert(info->num_samplers <= num_views);

   /* record the swizzle of each bound sampler view */
   variant->num_sampler_views = info->num_samplers;
   for (i = 0; i < info->num_samplers; i++) {
      const struct pipe_sampler_view *view = ilo->view[info->type].states[i];
      const struct ilo_sampler_cso *sampler = ilo->sampler[info->type].cso[i];

      if (view) {
         variant->sampler_view_swizzles[i].r = view->swizzle_r;
         variant->sampler_view_swizzles[i].g = view->swizzle_g;
         variant->sampler_view_swizzles[i].b = view->swizzle_b;
         variant->sampler_view_swizzles[i].a = view->swizzle_a;
      }
      else if (info->shadow_samplers & (1 << i)) {
         /* no view bound: default shadow result is (r, r, r, 1) */
         variant->sampler_view_swizzles[i].r = PIPE_SWIZZLE_RED;
         variant->sampler_view_swizzles[i].g = PIPE_SWIZZLE_RED;
         variant->sampler_view_swizzles[i].b = PIPE_SWIZZLE_RED;
         variant->sampler_view_swizzles[i].a = PIPE_SWIZZLE_ONE;
      }
      else {
         /* no view bound: identity swizzle */
         variant->sampler_view_swizzles[i].r = PIPE_SWIZZLE_RED;
         variant->sampler_view_swizzles[i].g = PIPE_SWIZZLE_GREEN;
         variant->sampler_view_swizzles[i].b = PIPE_SWIZZLE_BLUE;
         variant->sampler_view_swizzles[i].a = PIPE_SWIZZLE_ALPHA;
      }

      /*
       * When non-nearest filter and PIPE_TEX_WRAP_CLAMP wrap mode is used,
       * the HW wrap mode is set to BRW_TEXCOORDMODE_CLAMP_BORDER, and we need
       * to manually saturate the texture coordinates.
       */
      if (sampler) {
         variant->saturate_tex_coords[0] |= sampler->saturate_s << i;
         variant->saturate_tex_coords[1] |= sampler->saturate_t << i;
         variant->saturate_tex_coords[2] |= sampler->saturate_r << i;
      }
   }
}
 
/**
 * Guess the shader variant, knowing that the context may still change.
 *
 * Used at shader-creation time to precompile a likely variant before the
 * real states are known; guesses flat shading off and a single cbuf.
 */
static void
ilo_shader_variant_guess(struct ilo_shader_variant *variant,
                         const struct ilo_shader_info *info,
                         const struct ilo_context *ilo)
{
   int i;

   /* zero-fill so variants can be compared with memcmp() */
   memset(variant, 0, sizeof(*variant));

   switch (info->type) {
   case PIPE_SHADER_VERTEX:
      break;
   case PIPE_SHADER_GEOMETRY:
      break;
   case PIPE_SHADER_FRAGMENT:
      variant->u.fs.flatshade = false;
      variant->u.fs.fb_height = (info->has_pos) ?
         ilo->fb.state.height : 1;
      variant->u.fs.num_cbufs = 1;
      break;
   default:
      assert(!"unknown shader type");
      break;
   }

   /* no views are bound yet: mirror the defaults of
    * ilo_shader_variant_init() */
   variant->num_sampler_views = info->num_samplers;
   for (i = 0; i < info->num_samplers; i++) {
      if (info->shadow_samplers & (1 << i)) {
         variant->sampler_view_swizzles[i].r = PIPE_SWIZZLE_RED;
         variant->sampler_view_swizzles[i].g = PIPE_SWIZZLE_RED;
         variant->sampler_view_swizzles[i].b = PIPE_SWIZZLE_RED;
         variant->sampler_view_swizzles[i].a = PIPE_SWIZZLE_ONE;
      }
      else {
         variant->sampler_view_swizzles[i].r = PIPE_SWIZZLE_RED;
         variant->sampler_view_swizzles[i].g = PIPE_SWIZZLE_GREEN;
         variant->sampler_view_swizzles[i].b = PIPE_SWIZZLE_BLUE;
         variant->sampler_view_swizzles[i].a = PIPE_SWIZZLE_ALPHA;
      }
   }
}
 
 
/**
 * Parse a TGSI instruction for the shader info.
 *
 * Collects the number of samplers used, which of them are shadow
 * samplers, and whether the edge flag output is a passthrough of an
 * input.
 */
static void
ilo_shader_info_parse_inst(struct ilo_shader_info *info,
                           const struct tgsi_full_instruction *inst)
{
   int i;

   /* look for edgeflag passthrough */
   if (info->edgeflag_out >= 0 &&
       inst->Instruction.Opcode == TGSI_OPCODE_MOV &&
       inst->Dst[0].Register.File == TGSI_FILE_OUTPUT &&
       inst->Dst[0].Register.Index == info->edgeflag_out) {

      assert(inst->Src[0].Register.File == TGSI_FILE_INPUT);
      info->edgeflag_in = inst->Src[0].Register.Index;
   }

   if (inst->Instruction.Texture) {
      bool shadow;

      /* classify the texture target as shadow or not */
      switch (inst->Texture.Texture) {
      case TGSI_TEXTURE_SHADOW1D:
      case TGSI_TEXTURE_SHADOW2D:
      case TGSI_TEXTURE_SHADOWRECT:
      case TGSI_TEXTURE_SHADOW1D_ARRAY:
      case TGSI_TEXTURE_SHADOW2D_ARRAY:
      case TGSI_TEXTURE_SHADOWCUBE:
      case TGSI_TEXTURE_SHADOWCUBE_ARRAY:
         shadow = true;
         break;
      default:
         shadow = false;
         break;
      }

      /* num_samplers tracks the highest sampler index referenced + 1 */
      for (i = 0; i < inst->Instruction.NumSrcRegs; i++) {
         const struct tgsi_full_src_register *src = &inst->Src[i];

         if (src->Register.File == TGSI_FILE_SAMPLER) {
            const int idx = src->Register.Index;

            if (idx >= info->num_samplers)
               info->num_samplers = idx + 1;

            if (shadow)
               info->shadow_samplers |= 1 << idx;
         }
      }
   }
}
 
/**
 * Parse a TGSI property for the shader info.
 *
 * Only TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS is of interest; all other
 * properties are ignored.
 */
static void
ilo_shader_info_parse_prop(struct ilo_shader_info *info,
                           const struct tgsi_full_property *prop)
{
   if (prop->Property.PropertyName ==
         TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS)
      info->fs_color0_writes_all_cbufs = prop->u[0].Data;
}
 
/**
 * Parse a TGSI declaration for the shader info.
 *
 * Records color-interpolated inputs, position usage, the edge flag output
 * register, and instance/vertex id system values.
 */
static void
ilo_shader_info_parse_decl(struct ilo_shader_info *info,
                           const struct tgsi_full_declaration *decl)
{
   switch (decl->Declaration.File) {
   case TGSI_FILE_INPUT:
      if (decl->Declaration.Interpolate &&
          decl->Interp.Interpolate == TGSI_INTERPOLATE_COLOR)
         info->has_color_interp = true;
      if (decl->Declaration.Semantic &&
          decl->Semantic.Name == TGSI_SEMANTIC_POSITION)
         info->has_pos = true;
      break;
   case TGSI_FILE_OUTPUT:
      if (decl->Declaration.Semantic &&
          decl->Semantic.Name == TGSI_SEMANTIC_EDGEFLAG)
         info->edgeflag_out = decl->Range.First;
      break;
   case TGSI_FILE_SYSTEM_VALUE:
      if (decl->Declaration.Semantic &&
          decl->Semantic.Name == TGSI_SEMANTIC_INSTANCEID)
         info->has_instanceid = true;
      if (decl->Declaration.Semantic &&
          decl->Semantic.Name == TGSI_SEMANTIC_VERTEXID)
         info->has_vertexid = true;
      break;
   default:
      break;
   }
}
 
static void
ilo_shader_info_parse_tokens(struct ilo_shader_info *info)
{
struct tgsi_parse_context parse;
 
info->edgeflag_in = -1;
info->edgeflag_out = -1;
 
tgsi_parse_init(&parse, info->tokens);
while (!tgsi_parse_end_of_tokens(&parse)) {
const union tgsi_full_token *token;
 
tgsi_parse_token(&parse);
token = &parse.FullToken;
 
switch (token->Token.Type) {
case TGSI_TOKEN_TYPE_DECLARATION:
ilo_shader_info_parse_decl(info, &token->FullDeclaration);
break;
case TGSI_TOKEN_TYPE_INSTRUCTION:
ilo_shader_info_parse_inst(info, &token->FullInstruction);
break;
case TGSI_TOKEN_TYPE_PROPERTY:
ilo_shader_info_parse_prop(info, &token->FullProperty);
break;
default:
break;
}
}
tgsi_parse_free(&parse);
}
 
/**
 * Create a shader state.
 *
 * \p templ is a pipe_compute_state for PIPE_SHADER_COMPUTE and a
 * pipe_shader_state otherwise. The TGSI tokens are duplicated (and owned
 * by the returned state), the info is parsed, and a guessed variant is
 * compiled eagerly.
 *
 * Returns NULL on allocation or compile failure.
 */
static struct ilo_shader_state *
ilo_shader_state_create(const struct ilo_context *ilo,
                        int type, const void *templ)
{
   struct ilo_shader_state *state;
   struct ilo_shader_variant variant;

   state = CALLOC_STRUCT(ilo_shader_state);
   if (!state)
      return NULL;

   state->info.dev = ilo->dev;
   state->info.type = type;

   if (type == PIPE_SHADER_COMPUTE) {
      const struct pipe_compute_state *c =
         (const struct pipe_compute_state *) templ;

      state->info.tokens = tgsi_dup_tokens(c->prog);
      state->info.compute.req_local_mem = c->req_local_mem;
      state->info.compute.req_private_mem = c->req_private_mem;
      state->info.compute.req_input_mem = c->req_input_mem;
   }
   else {
      const struct pipe_shader_state *s =
         (const struct pipe_shader_state *) templ;

      state->info.tokens = tgsi_dup_tokens(s->tokens);
      state->info.stream_output = s->stream_output;
   }

   list_inithead(&state->variants);

   ilo_shader_info_parse_tokens(&state->info);

   /* guess and compile now */
   ilo_shader_variant_guess(&variant, &state->info, ilo);
   if (!ilo_shader_state_use_variant(state, &variant)) {
      ilo_shader_destroy(state);
      return NULL;
   }

   return state;
}
 
/**
 * Add a compiled variant to the shader state and account for its size;
 * the managing cache, if any, is told to schedule a re-upload.
 */
static void
ilo_shader_state_add_shader(struct ilo_shader_state *state,
                            struct ilo_shader *sh)
{
   state->num_variants++;
   state->total_size += sh->kernel_size;
   list_add(&sh->list, &state->variants);

   if (state->cache)
      ilo_shader_cache_notify_change(state->cache, state);
}
 
/**
 * Unlink a compiled variant from the shader state and undo its size
 * accounting.
 */
static void
ilo_shader_state_remove_shader(struct ilo_shader_state *state,
                               struct ilo_shader *sh)
{
   state->total_size -= sh->kernel_size;
   state->num_variants--;
   list_del(&sh->list);
}
 
/**
 * Garbage collect shader variants in the shader state.
 *
 * Evicts least-recently-used variants (the list is kept in MRU order by
 * ilo_shader_state_use_variant()) until usage drops to half the limit.
 *
 * NOTE(review): this does not skip state->shader; callers appear to
 * reassign it right after gc — confirm the currently selected variant can
 * never be freed while still referenced.
 */
static void
ilo_shader_state_gc(struct ilo_shader_state *state)
{
   /* activate when the variants take up more than 4KiB of space */
   const int limit = 4 * 1024;
   struct ilo_shader *sh, *next;

   if (state->total_size < limit)
      return;

   /* remove from the tail as the most recently ones are at the head */
   LIST_FOR_EACH_ENTRY_SAFE_REV(sh, next, &state->variants, list) {
      ilo_shader_state_remove_shader(state, sh);
      ilo_shader_destroy_kernel(sh);

      /* stop once we are comfortably under the limit */
      if (state->total_size <= limit / 2)
         break;
   }
}
 
/**
 * Look up a compiled shader whose variant key matches \p variant exactly.
 *
 * Returns NULL when no variant matches. The comparison relies on variant
 * structs being fully memset() before initialization.
 */
static struct ilo_shader *
ilo_shader_state_search_variant(struct ilo_shader_state *state,
                                const struct ilo_shader_variant *variant)
{
   struct ilo_shader *sh;

   LIST_FOR_EACH_ENTRY(sh, &state->variants, list) {
      if (!memcmp(&sh->variant, variant, sizeof(*variant)))
         return sh;
   }

   return NULL;
}
 
/*
 * Copy the stream output info into the compiled shader, remapping each
 * output's TGSI register index to the shader's output slot.
 */
static void
copy_so_info(struct ilo_shader *sh,
             const struct pipe_stream_output_info *so_info)
{
   unsigned i, attr;

   if (!so_info->num_outputs)
      return;

   sh->so_info = *so_info;

   for (i = 0; i < so_info->num_outputs; i++) {
      /* figure out which attribute is sourced */
      for (attr = 0; attr < sh->out.count; attr++) {
         const int reg_idx = sh->out.register_indices[attr];
         if (reg_idx == so_info->output[i].register_index)
            break;
      }

      if (attr >= sh->out.count) {
         assert(!"stream output an undefined register");
         sh->so_info.output[i].register_index = 0;
         /* bug fix: do not read sh->out.semantic_names[attr] below with
          * attr == sh->out.count (out-of-bounds) */
         continue;
      }

      sh->so_info.output[i].register_index = attr;

      /* PSIZE is at W channel */
      if (sh->out.semantic_names[attr] == TGSI_SEMANTIC_PSIZE) {
         assert(so_info->output[i].start_component == 0);
         assert(so_info->output[i].num_components == 1);
         sh->so_info.output[i].start_component = 3;
      }
   }
}
 
/**
 * Compile and add a shader variant to the shader state.
 *
 * Dispatches to the per-stage compiler, records the variant key and the
 * remapped stream-output info, then registers the kernel with the state.
 *
 * Returns NULL on compile failure.
 */
static struct ilo_shader *
ilo_shader_state_add_variant(struct ilo_shader_state *state,
                             const struct ilo_shader_variant *variant)
{
   struct ilo_shader *sh;

   switch (state->info.type) {
   case PIPE_SHADER_VERTEX:
      sh = ilo_shader_compile_vs(state, variant);
      break;
   case PIPE_SHADER_FRAGMENT:
      sh = ilo_shader_compile_fs(state, variant);
      break;
   case PIPE_SHADER_GEOMETRY:
      sh = ilo_shader_compile_gs(state, variant);
      break;
   case PIPE_SHADER_COMPUTE:
      sh = ilo_shader_compile_cs(state, variant);
      break;
   default:
      sh = NULL;
      break;
   }
   if (!sh) {
      assert(!"failed to compile shader");
      return NULL;
   }

   /* remember the key this kernel was compiled for */
   sh->variant = *variant;

   copy_so_info(sh, &state->info.stream_output);

   ilo_shader_state_add_shader(state, sh);

   return sh;
}
 
/**
 * Update state->shader to point to a variant. If the variant does not exist,
 * it will be added first.
 *
 * Newly added variants also get their per-stage CSO constructed. The
 * selected variant is moved to the head of the list to keep it in MRU
 * order for ilo_shader_state_gc().
 *
 * Returns false if compiling a missing variant failed.
 */
bool
ilo_shader_state_use_variant(struct ilo_shader_state *state,
                             const struct ilo_shader_variant *variant)
{
   struct ilo_shader *sh;
   bool construct_cso = false;

   sh = ilo_shader_state_search_variant(state, variant);
   if (!sh) {
      /* make room before compiling a new kernel */
      ilo_shader_state_gc(state);

      sh = ilo_shader_state_add_variant(state, variant);
      if (!sh)
         return false;

      construct_cso = true;
   }

   /* move to head */
   if (state->variants.next != &sh->list) {
      list_del(&sh->list);
      list_add(&sh->list, &state->variants);
   }

   state->shader = sh;

   if (construct_cso) {
      switch (state->info.type) {
      case PIPE_SHADER_VERTEX:
         ilo_gpe_init_vs_cso(state->info.dev, state, &sh->cso);
         break;
      case PIPE_SHADER_GEOMETRY:
         ilo_gpe_init_gs_cso(state->info.dev, state, &sh->cso);
         break;
      case PIPE_SHADER_FRAGMENT:
         ilo_gpe_init_fs_cso(state->info.dev, state, &sh->cso);
         break;
      default:
         break;
      }
   }

   return true;
}
 
struct ilo_shader_state *
ilo_shader_create_vs(const struct ilo_dev_info *dev,
const struct pipe_shader_state *state,
const struct ilo_context *precompile)
{
struct ilo_shader_state *shader;
 
shader = ilo_shader_state_create(precompile, PIPE_SHADER_VERTEX, state);
 
/* states used in ilo_shader_variant_init() */
shader->info.non_orthogonal_states = ILO_DIRTY_VIEW_VS |
ILO_DIRTY_RASTERIZER;
 
return shader;
}
 
struct ilo_shader_state *
ilo_shader_create_gs(const struct ilo_dev_info *dev,
const struct pipe_shader_state *state,
const struct ilo_context *precompile)
{
struct ilo_shader_state *shader;
 
shader = ilo_shader_state_create(precompile, PIPE_SHADER_GEOMETRY, state);
 
/* states used in ilo_shader_variant_init() */
shader->info.non_orthogonal_states = ILO_DIRTY_VIEW_GS |
ILO_DIRTY_VS |
ILO_DIRTY_RASTERIZER;
 
return shader;
}
 
struct ilo_shader_state *
ilo_shader_create_fs(const struct ilo_dev_info *dev,
const struct pipe_shader_state *state,
const struct ilo_context *precompile)
{
struct ilo_shader_state *shader;
 
shader = ilo_shader_state_create(precompile, PIPE_SHADER_FRAGMENT, state);
 
/* states used in ilo_shader_variant_init() */
shader->info.non_orthogonal_states = ILO_DIRTY_VIEW_FS |
ILO_DIRTY_RASTERIZER |
ILO_DIRTY_FB;
 
return shader;
}
 
struct ilo_shader_state *
ilo_shader_create_cs(const struct ilo_dev_info *dev,
const struct pipe_compute_state *state,
const struct ilo_context *precompile)
{
struct ilo_shader_state *shader;
 
shader = ilo_shader_state_create(precompile, PIPE_SHADER_COMPUTE, state);
 
shader->info.non_orthogonal_states = 0;
 
return shader;
}
 
/**
 * Destroy a shader state, all of its compiled variants, and its duplicated
 * TGSI tokens.
 *
 * The state must already have been removed from any managing cache.
 */
void
ilo_shader_destroy(struct ilo_shader_state *shader)
{
   struct ilo_shader *sh, *next;

   LIST_FOR_EACH_ENTRY_SAFE(sh, next, &shader->variants, list)
      ilo_shader_destroy_kernel(sh);

   /* cast away const: the tokens were allocated by tgsi_dup_tokens() */
   FREE((struct tgsi_token *) shader->info.tokens);
   FREE(shader);
}
 
/**
 * Return the type (PIPE_SHADER_x) of the shader.
 */
int
ilo_shader_get_type(const struct ilo_shader_state *shader)
{
   return shader->info.type;
}
 
/**
 * Select a kernel for the given context. This will compile a new kernel if
 * none of the existing kernels work with the context.
 *
 * \param ilo the context
 * \param dirty states of the context that are considered changed
 * \return true if a different kernel is selected
 */
bool
ilo_shader_select_kernel(struct ilo_shader_state *shader,
                         const struct ilo_context *ilo,
                         uint32_t dirty)
{
   const struct ilo_shader * const cur = shader->shader;
   struct ilo_shader_variant variant;

   /* nothing this shader depends on changed */
   if (!(shader->info.non_orthogonal_states & dirty))
      return false;

   ilo_shader_variant_init(&variant, &shader->info, ilo);
   ilo_shader_state_use_variant(shader, &variant);

   return (shader->shader != cur);
}
 
/*
 * Find the slot whose (semantic, index) matches the request. A COLOR that
 * has no exact match may fall back to the BCOLOR of the same index.
 * Returns -1 when nothing matches.
 */
static int
route_attr(const int *semantics, const int *indices, int len,
           int semantic, int index)
{
   const bool want_bcolor_fallback = (semantic == TGSI_SEMANTIC_COLOR);
   int bcolor_slot = -1;
   int slot;

   for (slot = 0; slot < len; slot++) {
      if (indices[slot] != index)
         continue;

      if (semantics[slot] == semantic)
         return slot;

      /* remember the first BCOLOR with a matching index */
      if (want_bcolor_fallback && bcolor_slot < 0 &&
          semantics[slot] == TGSI_SEMANTIC_BCOLOR)
         bcolor_slot = slot;
   }

   return bcolor_slot;
}
 
/**
 * Select a routing for the given source shader and rasterizer state.
 *
 * The routing maps the FS inputs to the output slots of the previous
 * stage (\p source), including point-sprite replacement and two-sided
 * lighting, and is what 3DSTATE_SBE is programmed from. When \p source is
 * NULL the kernel's own inputs are used as the source.
 *
 * \return true if a different routing is selected
 */
bool
ilo_shader_select_kernel_routing(struct ilo_shader_state *shader,
                                 const struct ilo_shader_state *source,
                                 const struct ilo_rasterizer_state *rasterizer)
{
   const uint32_t sprite_coord_enable = rasterizer->state.sprite_coord_enable;
   const bool light_twoside = rasterizer->state.light_twoside;
   struct ilo_shader *kernel = shader->shader;
   struct ilo_kernel_routing *routing = &kernel->routing;
   const int *src_semantics, *src_indices;
   int src_len, max_src_slot;
   int dst_len, dst_slot;

   /* we are constructing 3DSTATE_SBE here */
   assert(shader->info.dev->gen >= ILO_GEN(6) &&
          shader->info.dev->gen <= ILO_GEN(7));

   assert(kernel);

   /* pick the attribute arrays the routing sources from */
   if (source) {
      assert(source->shader);
      src_semantics = source->shader->out.semantic_names;
      src_indices = source->shader->out.semantic_indices;
      src_len = source->shader->out.count;
   }
   else {
      src_semantics = kernel->in.semantic_names;
      src_indices = kernel->in.semantic_indices;
      src_len = kernel->in.count;
   }

   /* no change: same sprite-coord state and the remembered source
    * semantics/indices still match */
   if (kernel->routing_initialized &&
       routing->source_skip + routing->source_len <= src_len &&
       kernel->routing_sprite_coord_enable == sprite_coord_enable &&
       !memcmp(kernel->routing_src_semantics,
          &src_semantics[routing->source_skip],
          sizeof(kernel->routing_src_semantics[0]) * routing->source_len) &&
       !memcmp(kernel->routing_src_indices,
          &src_indices[routing->source_skip],
          sizeof(kernel->routing_src_indices[0]) * routing->source_len))
      return false;

   if (source) {
      /* skip PSIZE and POSITION (how about the optional CLIPDISTs?) */
      assert(src_semantics[0] == TGSI_SEMANTIC_PSIZE);
      assert(src_semantics[1] == TGSI_SEMANTIC_POSITION);
      routing->source_skip = 2;

      routing->source_len = src_len - routing->source_skip;
      src_semantics += routing->source_skip;
      src_indices += routing->source_skip;
   }
   else {
      routing->source_skip = 0;
      routing->source_len = src_len;
   }

   routing->const_interp_enable = kernel->in.const_interp_enable;
   routing->point_sprite_enable = 0;
   routing->swizzle_enable = false;

   assert(kernel->in.count <= Elements(routing->swizzles));
   dst_len = MIN2(kernel->in.count, Elements(routing->swizzles));
   max_src_slot = -1;

   /* route each FS input to a source slot */
   for (dst_slot = 0; dst_slot < dst_len; dst_slot++) {
      const int semantic = kernel->in.semantic_names[dst_slot];
      const int index = kernel->in.semantic_indices[dst_slot];
      int src_slot;

      if (semantic == TGSI_SEMANTIC_GENERIC &&
          (sprite_coord_enable & (1 << index)))
         routing->point_sprite_enable |= 1 << dst_slot;

      if (source) {
         src_slot = route_attr(src_semantics, src_indices,
               routing->source_len, semantic, index);

         /*
          * The source shader stage does not output this attribute.  The value
          * is supposed to be undefined, unless the attribute goes through
          * point sprite replacement or the attribute is
          * TGSI_SEMANTIC_POSITION.  In all cases, we do not care which source
          * attribute is picked.
          *
          * We should update the kernel code and omit the output of
          * TGSI_SEMANTIC_POSITION here.
          */
         if (src_slot < 0)
            src_slot = 0;
      }
      else {
         src_slot = dst_slot;
      }

      routing->swizzles[dst_slot] = src_slot;

      /* use the following slot for two-sided lighting */
      if (semantic == TGSI_SEMANTIC_COLOR && light_twoside &&
          src_slot + 1 < routing->source_len &&
          src_semantics[src_slot + 1] == TGSI_SEMANTIC_BCOLOR &&
          src_indices[src_slot + 1] == index) {
         routing->swizzles[dst_slot] |= ATTRIBUTE_SWIZZLE_INPUTATTR_FACING <<
            ATTRIBUTE_SWIZZLE_SHIFT;
         src_slot++;
      }

      if (routing->swizzles[dst_slot] != dst_slot)
         routing->swizzle_enable = true;

      if (max_src_slot < src_slot)
         max_src_slot = src_slot;
   }

   /* clear the unused tail of the swizzle table */
   memset(&routing->swizzles[dst_slot], 0, sizeof(routing->swizzles) -
         sizeof(routing->swizzles[0]) * dst_slot);

   /*
    * From the Sandy Bridge PRM, volume 2 part 1, page 248:
    *
    *     "It is UNDEFINED to set this field (Vertex URB Entry Read Length) to
    *      0 indicating no Vertex URB data to be read.
    *
    *      This field should be set to the minimum length required to read the
    *      maximum source attribute.  The maximum source attribute is indicated
    *      by the maximum value of the enabled Attribute # Source Attribute if
    *      Attribute Swizzle Enable is set, Number of Output Attributes-1 if
    *      enable is not set.
    *
    *      read_length = ceiling((max_source_attr+1)/2)
    *
    *      [errata] Corruption/Hang possible if length programmed larger than
    *      recommended"
    */
   routing->source_len = max_src_slot + 1;

   /* remember the states of the source */
   kernel->routing_initialized = true;
   kernel->routing_sprite_coord_enable = sprite_coord_enable;
   memcpy(kernel->routing_src_semantics, src_semantics,
         sizeof(kernel->routing_src_semantics[0]) * routing->source_len);
   memcpy(kernel->routing_src_indices, src_indices,
         sizeof(kernel->routing_src_indices[0]) * routing->source_len);

   return true;
}
 
/**
 * Return the cache offset of the selected kernel. This must be called after
 * ilo_shader_select_kernel() and ilo_shader_cache_upload().
 */
uint32_t
ilo_shader_get_kernel_offset(const struct ilo_shader_state *shader)
{
   const struct ilo_shader *kernel = shader->shader;

   assert(kernel && kernel->uploaded);

   return kernel->cache_offset;
}
 
/**
 * Query a kernel parameter for the selected kernel.
 *
 * Booleans are returned as 0/1; unknown parameters assert and return 0.
 */
int
ilo_shader_get_kernel_param(const struct ilo_shader_state *shader,
                            enum ilo_kernel_param param)
{
   const struct ilo_shader *kernel = shader->shader;
   int val;

   assert(kernel);

   switch (param) {
   /* common */
   case ILO_KERNEL_INPUT_COUNT:
      val = kernel->in.count;
      break;
   case ILO_KERNEL_OUTPUT_COUNT:
      val = kernel->out.count;
      break;
   case ILO_KERNEL_URB_DATA_START_REG:
      val = kernel->in.start_grf;
      break;

   /* VS */
   case ILO_KERNEL_VS_INPUT_INSTANCEID:
      val = shader->info.has_instanceid;
      break;
   case ILO_KERNEL_VS_INPUT_VERTEXID:
      val = shader->info.has_vertexid;
      break;
   case ILO_KERNEL_VS_INPUT_EDGEFLAG:
      if (shader->info.edgeflag_in >= 0) {
         /* we rely on the state tracker here */
         assert(shader->info.edgeflag_in == kernel->in.count - 1);
         val = true;
      }
      else {
         val = false;
      }
      break;
   case ILO_KERNEL_VS_PCB_UCP_SIZE:
      val = kernel->pcb.clip_state_size;
      break;
   case ILO_KERNEL_VS_GEN6_SO:
      val = kernel->stream_output;
      break;
   case ILO_KERNEL_VS_GEN6_SO_START_REG:
      val = kernel->gs_start_grf;
      break;
   case ILO_KERNEL_VS_GEN6_SO_POINT_OFFSET:
      val = kernel->gs_offsets[0];
      break;
   case ILO_KERNEL_VS_GEN6_SO_LINE_OFFSET:
      val = kernel->gs_offsets[1];
      break;
   case ILO_KERNEL_VS_GEN6_SO_TRI_OFFSET:
      val = kernel->gs_offsets[2];
      break;

   /* GS */
   case ILO_KERNEL_GS_DISCARD_ADJACENCY:
      val = kernel->in.discard_adj;
      break;
   case ILO_KERNEL_GS_GEN6_SVBI_POST_INC:
      val = kernel->svbi_post_inc;
      break;

   /* FS */
   case ILO_KERNEL_FS_INPUT_Z:
   case ILO_KERNEL_FS_INPUT_W:
      val = kernel->in.has_pos;
      break;
   case ILO_KERNEL_FS_OUTPUT_Z:
      val = kernel->out.has_pos;
      break;
   case ILO_KERNEL_FS_USE_KILL:
      val = kernel->has_kill;
      break;
   case ILO_KERNEL_FS_BARYCENTRIC_INTERPOLATIONS:
      val = kernel->in.barycentric_interpolation_mode;
      break;
   case ILO_KERNEL_FS_DISPATCH_16_OFFSET:
      val = 0;
      break;

   default:
      assert(!"unknown kernel parameter");
      val = 0;
      break;
   }

   return val;
}
 
/**
 * Return the CSO of the selected kernel. Valid only after
 * ilo_shader_select_kernel().
 */
const struct ilo_shader_cso *
ilo_shader_get_kernel_cso(const struct ilo_shader_state *shader)
{
   const struct ilo_shader *kernel = shader->shader;

   assert(kernel);

   return &kernel->cso;
}
 
/**
 * Return the stream-output info of the selected kernel, with register
 * indices remapped to output slots (see copy_so_info()).
 */
const struct pipe_stream_output_info *
ilo_shader_get_kernel_so_info(const struct ilo_shader_state *shader)
{
   const struct ilo_shader *kernel = shader->shader;

   assert(kernel);

   return &kernel->so_info;
}
 
/**
 * Return the routing info of the selected kernel. Valid only after
 * ilo_shader_select_kernel_routing().
 */
const struct ilo_kernel_routing *
ilo_shader_get_kernel_routing(const struct ilo_shader_state *shader)
{
   const struct ilo_shader *kernel = shader->shader;

   assert(kernel);

   return &kernel->routing;
}
/contrib/sdk/sources/Mesa/src/gallium/drivers/ilo/ilo_shader.h
0,0 → 1,148
/*
* Mesa 3-D graphics library
*
* Copyright (C) 2012-2013 LunarG, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*
* Authors:
* Chia-I Wu <olv@lunarg.com>
*/
 
#ifndef ILO_SHADER_H
#define ILO_SHADER_H
 
#include "ilo_common.h"
 
/* Queryable per-kernel parameters; see ilo_shader_get_kernel_param(). */
enum ilo_kernel_param {
   /* common */
   ILO_KERNEL_INPUT_COUNT,
   ILO_KERNEL_OUTPUT_COUNT,
   ILO_KERNEL_URB_DATA_START_REG,

   /* VS */
   ILO_KERNEL_VS_INPUT_INSTANCEID,
   ILO_KERNEL_VS_INPUT_VERTEXID,
   ILO_KERNEL_VS_INPUT_EDGEFLAG,
   ILO_KERNEL_VS_PCB_UCP_SIZE,
   ILO_KERNEL_VS_GEN6_SO,
   ILO_KERNEL_VS_GEN6_SO_START_REG,
   ILO_KERNEL_VS_GEN6_SO_POINT_OFFSET,
   ILO_KERNEL_VS_GEN6_SO_LINE_OFFSET,
   ILO_KERNEL_VS_GEN6_SO_TRI_OFFSET,

   /* GS */
   ILO_KERNEL_GS_DISCARD_ADJACENCY,
   ILO_KERNEL_GS_GEN6_SVBI_POST_INC,

   /* FS */
   ILO_KERNEL_FS_INPUT_Z,
   ILO_KERNEL_FS_INPUT_W,
   ILO_KERNEL_FS_OUTPUT_Z,
   ILO_KERNEL_FS_USE_KILL,
   ILO_KERNEL_FS_BARYCENTRIC_INTERPOLATIONS,
   ILO_KERNEL_FS_DISPATCH_16_OFFSET,

   ILO_KERNEL_PARAM_COUNT,
};
 
/*
 * Attribute routing between the previous stage's outputs and the FS
 * inputs; computed by ilo_shader_select_kernel_routing() and used to
 * program 3DSTATE_SBE.
 */
struct ilo_kernel_routing {
   uint32_t const_interp_enable;         /* constant-interpolation bitmask */
   uint32_t point_sprite_enable;         /* per-input sprite replacement */
   unsigned source_skip, source_len;     /* slots skipped / read from URB */

   bool swizzle_enable;
   uint16_t swizzles[16];                /* per-input source slot + flags */
};
 
struct intel_bo;
struct ilo_context;
struct ilo_rasterizer_state;
struct ilo_shader_cache;
/* opaque handles; definitions live in ilo_shader.c */
struct ilo_shader_state;
struct ilo_shader_cso;

/* The shader cache tracks every shader of a context so their kernels can be
 * uploaded to (and relocated within) a single kernel BO. */
struct ilo_shader_cache *
ilo_shader_cache_create(void);

void
ilo_shader_cache_destroy(struct ilo_shader_cache *shc);

/* register a shader with the cache (called at shader creation) */
void
ilo_shader_cache_add(struct ilo_shader_cache *shc,
                     struct ilo_shader_state *shader);

/* unregister a shader from the cache (called at shader destruction) */
void
ilo_shader_cache_remove(struct ilo_shader_cache *shc,
                        struct ilo_shader_state *shader);

/* upload cached kernels into \p bo at \p offset; when \p incremental is set,
 * presumably only kernels not yet uploaded are written — confirm in
 * ilo_shader.c */
int
ilo_shader_cache_upload(struct ilo_shader_cache *shc,
                        struct intel_bo *bo, unsigned offset,
                        bool incremental);

/* create a vertex shader; \p precompile supplies the context states used to
 * select an initial kernel variant */
struct ilo_shader_state *
ilo_shader_create_vs(const struct ilo_dev_info *dev,
                     const struct pipe_shader_state *state,
                     const struct ilo_context *precompile);

/* create a geometry shader (see ilo_shader_create_vs) */
struct ilo_shader_state *
ilo_shader_create_gs(const struct ilo_dev_info *dev,
                     const struct pipe_shader_state *state,
                     const struct ilo_context *precompile);

/* create a fragment shader (see ilo_shader_create_vs) */
struct ilo_shader_state *
ilo_shader_create_fs(const struct ilo_dev_info *dev,
                     const struct pipe_shader_state *state,
                     const struct ilo_context *precompile);

/* create a compute shader (see ilo_shader_create_vs) */
struct ilo_shader_state *
ilo_shader_create_cs(const struct ilo_dev_info *dev,
                     const struct pipe_compute_state *state,
                     const struct ilo_context *precompile);

void
ilo_shader_destroy(struct ilo_shader_state *shader);

/* return the PIPE_SHADER_* type of the shader */
int
ilo_shader_get_type(const struct ilo_shader_state *shader);

/* select (compiling on demand) the kernel variant matching the context
 * states indicated by \p dirty; returns true when a new kernel is selected */
bool
ilo_shader_select_kernel(struct ilo_shader_state *shader,
                         const struct ilo_context *ilo,
                         uint32_t dirty);

/* set up attribute routing from \p source (the last pre-rasterizer stage)
 * to the selected kernel; returns true when the routing changed */
bool
ilo_shader_select_kernel_routing(struct ilo_shader_state *shader,
                                 const struct ilo_shader_state *source,
                                 const struct ilo_rasterizer_state *rasterizer);

/* offset of the selected kernel in the kernel BO */
uint32_t
ilo_shader_get_kernel_offset(const struct ilo_shader_state *shader);

int
ilo_shader_get_kernel_param(const struct ilo_shader_state *shader,
                            enum ilo_kernel_param param);

const struct ilo_shader_cso *
ilo_shader_get_kernel_cso(const struct ilo_shader_state *shader);

const struct pipe_stream_output_info *
ilo_shader_get_kernel_so_info(const struct ilo_shader_state *shader);

const struct ilo_kernel_routing *
ilo_shader_get_kernel_routing(const struct ilo_shader_state *shader);

#endif /* ILO_SHADER_H */
/contrib/sdk/sources/Mesa/src/gallium/drivers/ilo/ilo_state.c
0,0 → 1,1449
/*
* Mesa 3-D graphics library
*
* Copyright (C) 2012-2013 LunarG, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*
* Authors:
* Chia-I Wu <olv@lunarg.com>
*/
 
#include "util/u_framebuffer.h"
#include "util/u_helpers.h"
#include "util/u_upload_mgr.h"
 
#include "ilo_context.h"
#include "ilo_resource.h"
#include "ilo_shader.h"
#include "ilo_state.h"
 
/**
 * Make sure each bound VS/GS/FS has a kernel selected that matches the
 * current context states, and keep the per-shader dirty flags consistent:
 * a newly selected kernel marks its stage dirty so the HW state is
 * re-emitted.
 */
static void
finalize_shader_states(struct ilo_context *ilo)
{
   unsigned type;

   for (type = 0; type < PIPE_SHADER_TYPES; type++) {
      struct ilo_shader_state *shader;
      uint32_t state;

      /* map the shader type to the bound shader and its dirty bit; compute
       * shaders are not finalized here */
      switch (type) {
      case PIPE_SHADER_VERTEX:
         shader = ilo->vs;
         state = ILO_DIRTY_VS;
         break;
      case PIPE_SHADER_GEOMETRY:
         shader = ilo->gs;
         state = ILO_DIRTY_GS;
         break;
      case PIPE_SHADER_FRAGMENT:
         shader = ilo->fs;
         state = ILO_DIRTY_FS;
         break;
      default:
         shader = NULL;
         state = 0;
         break;
      }

      if (!shader)
         continue;

      /* compile if the shader or the states it depends on changed */
      if (ilo->dirty & state) {
         /* the shader itself changed: reconsider every dependency */
         ilo_shader_select_kernel(shader, ilo, ILO_DIRTY_ALL);
      }
      else if (ilo_shader_select_kernel(shader, ilo, ilo->dirty)) {
         /* mark the state dirty if a new kernel is selected */
         ilo->dirty |= state;
      }

      /* need to setup SBE for FS */
      if (type == PIPE_SHADER_FRAGMENT && ilo->dirty &
            (state | ILO_DIRTY_GS | ILO_DIRTY_VS | ILO_DIRTY_RASTERIZER)) {
         /* route attributes from the last geometry stage (GS if any, else
          * VS) into the FS; a routing change also requires re-emission */
         if (ilo_shader_select_kernel_routing(shader,
               (ilo->gs) ? ilo->gs : ilo->vs, ilo->rasterizer))
            ilo->dirty |= state;
      }
   }
}
 
/**
 * Upload user-memory constant buffers to GPU-visible buffers and build
 * their SURFACE_STATEs.  Buffers backed by real resources already have a
 * surface and are left untouched.
 */
static void
finalize_constant_buffers(struct ilo_context *ilo)
{
   int sh;

   if (!(ilo->dirty & ILO_DIRTY_CBUF))
      return;

   /* TODO push constants? */
   for (sh = 0; sh < PIPE_SHADER_TYPES; sh++) {
      unsigned enabled_mask = ilo->cbuf[sh].enabled_mask;

      /* iterate over the enabled constant buffer slots of this stage */
      while (enabled_mask) {
         struct ilo_cbuf_cso *cbuf;
         int i;

         i = u_bit_scan(&enabled_mask);
         cbuf = &ilo->cbuf[sh].cso[i];

         /* upload user buffer */
         if (cbuf->user_buffer) {
            /* constants are viewed as vec4s */
            const enum pipe_format elem_format =
               PIPE_FORMAT_R32G32B32A32_FLOAT;
            unsigned offset;

            u_upload_data(ilo->uploader, 0, cbuf->user_buffer_size,
                  cbuf->user_buffer, &offset, &cbuf->resource);

            ilo_gpe_init_view_surface_for_buffer(ilo->dev,
                  ilo_buffer(cbuf->resource),
                  offset, cbuf->user_buffer_size,
                  util_format_get_blocksize(elem_format), elem_format,
                  false, false, &cbuf->surface);

            /* the slot is now backed by cbuf->resource */
            cbuf->user_buffer = NULL;
            cbuf->user_buffer_size = 0;
         }
      }
   }
}
 
/**
 * Make ilo->ib.hw_resource point at a GPU buffer holding the indices, with
 * a properly aligned offset.  A user index buffer, or a resource whose
 * offset is not index-size aligned, is copied into an upload buffer first.
 * ilo->ib.draw_start_offset is set so that indexing it with
 * ilo->draw->start yields the first index of this draw.
 */
static void
finalize_index_buffer(struct ilo_context *ilo)
{
   const struct pipe_resource *current_hw_res = ilo->ib.hw_resource;
   const bool need_upload = (ilo->draw->indexed &&
         (ilo->ib.user_buffer || ilo->ib.offset % ilo->ib.index_size));

   if (!(ilo->dirty & ILO_DIRTY_IB) && !need_upload)
      return;

   if (need_upload) {
      /* upload only the range used by this draw */
      const unsigned offset = ilo->ib.index_size * ilo->draw->start;
      const unsigned size = ilo->ib.index_size * ilo->draw->count;
      unsigned hw_offset;

      if (ilo->ib.user_buffer) {
         u_upload_data(ilo->uploader, 0, size,
               ilo->ib.user_buffer + offset, &hw_offset, &ilo->ib.hw_resource);
      }
      else {
         u_upload_buffer(ilo->uploader, 0, ilo->ib.offset + offset, size,
               ilo->ib.buffer, &hw_offset, &ilo->ib.hw_resource);
      }

      /* the HW offset should be aligned */
      assert(hw_offset % ilo->ib.index_size == 0);
      ilo->ib.draw_start_offset = hw_offset / ilo->ib.index_size;

      /*
       * INDEX[ilo->draw->start] in the original buffer is INDEX[0] in the HW
       * resource
       */
      ilo->ib.draw_start_offset -= ilo->draw->start;
   }
   else {
      /* use the bound resource directly */
      pipe_resource_reference(&ilo->ib.hw_resource, ilo->ib.buffer);

      /* note that index size may be zero when the draw is not indexed */
      if (ilo->draw->indexed)
         ilo->ib.draw_start_offset = ilo->ib.offset / ilo->ib.index_size;
      else
         ilo->ib.draw_start_offset = 0;
   }

   /* treat the IB as clean if the HW states do not change */
   if (ilo->ib.hw_resource == current_hw_res &&
       ilo->ib.hw_index_size == ilo->ib.index_size)
      ilo->dirty &= ~ILO_DIRTY_IB;
   else
      ilo->ib.hw_index_size = ilo->ib.index_size;
}
 
/**
 * Finalize states.  Some states depend on other states and are
 * incomplete/invalid until finalized.
 *
 * Called once per draw, before state emission.  \p draw is remembered in
 * ilo->draw for the finalize helpers.  The call order matters: shader
 * kernels are selected first (they may set more dirty bits), then user
 * constant buffers and the index buffer are uploaded, and finally the
 * uploader is unmapped so the GPU can read the uploads.
 */
void
ilo_finalize_3d_states(struct ilo_context *ilo,
                       const struct pipe_draw_info *draw)
{
   ilo->draw = draw;

   finalize_shader_states(ilo);
   finalize_constant_buffers(ilo);
   finalize_index_buffer(ilo);

   u_upload_unmap(ilo->uploader);
}
 
/* Translate a pipe blend state into a device-specific blend CSO. */
static void *
ilo_create_blend_state(struct pipe_context *pipe,
                       const struct pipe_blend_state *state)
{
   struct ilo_context *ctx = ilo_context(pipe);
   struct ilo_blend_state *cso = MALLOC_STRUCT(ilo_blend_state);

   assert(cso);
   ilo_gpe_init_blend(ctx->dev, state, cso);

   return cso;
}
 
/* Bind a blend CSO and flag the blend state for re-emission. */
static void
ilo_bind_blend_state(struct pipe_context *pipe, void *state)
{
   struct ilo_context *ctx = ilo_context(pipe);

   ctx->blend = state;
   ctx->dirty |= ILO_DIRTY_BLEND;
}
 
/* Free a blend CSO.  It must not be currently bound. */
static void
ilo_delete_blend_state(struct pipe_context *pipe, void *state)
{
   FREE(state);
}
 
/* Translate a pipe sampler state into a device-specific sampler CSO. */
static void *
ilo_create_sampler_state(struct pipe_context *pipe,
                         const struct pipe_sampler_state *state)
{
   struct ilo_context *ctx = ilo_context(pipe);
   struct ilo_sampler_cso *cso = MALLOC_STRUCT(ilo_sampler_cso);

   assert(cso);
   ilo_gpe_init_sampler_cso(ctx->dev, state, cso);

   return cso;
}
 
/**
 * Bind \p count sampler CSOs starting at slot \p start for \p shader, and
 * keep dst->count equal to one past the highest bound slot.
 *
 * Graphics stages always bind from slot 0 (start is 0 for them), so any
 * slot outside [start, start+count) is unbound.  The compute stage updates
 * only the given range and then trims trailing NULL slots from the count.
 */
static void
ilo_bind_sampler_states(struct pipe_context *pipe, unsigned shader,
                        unsigned start, unsigned count, void **samplers)
{
   struct ilo_context *ilo = ilo_context(pipe);
   struct ilo_sampler_state *dst = &ilo->sampler[shader];
   unsigned i;

   assert(start + count <= Elements(dst->cso));

   if (likely(shader != PIPE_SHADER_COMPUTE)) {
      /* a NULL array unbinds everything */
      if (!samplers) {
         start = 0;
         count = 0;
      }

      /* samplers not in range are also unbound */
      for (i = 0; i < start; i++)
         dst->cso[i] = NULL;
      for (; i < start + count; i++)
         dst->cso[i] = samplers[i - start];
      for (; i < dst->count; i++)
         dst->cso[i] = NULL;

      dst->count = start + count;

      return;
   }

   /* compute stage: update only [start, start+count) */
   if (samplers) {
      for (i = 0; i < count; i++)
         dst->cso[start + i] = samplers[i];
   }
   else {
      for (i = 0; i < count; i++)
         dst->cso[start + i] = NULL;
   }

   /* the update may have changed the highest bound slot: recount by
    * trimming trailing NULLs */
   if (dst->count <= start + count) {
      if (samplers)
         count += start;
      else
         count = start;

      while (count > 0 && !dst->cso[count - 1])
         count--;

      dst->count = count;
   }
}
 
static void
ilo_bind_fragment_sampler_states(struct pipe_context *pipe,
unsigned num_samplers,
void **samplers)
{
struct ilo_context *ilo = ilo_context(pipe);
 
ilo_bind_sampler_states(pipe, PIPE_SHADER_FRAGMENT,
0, num_samplers, samplers);
 
ilo->dirty |= ILO_DIRTY_SAMPLER_FS;
}
 
static void
ilo_bind_vertex_sampler_states(struct pipe_context *pipe,
unsigned num_samplers,
void **samplers)
{
struct ilo_context *ilo = ilo_context(pipe);
 
ilo_bind_sampler_states(pipe, PIPE_SHADER_VERTEX,
0, num_samplers, samplers);
 
ilo->dirty |= ILO_DIRTY_SAMPLER_VS;
}
 
static void
ilo_bind_geometry_sampler_states(struct pipe_context *pipe,
unsigned num_samplers,
void **samplers)
{
struct ilo_context *ilo = ilo_context(pipe);
 
ilo_bind_sampler_states(pipe, PIPE_SHADER_GEOMETRY,
0, num_samplers, samplers);
 
ilo->dirty |= ILO_DIRTY_SAMPLER_GS;
}
 
static void
ilo_bind_compute_sampler_states(struct pipe_context *pipe,
unsigned start_slot,
unsigned num_samplers,
void **samplers)
{
struct ilo_context *ilo = ilo_context(pipe);
 
ilo_bind_sampler_states(pipe, PIPE_SHADER_COMPUTE,
start_slot, num_samplers, samplers);
 
ilo->dirty |= ILO_DIRTY_SAMPLER_CS;
}
 
/* Free a sampler CSO.  It must not be currently bound. */
static void
ilo_delete_sampler_state(struct pipe_context *pipe, void *state)
{
   FREE(state);
}
 
/* Translate a pipe rasterizer state into a device-specific CSO; the
 * original pipe state is kept alongside for later inspection. */
static void *
ilo_create_rasterizer_state(struct pipe_context *pipe,
                            const struct pipe_rasterizer_state *state)
{
   struct ilo_context *ctx = ilo_context(pipe);
   struct ilo_rasterizer_state *cso = MALLOC_STRUCT(ilo_rasterizer_state);

   assert(cso);

   cso->state = *state;
   ilo_gpe_init_rasterizer(ctx->dev, state, cso);

   return cso;
}
 
/* Bind a rasterizer CSO and flag the rasterizer state for re-emission. */
static void
ilo_bind_rasterizer_state(struct pipe_context *pipe, void *state)
{
   struct ilo_context *ctx = ilo_context(pipe);

   ctx->rasterizer = state;
   ctx->dirty |= ILO_DIRTY_RASTERIZER;
}
 
/* Free a rasterizer CSO.  It must not be currently bound. */
static void
ilo_delete_rasterizer_state(struct pipe_context *pipe, void *state)
{
   FREE(state);
}
 
/* Translate a pipe depth/stencil/alpha state into a device-specific CSO. */
static void *
ilo_create_depth_stencil_alpha_state(struct pipe_context *pipe,
                                     const struct pipe_depth_stencil_alpha_state *state)
{
   struct ilo_context *ctx = ilo_context(pipe);
   struct ilo_dsa_state *cso = MALLOC_STRUCT(ilo_dsa_state);

   assert(cso);
   ilo_gpe_init_dsa(ctx->dev, state, cso);

   return cso;
}
 
/* Bind a DSA CSO and flag the DSA state for re-emission. */
static void
ilo_bind_depth_stencil_alpha_state(struct pipe_context *pipe, void *state)
{
   struct ilo_context *ctx = ilo_context(pipe);

   ctx->dsa = state;
   ctx->dirty |= ILO_DIRTY_DSA;
}
 
/* Free a DSA CSO.  It must not be currently bound. */
static void
ilo_delete_depth_stencil_alpha_state(struct pipe_context *pipe, void *state)
{
   FREE(state);
}
 
/* Create a fragment shader and register it with the shader cache. */
static void *
ilo_create_fs_state(struct pipe_context *pipe,
                    const struct pipe_shader_state *state)
{
   struct ilo_context *ctx = ilo_context(pipe);
   struct ilo_shader_state *sh = ilo_shader_create_fs(ctx->dev, state, ctx);

   assert(sh);
   ilo_shader_cache_add(ctx->shader_cache, sh);

   return sh;
}
 
/* Bind a fragment shader and flag the FS state for re-emission. */
static void
ilo_bind_fs_state(struct pipe_context *pipe, void *state)
{
   struct ilo_context *ctx = ilo_context(pipe);

   ctx->fs = state;
   ctx->dirty |= ILO_DIRTY_FS;
}
 
static void
ilo_delete_fs_state(struct pipe_context *pipe, void *state)
{
struct ilo_context *ilo = ilo_context(pipe);
struct ilo_shader_state *fs = (struct ilo_shader_state *) state;
 
ilo_shader_cache_remove(ilo->shader_cache, fs);
ilo_shader_destroy(fs);
}
 
/* Create a vertex shader and register it with the shader cache. */
static void *
ilo_create_vs_state(struct pipe_context *pipe,
                    const struct pipe_shader_state *state)
{
   struct ilo_context *ctx = ilo_context(pipe);
   struct ilo_shader_state *sh = ilo_shader_create_vs(ctx->dev, state, ctx);

   assert(sh);
   ilo_shader_cache_add(ctx->shader_cache, sh);

   return sh;
}
 
/* Bind a vertex shader and flag the VS state for re-emission. */
static void
ilo_bind_vs_state(struct pipe_context *pipe, void *state)
{
   struct ilo_context *ctx = ilo_context(pipe);

   ctx->vs = state;
   ctx->dirty |= ILO_DIRTY_VS;
}
 
static void
ilo_delete_vs_state(struct pipe_context *pipe, void *state)
{
struct ilo_context *ilo = ilo_context(pipe);
struct ilo_shader_state *vs = (struct ilo_shader_state *) state;
 
ilo_shader_cache_remove(ilo->shader_cache, vs);
ilo_shader_destroy(vs);
}
 
/* Create a geometry shader and register it with the shader cache. */
static void *
ilo_create_gs_state(struct pipe_context *pipe,
                    const struct pipe_shader_state *state)
{
   struct ilo_context *ctx = ilo_context(pipe);
   struct ilo_shader_state *sh = ilo_shader_create_gs(ctx->dev, state, ctx);

   assert(sh);
   ilo_shader_cache_add(ctx->shader_cache, sh);

   return sh;
}
 
/* Bind a geometry shader; redundant binds (e.g. from util_blitter) are
 * filtered out so the GS state is not marked dirty needlessly. */
static void
ilo_bind_gs_state(struct pipe_context *pipe, void *state)
{
   struct ilo_context *ctx = ilo_context(pipe);

   /* util_blitter may set this unnecessarily */
   if (ctx->gs != state) {
      ctx->gs = state;
      ctx->dirty |= ILO_DIRTY_GS;
   }
}
 
static void
ilo_delete_gs_state(struct pipe_context *pipe, void *state)
{
struct ilo_context *ilo = ilo_context(pipe);
struct ilo_shader_state *gs = (struct ilo_shader_state *) state;
 
ilo_shader_cache_remove(ilo->shader_cache, gs);
ilo_shader_destroy(gs);
}
 
/* Translate an array of vertex elements into a device-specific VE CSO. */
static void *
ilo_create_vertex_elements_state(struct pipe_context *pipe,
                                 unsigned num_elements,
                                 const struct pipe_vertex_element *elements)
{
   struct ilo_context *ctx = ilo_context(pipe);
   struct ilo_ve_state *cso = MALLOC_STRUCT(ilo_ve_state);

   assert(cso);
   ilo_gpe_init_ve(ctx->dev, num_elements, elements, cso);

   return cso;
}
 
/* Bind a vertex-elements CSO and flag the VE state for re-emission. */
static void
ilo_bind_vertex_elements_state(struct pipe_context *pipe, void *state)
{
   struct ilo_context *ctx = ilo_context(pipe);

   ctx->ve = state;
   ctx->dirty |= ILO_DIRTY_VE;
}
 
/* Free a vertex-elements CSO.  It must not be currently bound. */
static void
ilo_delete_vertex_elements_state(struct pipe_context *pipe, void *state)
{
   struct ilo_ve_state *ve = state;

   FREE(ve);
}
 
/* Set the blend color and flag it for re-emission. */
static void
ilo_set_blend_color(struct pipe_context *pipe,
                    const struct pipe_blend_color *state)
{
   struct ilo_context *ctx = ilo_context(pipe);

   ctx->blend_color = *state;
   ctx->dirty |= ILO_DIRTY_BLEND_COLOR;
}
 
/*
 * Set the stencil reference values, skipping the update when the new
 * values equal the current ones (util_blitter may set this unnecessarily).
 *
 * Bug fix: the original used "!memcpy(...)" for the redundancy check.
 * memcpy() returns its destination pointer, which is never NULL here, so
 * the early return could never fire -- and the copy clobbered the stored
 * values before any comparison could have been meaningful.  The intended
 * call is memcmp(), which returns 0 when the two blocks are equal.
 */
static void
ilo_set_stencil_ref(struct pipe_context *pipe,
                    const struct pipe_stencil_ref *state)
{
   struct ilo_context *ilo = ilo_context(pipe);

   /* util_blitter may set this unnecessarily */
   if (!memcmp(&ilo->stencil_ref, state, sizeof(*state)))
      return;

   ilo->stencil_ref = *state;

   ilo->dirty |= ILO_DIRTY_STENCIL_REF;
}
 
/* Set the MSAA sample mask; redundant sets (e.g. from util_blitter) are
 * filtered out so the state is not marked dirty needlessly. */
static void
ilo_set_sample_mask(struct pipe_context *pipe,
                    unsigned sample_mask)
{
   struct ilo_context *ctx = ilo_context(pipe);

   /* util_blitter may set this unnecessarily */
   if (ctx->sample_mask != sample_mask) {
      ctx->sample_mask = sample_mask;
      ctx->dirty |= ILO_DIRTY_SAMPLE_MASK;
   }
}
 
/* Set the user clip planes and flag the clip state for re-emission. */
static void
ilo_set_clip_state(struct pipe_context *pipe,
                   const struct pipe_clip_state *state)
{
   struct ilo_context *ctx = ilo_context(pipe);

   ctx->clip = *state;
   ctx->dirty |= ILO_DIRTY_CLIP;
}
 
/**
 * Bind (or unbind, when \p buf is NULL) a constant buffer for a shader
 * stage.  A resource-backed buffer gets its SURFACE_STATE built here; a
 * user-memory buffer is only recorded and is uploaded later by
 * finalize_constant_buffers().  cbuf->enabled_mask tracks which slots hold
 * a buffer of either kind.
 */
static void
ilo_set_constant_buffer(struct pipe_context *pipe,
                        uint shader, uint index,
                        struct pipe_constant_buffer *buf)
{
   struct ilo_context *ilo = ilo_context(pipe);
   struct ilo_cbuf_state *cbuf = &ilo->cbuf[shader];
   /* the interface sets a single buffer; the loops below are written for a
    * count to ease a future multi-buffer version */
   const unsigned count = 1;
   unsigned i;

   assert(shader < Elements(ilo->cbuf));
   assert(index + count <= Elements(ilo->cbuf[shader].cso));

   if (buf) {
      for (i = 0; i < count; i++) {
         struct ilo_cbuf_cso *cso = &cbuf->cso[index + i];

         pipe_resource_reference(&cso->resource, buf[i].buffer);

         if (buf[i].buffer) {
            /* resource-backed: build the surface now; constants are viewed
             * as vec4s */
            const enum pipe_format elem_format =
               PIPE_FORMAT_R32G32B32A32_FLOAT;

            ilo_gpe_init_view_surface_for_buffer(ilo->dev,
                  ilo_buffer(buf[i].buffer),
                  buf[i].buffer_offset, buf[i].buffer_size,
                  util_format_get_blocksize(elem_format), elem_format,
                  false, false, &cso->surface);

            cso->user_buffer = NULL;
            cso->user_buffer_size = 0;

            cbuf->enabled_mask |= 1 << (index + i);
         }
         else if (buf[i].user_buffer) {
            /* user-memory: defer upload to finalize_constant_buffers() */
            cso->surface.bo = NULL;

            /* buffer_offset does not apply for user buffer */
            cso->user_buffer = buf[i].user_buffer;
            cso->user_buffer_size = buf[i].buffer_size;

            cbuf->enabled_mask |= 1 << (index + i);
         }
         else {
            /* neither resource nor user memory: the slot is unbound */
            cso->surface.bo = NULL;
            cso->user_buffer = NULL;
            cso->user_buffer_size = 0;

            cbuf->enabled_mask &= ~(1 << (index + i));
         }
      }
   }
   else {
      /* unbind the slot and release any resource reference it held */
      for (i = 0; i < count; i++) {
         struct ilo_cbuf_cso *cso = &cbuf->cso[index + i];

         pipe_resource_reference(&cso->resource, NULL);
         cso->surface.bo = NULL;
         cso->user_buffer = NULL;
         cso->user_buffer_size = 0;

         cbuf->enabled_mask &= ~(1 << (index + i));
      }
   }

   ilo->dirty |= ILO_DIRTY_CBUF;
}
 
/* Set the framebuffer state and derive the sample count from the first
 * bound color buffer, falling back to the depth/stencil buffer, and
 * finally to single-sampled. */
static void
ilo_set_framebuffer_state(struct pipe_context *pipe,
                          const struct pipe_framebuffer_state *state)
{
   struct ilo_context *ctx = ilo_context(pipe);
   unsigned samples = 1;

   util_copy_framebuffer_state(&ctx->fb.state, state);

   if (state->nr_cbufs)
      samples = state->cbufs[0]->texture->nr_samples;
   else if (state->zsbuf)
      samples = state->zsbuf->texture->nr_samples;

   /* a resource may report zero samples; treat that as single-sampled */
   ctx->fb.num_samples = samples ? samples : 1;

   ctx->dirty |= ILO_DIRTY_FB;
}
 
/* Set the polygon stipple pattern and flag it for re-emission. */
static void
ilo_set_polygon_stipple(struct pipe_context *pipe,
                        const struct pipe_poly_stipple *state)
{
   struct ilo_context *ctx = ilo_context(pipe);

   ctx->poly_stipple = *state;
   ctx->dirty |= ILO_DIRTY_POLY_STIPPLE;
}
 
/* Translate and store a range of scissor rectangles, then flag the
 * scissor state for re-emission. */
static void
ilo_set_scissor_states(struct pipe_context *pipe,
                       unsigned start_slot,
                       unsigned num_scissors,
                       const struct pipe_scissor_state *scissors)
{
   struct ilo_context *ctx = ilo_context(pipe);

   ilo_gpe_set_scissor(ctx->dev, start_slot, num_scissors,
         scissors, &ctx->scissor);

   ctx->dirty |= ILO_DIRTY_SCISSOR;
}
 
/**
 * Set (or, when \p viewports is NULL, unset) a range of viewports.
 * ilo->viewport.count tracks one past the highest slot ever set; unsetting
 * the tail of that range shrinks it back to \p start_slot.
 */
static void
ilo_set_viewport_states(struct pipe_context *pipe,
                        unsigned start_slot,
                        unsigned num_viewports,
                        const struct pipe_viewport_state *viewports)
{
   struct ilo_context *ilo = ilo_context(pipe);

   if (viewports) {
      unsigned i;

      for (i = 0; i < num_viewports; i++) {
         ilo_gpe_set_viewport_cso(ilo->dev, &viewports[i],
               &ilo->viewport.cso[start_slot + i]);
      }

      /* grow the count if the range extends past it */
      if (ilo->viewport.count < start_slot + num_viewports)
         ilo->viewport.count = start_slot + num_viewports;

      /* need to save viewport 0 for util_blitter */
      if (!start_slot && num_viewports)
         ilo->viewport.viewport0 = viewports[0];
   }
   else {
      /* only a range covering the current tail can shrink the count */
      if (ilo->viewport.count <= start_slot + num_viewports &&
          ilo->viewport.count > start_slot)
         ilo->viewport.count = start_slot;
   }

   ilo->dirty |= ILO_DIRTY_VIEWPORT;
}
 
/**
 * Bind \p count sampler views starting at slot \p start for \p shader, and
 * keep dst->count equal to one past the highest bound slot.
 *
 * Mirrors ilo_bind_sampler_states(): graphics stages always bind from slot
 * 0 and unbind everything outside the range; the compute stage updates
 * only the given range and trims trailing NULL slots from the count.
 * References are taken/dropped through pipe_sampler_view_reference().
 */
static void
ilo_set_sampler_views(struct pipe_context *pipe, unsigned shader,
                      unsigned start, unsigned count,
                      struct pipe_sampler_view **views)
{
   struct ilo_context *ilo = ilo_context(pipe);
   struct ilo_view_state *dst = &ilo->view[shader];
   unsigned i;

   assert(start + count <= Elements(dst->states));

   if (likely(shader != PIPE_SHADER_COMPUTE)) {
      /* a NULL array unbinds everything */
      if (!views) {
         start = 0;
         count = 0;
      }

      /* views not in range are also unbound */
      for (i = 0; i < start; i++)
         pipe_sampler_view_reference(&dst->states[i], NULL);
      for (; i < start + count; i++)
         pipe_sampler_view_reference(&dst->states[i], views[i - start]);
      for (; i < dst->count; i++)
         pipe_sampler_view_reference(&dst->states[i], NULL);

      dst->count = start + count;

      return;
   }

   /* compute stage: update only [start, start+count) */
   if (views) {
      for (i = 0; i < count; i++)
         pipe_sampler_view_reference(&dst->states[start + i], views[i]);
   }
   else {
      for (i = 0; i < count; i++)
         pipe_sampler_view_reference(&dst->states[start + i], NULL);
   }

   /* the update may have changed the highest bound slot: recount by
    * trimming trailing NULLs */
   if (dst->count <= start + count) {
      if (views)
         count += start;
      else
         count = start;

      while (count > 0 && !dst->states[count - 1])
         count--;

      dst->count = count;
   }
}
 
static void
ilo_set_fragment_sampler_views(struct pipe_context *pipe,
unsigned num_views,
struct pipe_sampler_view **views)
{
struct ilo_context *ilo = ilo_context(pipe);
 
ilo_set_sampler_views(pipe, PIPE_SHADER_FRAGMENT,
0, num_views, views);
 
ilo->dirty |= ILO_DIRTY_VIEW_FS;
}
 
static void
ilo_set_vertex_sampler_views(struct pipe_context *pipe,
unsigned num_views,
struct pipe_sampler_view **views)
{
struct ilo_context *ilo = ilo_context(pipe);
 
ilo_set_sampler_views(pipe, PIPE_SHADER_VERTEX,
0, num_views, views);
 
ilo->dirty |= ILO_DIRTY_VIEW_VS;
}
 
static void
ilo_set_geometry_sampler_views(struct pipe_context *pipe,
unsigned num_views,
struct pipe_sampler_view **views)
{
struct ilo_context *ilo = ilo_context(pipe);
 
ilo_set_sampler_views(pipe, PIPE_SHADER_GEOMETRY,
0, num_views, views);
 
ilo->dirty |= ILO_DIRTY_VIEW_GS;
}
 
static void
ilo_set_compute_sampler_views(struct pipe_context *pipe,
unsigned start_slot, unsigned num_views,
struct pipe_sampler_view **views)
{
struct ilo_context *ilo = ilo_context(pipe);
 
ilo_set_sampler_views(pipe, PIPE_SHADER_COMPUTE,
start_slot, num_views, views);
 
ilo->dirty |= ILO_DIRTY_VIEW_CS;
}
 
/**
 * Bind (or unbind, when \p surfaces is NULL) a range of shader resource
 * surfaces.  dst->count is kept at one past the highest bound slot by
 * trimming trailing NULLs after the update.
 */
static void
ilo_set_shader_resources(struct pipe_context *pipe,
                         unsigned start, unsigned count,
                         struct pipe_surface **surfaces)
{
   struct ilo_context *ilo = ilo_context(pipe);
   struct ilo_resource_state *dst = &ilo->resource;
   unsigned i;

   assert(start + count <= Elements(dst->states));

   if (surfaces) {
      for (i = 0; i < count; i++)
         pipe_surface_reference(&dst->states[start + i], surfaces[i]);
   }
   else {
      for (i = 0; i < count; i++)
         pipe_surface_reference(&dst->states[start + i], NULL);
   }

   /* the update may have changed the highest bound slot: recount by
    * trimming trailing NULLs */
   if (dst->count <= start + count) {
      if (surfaces)
         count += start;
      else
         count = start;

      while (count > 0 && !dst->states[count - 1])
         count--;

      dst->count = count;
   }

   ilo->dirty |= ILO_DIRTY_RESOURCE;
}
 
/* Bind a range of vertex buffers and mark the VB state dirty. */
static void
ilo_set_vertex_buffers(struct pipe_context *pipe,
                       unsigned start_slot, unsigned num_buffers,
                       const struct pipe_vertex_buffer *buffers)
{
   struct ilo_context *ctx = ilo_context(pipe);

   /* no PIPE_CAP_USER_VERTEX_BUFFERS: user buffers must never show up */
   if (buffers) {
      unsigned slot;

      for (slot = 0; slot < num_buffers; slot++)
         assert(!buffers[slot].user_buffer);
   }

   util_set_vertex_buffers_mask(ctx->vb.states,
         &ctx->vb.enabled_mask, buffers, start_slot, num_buffers);

   ctx->dirty |= ILO_DIRTY_VB;
}
 
/* Set (or, when state is NULL, clear) the index buffer binding and mark
 * the IB state dirty. */
static void
ilo_set_index_buffer(struct pipe_context *pipe,
                     const struct pipe_index_buffer *state)
{
   struct ilo_context *ctx = ilo_context(pipe);

   pipe_resource_reference(&ctx->ib.buffer, state ? state->buffer : NULL);
   ctx->ib.user_buffer = state ? state->user_buffer : NULL;
   ctx->ib.offset = state ? state->offset : 0;
   ctx->ib.index_size = state ? state->index_size : 0;

   ctx->dirty |= ILO_DIRTY_IB;
}
 
/* Create a stream-output target over a range of \p res; the target holds
 * its own reference on the resource. */
static struct pipe_stream_output_target *
ilo_create_stream_output_target(struct pipe_context *pipe,
                                struct pipe_resource *res,
                                unsigned buffer_offset,
                                unsigned buffer_size)
{
   struct pipe_stream_output_target *so =
      MALLOC_STRUCT(pipe_stream_output_target);

   assert(so);

   pipe_reference_init(&so->reference, 1);
   so->buffer = NULL;
   pipe_resource_reference(&so->buffer, res);
   so->context = pipe;
   so->buffer_offset = buffer_offset;
   so->buffer_size = buffer_size;

   return so;
}
 
/**
 * Bind the stream-output targets, releasing references to any previously
 * bound targets beyond the new count.  \p append_bitmask flags targets
 * that resume from their previous write offset.
 */
static void
ilo_set_stream_output_targets(struct pipe_context *pipe,
                              unsigned num_targets,
                              struct pipe_stream_output_target **targets,
                              unsigned append_bitmask)
{
   struct ilo_context *ilo = ilo_context(pipe);
   unsigned i;

   if (!targets)
      num_targets = 0;

   /* util_blitter may set this unnecessarily */
   if (!ilo->so.count && !num_targets)
      return;

   for (i = 0; i < num_targets; i++)
      pipe_so_target_reference(&ilo->so.states[i], targets[i]);

   /* drop references to targets past the new count */
   for (; i < ilo->so.count; i++)
      pipe_so_target_reference(&ilo->so.states[i], NULL);

   ilo->so.count = num_targets;
   ilo->so.append_bitmask = append_bitmask;

   ilo->so.enabled = (ilo->so.count > 0);

   ilo->dirty |= ILO_DIRTY_SO;
}
 
/* Destroy a stream-output target, dropping its buffer reference first. */
static void
ilo_stream_output_target_destroy(struct pipe_context *pipe,
                                 struct pipe_stream_output_target *target)
{
   pipe_resource_reference(&target->buffer, NULL);
   FREE(target);
}
 
/**
 * Create a sampler view of \p res.  Buffers get a buffer SURFACE_STATE
 * sized from the element range in the template; textures get a texture
 * SURFACE_STATE covering the requested level/layer range.  The view holds
 * a reference on the resource.
 */
static struct pipe_sampler_view *
ilo_create_sampler_view(struct pipe_context *pipe,
                        struct pipe_resource *res,
                        const struct pipe_sampler_view *templ)
{
   struct ilo_context *ilo = ilo_context(pipe);
   struct ilo_view_cso *view;

   view = MALLOC_STRUCT(ilo_view_cso);
   assert(view);

   view->base = *templ;
   pipe_reference_init(&view->base.reference, 1);
   view->base.texture = NULL;
   pipe_resource_reference(&view->base.texture, res);
   view->base.context = pipe;

   if (res->target == PIPE_BUFFER) {
      /* convert the inclusive element range to a byte offset and size */
      const unsigned elem_size = util_format_get_blocksize(templ->format);
      const unsigned first_elem = templ->u.buf.first_element;
      const unsigned num_elems = templ->u.buf.last_element - first_elem + 1;

      ilo_gpe_init_view_surface_for_buffer(ilo->dev, ilo_buffer(res),
            first_elem * elem_size, num_elems * elem_size,
            elem_size, templ->format, false, false, &view->surface);
   }
   else {
      struct ilo_texture *tex = ilo_texture(res);

      /* warn about degraded performance because of a missing binding flag */
      if (tex->tiling == INTEL_TILING_NONE &&
          !(tex->base.bind & PIPE_BIND_SAMPLER_VIEW)) {
         ilo_warn("creating sampler view for a resource "
                  "not created for sampling\n");
      }

      ilo_gpe_init_view_surface_for_texture(ilo->dev, tex,
            templ->format,
            templ->u.tex.first_level,
            templ->u.tex.last_level - templ->u.tex.first_level + 1,
            templ->u.tex.first_layer,
            templ->u.tex.last_layer - templ->u.tex.first_layer + 1,
            false, false, &view->surface);
   }

   return &view->base;
}
 
/* Destroy a sampler view, dropping its texture reference first. */
static void
ilo_sampler_view_destroy(struct pipe_context *pipe,
                         struct pipe_sampler_view *view)
{
   pipe_resource_reference(&view->texture, NULL);
   FREE(view);
}
 
/**
 * Create a render-target or depth/stencil surface for \p res.  Formats
 * that are not depth/stencil are treated as render targets and get a view
 * SURFACE_STATE; depth/stencil formats get a ZS surface.  The surface
 * holds a reference on the resource.
 */
static struct pipe_surface *
ilo_create_surface(struct pipe_context *pipe,
                   struct pipe_resource *res,
                   const struct pipe_surface *templ)
{
   struct ilo_context *ilo = ilo_context(pipe);
   struct ilo_surface_cso *surf;

   surf = MALLOC_STRUCT(ilo_surface_cso);
   assert(surf);

   surf->base = *templ;
   pipe_reference_init(&surf->base.reference, 1);
   surf->base.texture = NULL;
   pipe_resource_reference(&surf->base.texture, res);

   surf->base.context = pipe;
   /* the surface covers the requested mip level */
   surf->base.width = u_minify(res->width0, templ->u.tex.level);
   surf->base.height = u_minify(res->height0, templ->u.tex.level);

   surf->is_rt = !util_format_is_depth_or_stencil(templ->format);

   if (surf->is_rt) {
      /* relax this? */
      assert(res->target != PIPE_BUFFER);

      /*
       * classic i965 sets render_cache_rw for constant buffers and sol
       * surfaces but not render buffers.  Why?
       */
      ilo_gpe_init_view_surface_for_texture(ilo->dev, ilo_texture(res),
            templ->format, templ->u.tex.level, 1,
            templ->u.tex.first_layer,
            templ->u.tex.last_layer - templ->u.tex.first_layer + 1,
            true, true, &surf->u.rt);
   }
   else {
      assert(res->target != PIPE_BUFFER);

      ilo_gpe_init_zs_surface(ilo->dev, ilo_texture(res),
            templ->format, templ->u.tex.level,
            templ->u.tex.first_layer,
            templ->u.tex.last_layer - templ->u.tex.first_layer + 1,
            &surf->u.zs);
   }

   return &surf->base;
}
 
/* Destroy a surface, dropping its texture reference first. */
static void
ilo_surface_destroy(struct pipe_context *pipe,
                    struct pipe_surface *surface)
{
   pipe_resource_reference(&surface->texture, NULL);
   FREE(surface);
}
 
/* Create a compute shader and register it with the shader cache. */
static void *
ilo_create_compute_state(struct pipe_context *pipe,
                         const struct pipe_compute_state *state)
{
   struct ilo_context *ctx = ilo_context(pipe);
   struct ilo_shader_state *sh = ilo_shader_create_cs(ctx->dev, state, ctx);

   assert(sh);
   ilo_shader_cache_add(ctx->shader_cache, sh);

   return sh;
}
 
/* Bind a compute shader and flag the CS state for re-emission. */
static void
ilo_bind_compute_state(struct pipe_context *pipe, void *state)
{
   struct ilo_context *ctx = ilo_context(pipe);

   ctx->cs = state;
   ctx->dirty |= ILO_DIRTY_CS;
}
 
static void
ilo_delete_compute_state(struct pipe_context *pipe, void *state)
{
struct ilo_context *ilo = ilo_context(pipe);
struct ilo_shader_state *cs = (struct ilo_shader_state *) state;
 
ilo_shader_cache_remove(ilo->shader_cache, cs);
ilo_shader_destroy(cs);
}
 
/**
 * Bind (or unbind, when \p surfaces is NULL) a range of compute resource
 * surfaces.  dst->count is kept at one past the highest bound slot by
 * trimming trailing NULLs after the update.
 */
static void
ilo_set_compute_resources(struct pipe_context *pipe,
                          unsigned start, unsigned count,
                          struct pipe_surface **surfaces)
{
   struct ilo_context *ilo = ilo_context(pipe);
   struct ilo_resource_state *dst = &ilo->cs_resource;
   unsigned i;

   assert(start + count <= Elements(dst->states));

   if (surfaces) {
      for (i = 0; i < count; i++)
         pipe_surface_reference(&dst->states[start + i], surfaces[i]);
   }
   else {
      for (i = 0; i < count; i++)
         pipe_surface_reference(&dst->states[start + i], NULL);
   }

   /* the update may have changed the highest bound slot: recount by
    * trimming trailing NULLs */
   if (dst->count <= start + count) {
      if (surfaces)
         count += start;
      else
         count = start;

      while (count > 0 && !dst->states[count - 1])
         count--;

      dst->count = count;
   }

   ilo->dirty |= ILO_DIRTY_CS_RESOURCE;
}
 
/**
 * Bind (or unbind, when \p resources is NULL) a range of global buffers
 * for compute kernels.  dst->count is kept at one past the highest bound
 * slot by trimming trailing NULLs.  The \p handles out-array is currently
 * ignored.
 */
static void
ilo_set_global_binding(struct pipe_context *pipe,
                       unsigned start, unsigned count,
                       struct pipe_resource **resources,
                       uint32_t **handles)
{
   struct ilo_context *ilo = ilo_context(pipe);
   struct ilo_global_binding *dst = &ilo->global_binding;
   unsigned i;

   assert(start + count <= Elements(dst->resources));

   if (resources) {
      for (i = 0; i < count; i++)
         pipe_resource_reference(&dst->resources[start + i], resources[i]);
   }
   else {
      for (i = 0; i < count; i++)
         pipe_resource_reference(&dst->resources[start + i], NULL);
   }

   /* the update may have changed the highest bound slot: recount by
    * trimming trailing NULLs */
   if (dst->count <= start + count) {
      if (resources)
         count += start;
      else
         count = start;

      while (count > 0 && !dst->resources[count - 1])
         count--;

      dst->count = count;
   }

   ilo->dirty |= ILO_DIRTY_GLOBAL_BINDING;
}
 
/**
 * Initialize state-related functions.
 *
 * Fills the pipe_context vtable with the ilo implementations above.
 * Called once at context creation.
 */
void
ilo_init_state_functions(struct ilo_context *ilo)
{
   /* every ILO_DIRTY_* flag must fit in the 32-bit dirty mask */
   STATIC_ASSERT(ILO_STATE_COUNT <= 32);

   ilo->base.create_blend_state = ilo_create_blend_state;
   ilo->base.bind_blend_state = ilo_bind_blend_state;
   ilo->base.delete_blend_state = ilo_delete_blend_state;
   ilo->base.create_sampler_state = ilo_create_sampler_state;
   ilo->base.bind_fragment_sampler_states = ilo_bind_fragment_sampler_states;
   ilo->base.bind_vertex_sampler_states = ilo_bind_vertex_sampler_states;
   ilo->base.bind_geometry_sampler_states = ilo_bind_geometry_sampler_states;
   ilo->base.bind_compute_sampler_states = ilo_bind_compute_sampler_states;
   ilo->base.delete_sampler_state = ilo_delete_sampler_state;
   ilo->base.create_rasterizer_state = ilo_create_rasterizer_state;
   ilo->base.bind_rasterizer_state = ilo_bind_rasterizer_state;
   ilo->base.delete_rasterizer_state = ilo_delete_rasterizer_state;
   ilo->base.create_depth_stencil_alpha_state = ilo_create_depth_stencil_alpha_state;
   ilo->base.bind_depth_stencil_alpha_state = ilo_bind_depth_stencil_alpha_state;
   ilo->base.delete_depth_stencil_alpha_state = ilo_delete_depth_stencil_alpha_state;
   ilo->base.create_fs_state = ilo_create_fs_state;
   ilo->base.bind_fs_state = ilo_bind_fs_state;
   ilo->base.delete_fs_state = ilo_delete_fs_state;
   ilo->base.create_vs_state = ilo_create_vs_state;
   ilo->base.bind_vs_state = ilo_bind_vs_state;
   ilo->base.delete_vs_state = ilo_delete_vs_state;
   ilo->base.create_gs_state = ilo_create_gs_state;
   ilo->base.bind_gs_state = ilo_bind_gs_state;
   ilo->base.delete_gs_state = ilo_delete_gs_state;
   ilo->base.create_vertex_elements_state = ilo_create_vertex_elements_state;
   ilo->base.bind_vertex_elements_state = ilo_bind_vertex_elements_state;
   ilo->base.delete_vertex_elements_state = ilo_delete_vertex_elements_state;

   ilo->base.set_blend_color = ilo_set_blend_color;
   ilo->base.set_stencil_ref = ilo_set_stencil_ref;
   ilo->base.set_sample_mask = ilo_set_sample_mask;
   ilo->base.set_clip_state = ilo_set_clip_state;
   ilo->base.set_constant_buffer = ilo_set_constant_buffer;
   ilo->base.set_framebuffer_state = ilo_set_framebuffer_state;
   ilo->base.set_polygon_stipple = ilo_set_polygon_stipple;
   ilo->base.set_scissor_states = ilo_set_scissor_states;
   ilo->base.set_viewport_states = ilo_set_viewport_states;
   ilo->base.set_fragment_sampler_views = ilo_set_fragment_sampler_views;
   ilo->base.set_vertex_sampler_views = ilo_set_vertex_sampler_views;
   ilo->base.set_geometry_sampler_views = ilo_set_geometry_sampler_views;
   ilo->base.set_compute_sampler_views = ilo_set_compute_sampler_views;
   ilo->base.set_shader_resources = ilo_set_shader_resources;
   ilo->base.set_vertex_buffers = ilo_set_vertex_buffers;
   ilo->base.set_index_buffer = ilo_set_index_buffer;

   ilo->base.create_stream_output_target = ilo_create_stream_output_target;
   ilo->base.stream_output_target_destroy = ilo_stream_output_target_destroy;
   ilo->base.set_stream_output_targets = ilo_set_stream_output_targets;

   ilo->base.create_sampler_view = ilo_create_sampler_view;
   ilo->base.sampler_view_destroy = ilo_sampler_view_destroy;

   ilo->base.create_surface = ilo_create_surface;
   ilo->base.surface_destroy = ilo_surface_destroy;

   ilo->base.create_compute_state = ilo_create_compute_state;
   ilo->base.bind_compute_state = ilo_bind_compute_state;
   ilo->base.delete_compute_state = ilo_delete_compute_state;
   ilo->base.set_compute_resources = ilo_set_compute_resources;
   ilo->base.set_global_binding = ilo_set_global_binding;
}
 
/**
 * Initialize the default context states and mark everything dirty so the
 * first draw emits the full state.
 */
void
ilo_init_states(struct ilo_context *ilo)
{
   ilo_gpe_set_scissor_null(ilo->dev, &ilo->scissor);

   /* a null ZS surface for when no depth/stencil buffer is bound */
   ilo_gpe_init_zs_surface(ilo->dev, NULL,
         PIPE_FORMAT_NONE, 0, 0, 1, &ilo->fb.null_zs);

   ilo->dirty = ILO_DIRTY_ALL;
}
 
/**
 * Release every reference held by the context states.  Called at context
 * destruction; the state structures themselves are not freed here.
 */
void
ilo_cleanup_states(struct ilo_context *ilo)
{
   unsigned i, sh;

   for (i = 0; i < Elements(ilo->vb.states); i++) {
      if (ilo->vb.enabled_mask & (1 << i))
         pipe_resource_reference(&ilo->vb.states[i].buffer, NULL);
   }

   pipe_resource_reference(&ilo->ib.buffer, NULL);
   pipe_resource_reference(&ilo->ib.hw_resource, NULL);

   for (i = 0; i < ilo->so.count; i++)
      pipe_so_target_reference(&ilo->so.states[i], NULL);

   for (sh = 0; sh < PIPE_SHADER_TYPES; sh++) {
      for (i = 0; i < ilo->view[sh].count; i++) {
         /* unreference through a local copy; the (now dangling) stored
          * pointer is never read again since the context is going away */
         struct pipe_sampler_view *view = ilo->view[sh].states[i];
         pipe_sampler_view_reference(&view, NULL);
      }

      for (i = 0; i < Elements(ilo->cbuf[sh].cso); i++) {
         struct ilo_cbuf_cso *cbuf = &ilo->cbuf[sh].cso[i];
         pipe_resource_reference(&cbuf->resource, NULL);
      }
   }

   for (i = 0; i < ilo->resource.count; i++)
      pipe_surface_reference(&ilo->resource.states[i], NULL);

   for (i = 0; i < ilo->fb.state.nr_cbufs; i++)
      pipe_surface_reference(&ilo->fb.state.cbufs[i], NULL);

   if (ilo->fb.state.zsbuf)
      pipe_surface_reference(&ilo->fb.state.zsbuf, NULL);

   for (i = 0; i < ilo->cs_resource.count; i++)
      pipe_surface_reference(&ilo->cs_resource.states[i], NULL);

   for (i = 0; i < ilo->global_binding.count; i++)
      pipe_resource_reference(&ilo->global_binding.resources[i], NULL);
}
 
/**
* Mark all states that have the resource dirty.
*/
void
ilo_mark_states_with_resource_dirty(struct ilo_context *ilo,
const struct pipe_resource *res)
{
uint32_t states = 0;
unsigned sh, i;
 
if (res->target == PIPE_BUFFER) {
uint32_t vb_mask = ilo->vb.enabled_mask;
 
while (vb_mask) {
const unsigned idx = u_bit_scan(&vb_mask);
 
if (ilo->vb.states[idx].buffer == res) {
states |= ILO_DIRTY_VB;
break;
}
}
 
if (ilo->ib.buffer == res) {
states |= ILO_DIRTY_IB;
 
/*
* finalize_index_buffer() has an optimization that clears
* ILO_DIRTY_IB when the HW states do not change. However, it fails
* to flush the VF cache when the HW states do not change, but the
* contents of the IB has changed. Here, we set the index size to an
* invalid value to avoid the optimization.
*/
ilo->ib.hw_index_size = 0;
}
 
for (i = 0; i < ilo->so.count; i++) {
if (ilo->so.states[i]->buffer == res) {
states |= ILO_DIRTY_SO;
break;
}
}
}
 
for (sh = 0; sh < PIPE_SHADER_TYPES; sh++) {
for (i = 0; i < ilo->view[sh].count; i++) {
struct pipe_sampler_view *view = ilo->view[sh].states[i];
 
if (view->texture == res) {
static const unsigned view_dirty_bits[PIPE_SHADER_TYPES] = {
[PIPE_SHADER_VERTEX] = ILO_DIRTY_VIEW_VS,
[PIPE_SHADER_FRAGMENT] = ILO_DIRTY_VIEW_FS,
[PIPE_SHADER_GEOMETRY] = ILO_DIRTY_VIEW_GS,
[PIPE_SHADER_COMPUTE] = ILO_DIRTY_VIEW_CS,
};
 
states |= view_dirty_bits[sh];
break;
}
}
 
if (res->target == PIPE_BUFFER) {
for (i = 0; i < Elements(ilo->cbuf[sh].cso); i++) {
struct ilo_cbuf_cso *cbuf = &ilo->cbuf[sh].cso[i];
 
if (cbuf->resource == res) {
states |= ILO_DIRTY_CBUF;
break;
}
}
}
}
 
for (i = 0; i < ilo->resource.count; i++) {
if (ilo->resource.states[i]->texture == res) {
states |= ILO_DIRTY_RESOURCE;
break;
}
}
 
/* for now? */
if (res->target != PIPE_BUFFER) {
for (i = 0; i < ilo->fb.state.nr_cbufs; i++) {
if (ilo->fb.state.cbufs[i]->texture == res) {
states |= ILO_DIRTY_FB;
break;
}
}
 
if (ilo->fb.state.zsbuf && ilo->fb.state.zsbuf->texture == res)
states |= ILO_DIRTY_FB;
}
 
for (i = 0; i < ilo->cs_resource.count; i++) {
pipe_surface_reference(&ilo->cs_resource.states[i], NULL);
if (ilo->cs_resource.states[i]->texture == res) {
states |= ILO_DIRTY_CS_RESOURCE;
break;
}
}
 
for (i = 0; i < ilo->global_binding.count; i++) {
if (ilo->global_binding.resources[i] == res) {
states |= ILO_DIRTY_GLOBAL_BINDING;
break;
}
}
 
ilo->dirty |= states;
}
/contrib/sdk/sources/Mesa/src/gallium/drivers/ilo/ilo_state.h
0,0 → 1,139
/*
* Mesa 3-D graphics library
*
* Copyright (C) 2012-2013 LunarG, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*
* Authors:
* Chia-I Wu <olv@lunarg.com>
*/
 
#ifndef ILO_STATE_H
#define ILO_STATE_H
 
#include "ilo_common.h"
 
/**
* States that we track.
*
* XXX Do we want to count each sampler or vertex buffer as a state? If that
* is the case, there are simply not enough bits.
*
* XXX We want to treat primitive type and depth clear value as states, but
* there are not enough bits.
*/
enum ilo_state {
   /* 3D pipeline states */
   ILO_STATE_VB,
   ILO_STATE_VE,
   ILO_STATE_IB,
   ILO_STATE_VS,
   ILO_STATE_GS,
   ILO_STATE_SO,
   ILO_STATE_CLIP,
   ILO_STATE_VIEWPORT,
   ILO_STATE_SCISSOR,
   ILO_STATE_RASTERIZER,
   ILO_STATE_POLY_STIPPLE,
   ILO_STATE_SAMPLE_MASK,
   ILO_STATE_FS,
   ILO_STATE_DSA,
   ILO_STATE_STENCIL_REF,
   ILO_STATE_BLEND,
   ILO_STATE_BLEND_COLOR,
   ILO_STATE_FB,

   /* per-stage sampler/view/constant bindings */
   ILO_STATE_SAMPLER_VS,
   ILO_STATE_SAMPLER_GS,
   ILO_STATE_SAMPLER_FS,
   ILO_STATE_SAMPLER_CS,
   ILO_STATE_VIEW_VS,
   ILO_STATE_VIEW_GS,
   ILO_STATE_VIEW_FS,
   ILO_STATE_VIEW_CS,
   ILO_STATE_CBUF,
   ILO_STATE_RESOURCE,

   /* compute states */
   ILO_STATE_CS,
   ILO_STATE_CS_RESOURCE,
   ILO_STATE_GLOBAL_BINDING,

   /* 31 states total; each must map to a distinct bit in ilo_dirty_flags */
   ILO_STATE_COUNT,
};
 
/**
* Dirty flags of the states.
*/
/* one bit per ilo_state, in the same order */
enum ilo_dirty_flags {
   ILO_DIRTY_VB             = 1 << ILO_STATE_VB,
   ILO_DIRTY_VE             = 1 << ILO_STATE_VE,
   ILO_DIRTY_IB             = 1 << ILO_STATE_IB,
   ILO_DIRTY_VS             = 1 << ILO_STATE_VS,
   ILO_DIRTY_GS             = 1 << ILO_STATE_GS,
   ILO_DIRTY_SO             = 1 << ILO_STATE_SO,
   ILO_DIRTY_CLIP           = 1 << ILO_STATE_CLIP,
   ILO_DIRTY_VIEWPORT       = 1 << ILO_STATE_VIEWPORT,
   ILO_DIRTY_SCISSOR        = 1 << ILO_STATE_SCISSOR,
   ILO_DIRTY_RASTERIZER     = 1 << ILO_STATE_RASTERIZER,
   ILO_DIRTY_POLY_STIPPLE   = 1 << ILO_STATE_POLY_STIPPLE,
   ILO_DIRTY_SAMPLE_MASK    = 1 << ILO_STATE_SAMPLE_MASK,
   ILO_DIRTY_FS             = 1 << ILO_STATE_FS,
   ILO_DIRTY_DSA            = 1 << ILO_STATE_DSA,
   ILO_DIRTY_STENCIL_REF    = 1 << ILO_STATE_STENCIL_REF,
   ILO_DIRTY_BLEND          = 1 << ILO_STATE_BLEND,
   ILO_DIRTY_BLEND_COLOR    = 1 << ILO_STATE_BLEND_COLOR,
   ILO_DIRTY_FB             = 1 << ILO_STATE_FB,
   ILO_DIRTY_SAMPLER_VS     = 1 << ILO_STATE_SAMPLER_VS,
   ILO_DIRTY_SAMPLER_GS     = 1 << ILO_STATE_SAMPLER_GS,
   ILO_DIRTY_SAMPLER_FS     = 1 << ILO_STATE_SAMPLER_FS,
   ILO_DIRTY_SAMPLER_CS     = 1 << ILO_STATE_SAMPLER_CS,
   ILO_DIRTY_VIEW_VS        = 1 << ILO_STATE_VIEW_VS,
   ILO_DIRTY_VIEW_GS        = 1 << ILO_STATE_VIEW_GS,
   ILO_DIRTY_VIEW_FS        = 1 << ILO_STATE_VIEW_FS,
   ILO_DIRTY_VIEW_CS        = 1 << ILO_STATE_VIEW_CS,
   ILO_DIRTY_CBUF           = 1 << ILO_STATE_CBUF,
   ILO_DIRTY_RESOURCE       = 1 << ILO_STATE_RESOURCE,
   ILO_DIRTY_CS             = 1 << ILO_STATE_CS,
   ILO_DIRTY_CS_RESOURCE    = 1 << ILO_STATE_CS_RESOURCE,
   ILO_DIRTY_GLOBAL_BINDING = 1 << ILO_STATE_GLOBAL_BINDING,
   /* all bits set, including those not backed by an ilo_state */
   ILO_DIRTY_ALL            = 0xffffffff,
};
 
/* forward declarations; a pointer is all these prototypes need */
struct pipe_draw_info;
struct pipe_resource;
struct ilo_context;

/* install the pipe_context state-setting entry points */
void
ilo_init_state_functions(struct ilo_context *ilo);

/* set tracked states to their defaults and mark everything dirty */
void
ilo_init_states(struct ilo_context *ilo);

/* release all resource references held by the tracked states */
void
ilo_cleanup_states(struct ilo_context *ilo);

/* resolve tracked states into HW states before a draw */
void
ilo_finalize_3d_states(struct ilo_context *ilo,
                       const struct pipe_draw_info *draw);

/* mark every state that binds \p res dirty (e.g. after a bo rename) */
void
ilo_mark_states_with_resource_dirty(struct ilo_context *ilo,
                                    const struct pipe_resource *res);

#endif /* ILO_STATE_H */
/contrib/sdk/sources/Mesa/src/gallium/drivers/ilo/ilo_transfer.c
0,0 → 1,1050
/*
* Mesa 3-D graphics library
*
* Copyright (C) 2012-2013 LunarG, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*
* Authors:
* Chia-I Wu <olv@lunarg.com>
*/
 
#include "util/u_surface.h"
#include "util/u_transfer.h"
#include "util/u_format_etc.h"
 
#include "ilo_cp.h"
#include "ilo_context.h"
#include "ilo_resource.h"
#include "ilo_state.h"
#include "ilo_transfer.h"
 
/**
 * Return true when \p bo is busy: referenced by the active command batch,
 * or still in use by the GPU.  When \p need_flush is non-NULL, it is set
 * to whether a batch flush is needed before the bo can idle.
 */
static bool
is_bo_busy(struct ilo_context *ilo, struct intel_bo *bo, bool *need_flush)
{
   const bool in_batch = intel_bo_references(ilo->cp->bo, bo);

   if (need_flush)
      *need_flush = in_batch;

   /* a bo referenced by the unflushed batch is busy by definition */
   if (in_batch)
      return true;

   return intel_bo_is_busy(bo);
}
 
/**
 * Map \p bo according to the transfer's chosen mapping method.
 * Returns true on success.
 */
static bool
map_bo_for_transfer(struct ilo_context *ilo, struct intel_bo *bo,
                    const struct ilo_transfer *xfer)
{
   int ret;

   if (xfer->method == ILO_TRANSFER_MAP_CPU) {
      /* writable only when the transfer requests a write */
      ret = intel_bo_map(bo, (xfer->base.usage & PIPE_TRANSFER_WRITE));
   }
   else if (xfer->method == ILO_TRANSFER_MAP_GTT) {
      ret = intel_bo_map_gtt(bo);
   }
   else if (xfer->method == ILO_TRANSFER_MAP_UNSYNC) {
      ret = intel_bo_map_unsynchronized(bo);
   }
   else {
      assert(!"unknown mapping method");
      ret = -1;
   }

   return (ret == 0);
}
 
/**
 * Choose the best mapping method, depending on the transfer usage and whether
 * the bo is busy.
 *
 * Returns false only when PIPE_TRANSFER_DONTBLOCK is set and the map would
 * stall.  On success, xfer->method is set.
 */
static bool
choose_transfer_method(struct ilo_context *ilo, struct ilo_transfer *xfer)
{
   struct pipe_resource *res = xfer->base.resource;
   const unsigned usage = xfer->base.usage;
   /* prefer map() when there is the last-level cache */
   const bool prefer_cpu =
      (ilo->dev->has_llc || (usage & PIPE_TRANSFER_READ));
   struct ilo_texture *tex;
   struct ilo_buffer *buf;
   struct intel_bo *bo;
   bool tiled, need_flush;

   /* exactly one of tex/buf is non-NULL below */
   if (res->target == PIPE_BUFFER) {
      tex = NULL;

      buf = ilo_buffer(res);
      bo = buf->bo;
      tiled = false;
   }
   else {
      buf = NULL;

      tex = ilo_texture(res);
      bo = tex->bo;
      tiled = (tex->tiling != INTEL_TILING_NONE);
   }

   /* choose between mapping through CPU or GTT */
   if (usage & PIPE_TRANSFER_MAP_DIRECTLY) {
      /* we do not want fencing */
      if (tiled || prefer_cpu)
         xfer->method = ILO_TRANSFER_MAP_CPU;
      else
         xfer->method = ILO_TRANSFER_MAP_GTT;
   }
   else {
      if (!tiled && prefer_cpu)
         xfer->method = ILO_TRANSFER_MAP_CPU;
      else
         xfer->method = ILO_TRANSFER_MAP_GTT;
   }

   /* see if we can avoid stalling */
   if (is_bo_busy(ilo, bo, &need_flush)) {
      bool will_stall = true;

      if (usage & PIPE_TRANSFER_MAP_DIRECTLY) {
         /* nothing we can do */
      }
      else if (usage & PIPE_TRANSFER_UNSYNCHRONIZED) {
         /* unsynchronized gtt mapping does not stall */
         xfer->method = ILO_TRANSFER_MAP_UNSYNC;
         will_stall = false;
      }
      else if (usage & PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE) {
         /* discard old bo and allocate a new one for mapping */
         if ((tex && ilo_texture_alloc_bo(tex)) ||
             (buf && ilo_buffer_alloc_bo(buf))) {
            /* the bo changed; anything bound to the old one is stale */
            ilo_mark_states_with_resource_dirty(ilo, res);
            will_stall = false;
         }
      }
      else if (usage & PIPE_TRANSFER_FLUSH_EXPLICIT) {
         /*
          * We could allocate and return a system buffer here.  When a region of
          * the buffer is explicitly flushed, we pwrite() the region to a
          * temporary bo and emit pipelined copy blit.
          *
          * For now, do nothing.
          */
      }
      else if (usage & PIPE_TRANSFER_DISCARD_RANGE) {
         /*
          * We could allocate a temporary bo for mapping, and emit pipelined copy
          * blit upon unmapping.
          *
          * For now, do nothing.
          */
      }

      if (will_stall) {
         if (usage & PIPE_TRANSFER_DONTBLOCK)
            return false;

         /* flush to make bo busy (so that map() stalls as it should be) */
         if (need_flush)
            ilo_cp_flush(ilo->cp);
      }
   }

   /* fall back to software staging for layouts the direct map cannot express */
   if (tex && !(usage & PIPE_TRANSFER_MAP_DIRECTLY)) {
      if (tex->separate_s8 || tex->bo_format == PIPE_FORMAT_S8_UINT)
         xfer->method = ILO_TRANSFER_MAP_SW_ZS;
      /* need to convert on-the-fly */
      else if (tex->bo_format != tex->base.format)
         xfer->method = ILO_TRANSFER_MAP_SW_CONVERT;
   }

   return true;
}
 
/**
 * Compute the memory-space origin (in bytes horizontally and block rows
 * vertically) of a transfer box within the given level/slice.
 */
static void
tex_get_box_origin(const struct ilo_texture *tex,
                   unsigned level, unsigned slice,
                   const struct pipe_box *box,
                   unsigned *mem_x, unsigned *mem_y)
{
   const unsigned tex_x = tex->slice_offsets[level][slice + box->z].x + box->x;
   const unsigned tex_y = tex->slice_offsets[level][slice + box->z].y + box->y;

   /* origins must be aligned to the format's block dimensions */
   assert(tex_x % tex->block_width == 0 && tex_y % tex->block_height == 0);

   *mem_x = tex_x / tex->block_width * tex->bo_cpp;
   *mem_y = tex_y / tex->block_height;
}
 
/**
 * Return the linear byte offset of the transfer box within the bo,
 * ignoring tiling.
 */
static unsigned
tex_get_box_offset(const struct ilo_texture *tex, unsigned level,
                   const struct pipe_box *box)
{
   unsigned x_bytes, y_rows;

   tex_get_box_origin(tex, level, 0, box, &x_bytes, &y_rows);

   return x_bytes + y_rows * tex->bo_stride;
}
 
/**
 * Return the distance in bytes between two consecutive slices (array layers
 * or 3D depth slices) of the given level, or 0 when there is only one slice.
 */
static unsigned
tex_get_slice_stride(const struct ilo_texture *tex, unsigned level)
{
   unsigned qpitch;

   /* there is no 3D array texture */
   assert(tex->base.array_size == 1 || tex->base.depth0 == 1);

   if (tex->base.array_size == 1) {
      /* non-array, non-3D */
      if (tex->base.depth0 == 1)
         return 0;

      /* only the first level has a fixed slice stride */
      if (level > 0) {
         assert(!"no slice stride for 3D texture with level > 0");
         return 0;
      }
   }

   /* vertical distance between slices, in texel-block rows */
   qpitch = tex->slice_offsets[level][1].y - tex->slice_offsets[level][0].y;
   assert(qpitch % tex->block_height == 0);

   return (qpitch / tex->block_height) * tex->bo_stride;
}
 
/**
 * Apply the X-tiling address swizzle.
 *
 * From the Ivy Bridge PRM, volume 1 part 2, page 24:
 *
 *   "As shown in the tiling algorithm, the new address bit[6] should be:
 *
 *      Address bit[6] <= TiledAddr bit[6] XOR
 *                        TiledAddr bit[9] XOR
 *                        TiledAddr bit[10]"
 */
static unsigned
tex_tile_x_swizzle(unsigned addr)
{
   /* new bit 6 = old bit 6 ^ bit 9 ^ bit 10; all other bits unchanged */
   const unsigned bit6 = (addr ^ (addr >> 3) ^ (addr >> 4)) & 0x40;

   return (addr & ~0x40u) | bit6;
}
 
/**
 * Apply the Y-tiling address swizzle.
 *
 * From the Ivy Bridge PRM, volume 1 part 2, page 24:
 *
 *   "As shown in the tiling algorithm, The new address bit[6] becomes:
 *
 *      Address bit[6] <= TiledAddr bit[6] XOR
 *                        TiledAddr bit[9]"
 */
static unsigned
tex_tile_y_swizzle(unsigned addr)
{
   /* fold bit 9 into bit 6 */
   const unsigned bit9_at_bit6 = (addr >> 3) & 0x40;

   return addr ^ bit9_at_bit6;
}
 
/**
 * Convert a memory-space (mem_x, mem_y) coordinate into an X-tiled byte
 * offset.
 *
 * An X-major tile is 4KB: 8 rows of 32 OWords (512 bytes).  Tiles are
 * numbered in row-major order, so
 *
 *   tile   = (mem_y / 8) * tiles_per_row + (mem_x / 512)
 *   offset = tile * 4096 + (mem_y % 8) * 512 + (mem_x % 512)
 *
 * (Sandy Bridge PRM, volume 1 part 2, page 21.)
 */
static unsigned
tex_tile_x_offset(unsigned mem_x, unsigned mem_y,
                  unsigned tiles_per_row, bool swizzle)
{
   const unsigned tile = (mem_y >> 3) * tiles_per_row + (mem_x >> 9);
   unsigned offset = tile << 12 | (mem_y & 0x7) << 9 | (mem_x & 0x1ff);

   /* inlined tex_tile_x_swizzle(): bit 6 ^= bit 9 ^ bit 10 */
   if (swizzle)
      offset ^= ((offset >> 3) ^ (offset >> 4)) & 0x40;

   return offset;
}
 
/**
 * Convert a memory-space (mem_x, mem_y) coordinate into a Y-tiled byte
 * offset.
 *
 * A Y-major tile is 4KB: 32 rows of 8 OWords (128 bytes), with OWords
 * numbered in column-major order:
 *
 *   tile   = (mem_y / 32) * tiles_per_row + (mem_x / 128)
 *   oword  = ((mem_x % 128) / 16) * 32 + (mem_y % 32)
 *   offset = tile * 4096 + oword * 16 + (mem_x % 16)
 *
 * (Sandy Bridge PRM, volume 1 part 2, page 22.)
 */
static unsigned
tex_tile_y_offset(unsigned mem_x, unsigned mem_y,
                  unsigned tiles_per_row, bool swizzle)
{
   const unsigned tile = (mem_y >> 5) * tiles_per_row + (mem_x >> 7);
   const unsigned oword = (mem_x & 0x70) << 1 | (mem_y & 0x1f);
   unsigned offset = tile << 12 | oword << 4 | (mem_x & 0xf);

   /* inlined tex_tile_y_swizzle(): bit 6 ^= bit 9 */
   if (swizzle)
      offset ^= (offset >> 3) & 0x40;

   return offset;
}
 
/**
 * Convert a memory-space (mem_x, mem_y) coordinate into a W-tiled byte
 * offset (used for stencil).
 *
 * A W-major tile is 4KB: an 8x8 grid of 8x8-byte blocks (column-major),
 * each recursively split into row-major 4x4 and 2x2 sub-blocks:
 *
 *   tile   = (mem_y / 64) * tiles_per_row + (mem_x / 64)
 *   blk8   = ((mem_x % 64) / 8) * 8 + ((mem_y % 64) / 8)
 *   blk4/blk2/blk1 interleave the remaining y/x bits
 *   offset = tile * 4096 + blk8 * 64 + blk4 * 16 + blk2 * 4 + blk1
 *
 * (Sandy Bridge PRM, volume 1 part 2, page 23.)
 */
static unsigned
tex_tile_w_offset(unsigned mem_x, unsigned mem_y,
                  unsigned tiles_per_row, bool swizzle)
{
   const unsigned tile = (mem_y >> 6) * tiles_per_row + (mem_x >> 6);
   const unsigned blk8 = ((mem_x >> 3) & 0x7) << 3 | ((mem_y >> 3) & 0x7);
   const unsigned blk4 = ((mem_y >> 2) & 0x1) << 1 | ((mem_x >> 2) & 0x1);
   const unsigned blk2 = ((mem_y >> 1) & 0x1) << 1 | ((mem_x >> 1) & 0x1);
   const unsigned blk1 = ((mem_y     ) & 0x1) << 1 | ((mem_x     ) & 0x1);
   unsigned offset = tile << 12 | blk8 << 6 | blk4 << 4 | blk2 << 2 | blk1;

   /* W tiles share the Y-tile swizzle (inlined tex_tile_y_swizzle()) */
   if (swizzle)
      offset ^= (offset >> 3) & 0x40;

   return offset;
}
 
/**
 * Linear (untiled) layout: \p tiles_per_row is simply the row stride in
 * bytes; \p swizzle is ignored.
 */
static unsigned
tex_tile_none_offset(unsigned mem_x, unsigned mem_y,
                     unsigned tiles_per_row, bool swizzle)
{
   return mem_x + mem_y * tiles_per_row;
}
 
typedef unsigned (*tex_tile_offset_func)(unsigned mem_x, unsigned mem_y,
unsigned tiles_per_row,
bool swizzle);
 
/**
 * Pick the tiled-offset function matching the texture's layout and return
 * the corresponding tiles-per-row (row stride in bytes for linear) value
 * through \p tiles_per_row.
 */
static tex_tile_offset_func
tex_tile_choose_offset_func(const struct ilo_texture *tex,
                            unsigned *tiles_per_row)
{
   if (tex->tiling == INTEL_TILING_X) {
      *tiles_per_row = tex->bo_stride / 512;
      return tex_tile_x_offset;
   }

   if (tex->tiling == INTEL_TILING_Y) {
      *tiles_per_row = tex->bo_stride / 128;
      return tex_tile_y_offset;
   }

   /* S8 surfaces are W-tiled even though the bo reports no tiling */
   if (tex->bo_format == PIPE_FORMAT_S8_UINT) {
      *tiles_per_row = tex->bo_stride / 64;
      return tex_tile_w_offset;
   }

   *tiles_per_row = tex->bo_stride;
   return tex_tile_none_offset;
}
 
/**
 * Read a depth/stencil region back into the system staging buffer,
 * de-tiling texel by texel and, when there is a separate stencil bo,
 * interleaving the stencil bytes into the packed ZS format the state
 * tracker expects.  Both bos must already be mapped.
 */
static void
tex_staging_sys_zs_read(struct ilo_context *ilo,
                        struct ilo_texture *tex,
                        const struct ilo_transfer *xfer)
{
   const bool swizzle = ilo->dev->has_address_swizzling;
   const struct pipe_box *box = &xfer->base.box;
   const uint8_t *src = intel_bo_get_virtual(tex->bo);
   tex_tile_offset_func tile_offset;
   unsigned tiles_per_row;
   int slice;

   tile_offset = tex_tile_choose_offset_func(tex, &tiles_per_row);

   /* this path only handles formats with 1x1 blocks */
   assert(tex->block_width == 1 && tex->block_height == 1);

   if (tex->separate_s8) {
      struct ilo_texture *s8_tex = tex->separate_s8;
      const uint8_t *s8_src = intel_bo_get_virtual(s8_tex->bo);
      tex_tile_offset_func s8_tile_offset;
      unsigned s8_tiles_per_row;
      int dst_cpp, dst_s8_pos, src_cpp_used;

      s8_tile_offset = tex_tile_choose_offset_func(s8_tex, &s8_tiles_per_row);

      if (tex->base.format == PIPE_FORMAT_Z24_UNORM_S8_UINT) {
         assert(tex->bo_format == PIPE_FORMAT_Z24X8_UNORM);

         /* 4-byte dest texel: 3 bytes of Z24, stencil byte at offset 3 */
         dst_cpp = 4;
         dst_s8_pos = 3;
         src_cpp_used = 3;
      }
      else {
         assert(tex->base.format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT);
         assert(tex->bo_format == PIPE_FORMAT_Z32_FLOAT);

         /* 8-byte dest texel: 4 bytes of Z32F, stencil byte at offset 4 */
         dst_cpp = 8;
         dst_s8_pos = 4;
         src_cpp_used = 4;
      }

      for (slice = 0; slice < box->depth; slice++) {
         unsigned mem_x, mem_y, s8_mem_x, s8_mem_y;
         uint8_t *dst;
         int i, j;

         tex_get_box_origin(tex, xfer->base.level, slice,
                            box, &mem_x, &mem_y);
         tex_get_box_origin(s8_tex, xfer->base.level, slice,
                            box, &s8_mem_x, &s8_mem_y);

         dst = xfer->staging_sys + xfer->base.layer_stride * slice;

         /* resolve each texel's tiled address in both bos */
         for (i = 0; i < box->height; i++) {
            unsigned x = mem_x, s8_x = s8_mem_x;
            uint8_t *d = dst;

            for (j = 0; j < box->width; j++) {
               const unsigned offset =
                  tile_offset(x, mem_y, tiles_per_row, swizzle);
               const unsigned s8_offset =
                  s8_tile_offset(s8_x, s8_mem_y, s8_tiles_per_row, swizzle);

               memcpy(d, src + offset, src_cpp_used);
               d[dst_s8_pos] = s8_src[s8_offset];

               d += dst_cpp;
               x += tex->bo_cpp;
               s8_x++;
            }

            dst += xfer->base.stride;
            mem_y++;
            s8_mem_y++;
         }
      }
   }
   else {
      assert(tex->bo_format == PIPE_FORMAT_S8_UINT);

      /* stencil-only: one byte per texel */
      for (slice = 0; slice < box->depth; slice++) {
         unsigned mem_x, mem_y;
         uint8_t *dst;
         int i, j;

         tex_get_box_origin(tex, xfer->base.level, slice,
                            box, &mem_x, &mem_y);

         dst = xfer->staging_sys + xfer->base.layer_stride * slice;

         for (i = 0; i < box->height; i++) {
            unsigned x = mem_x;
            uint8_t *d = dst;

            for (j = 0; j < box->width; j++) {
               const unsigned offset =
                  tile_offset(x, mem_y, tiles_per_row, swizzle);

               *d = src[offset];

               d++;
               x++;
            }

            dst += xfer->base.stride;
            mem_y++;
         }
      }
   }
}
 
/**
 * Write a depth/stencil region from the system staging buffer back to the
 * bo(s), re-tiling texel by texel and, when there is a separate stencil
 * bo, splitting the packed ZS texels into depth and stencil.  Both bos
 * must already be mapped.  Mirror of tex_staging_sys_zs_read().
 */
static void
tex_staging_sys_zs_write(struct ilo_context *ilo,
                         struct ilo_texture *tex,
                         const struct ilo_transfer *xfer)
{
   const bool swizzle = ilo->dev->has_address_swizzling;
   const struct pipe_box *box = &xfer->base.box;
   uint8_t *dst = intel_bo_get_virtual(tex->bo);
   tex_tile_offset_func tile_offset;
   unsigned tiles_per_row;
   int slice;

   tile_offset = tex_tile_choose_offset_func(tex, &tiles_per_row);

   /* this path only handles formats with 1x1 blocks */
   assert(tex->block_width == 1 && tex->block_height == 1);

   if (tex->separate_s8) {
      struct ilo_texture *s8_tex = tex->separate_s8;
      uint8_t *s8_dst = intel_bo_get_virtual(s8_tex->bo);
      tex_tile_offset_func s8_tile_offset;
      unsigned s8_tiles_per_row;
      int src_cpp, src_s8_pos, dst_cpp_used;

      s8_tile_offset = tex_tile_choose_offset_func(s8_tex, &s8_tiles_per_row);

      if (tex->base.format == PIPE_FORMAT_Z24_UNORM_S8_UINT) {
         assert(tex->bo_format == PIPE_FORMAT_Z24X8_UNORM);

         /* 4-byte source texel: 3 bytes of Z24, stencil byte at offset 3 */
         src_cpp = 4;
         src_s8_pos = 3;
         dst_cpp_used = 3;
      }
      else {
         assert(tex->base.format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT);
         assert(tex->bo_format == PIPE_FORMAT_Z32_FLOAT);

         /* 8-byte source texel: 4 bytes of Z32F, stencil byte at offset 4 */
         src_cpp = 8;
         src_s8_pos = 4;
         dst_cpp_used = 4;
      }

      for (slice = 0; slice < box->depth; slice++) {
         unsigned mem_x, mem_y, s8_mem_x, s8_mem_y;
         const uint8_t *src;
         int i, j;

         tex_get_box_origin(tex, xfer->base.level, slice,
                            box, &mem_x, &mem_y);
         tex_get_box_origin(s8_tex, xfer->base.level, slice,
                            box, &s8_mem_x, &s8_mem_y);

         src = xfer->staging_sys + xfer->base.layer_stride * slice;

         /* resolve each texel's tiled address in both bos */
         for (i = 0; i < box->height; i++) {
            unsigned x = mem_x, s8_x = s8_mem_x;
            const uint8_t *s = src;

            for (j = 0; j < box->width; j++) {
               const unsigned offset =
                  tile_offset(x, mem_y, tiles_per_row, swizzle);
               const unsigned s8_offset =
                  s8_tile_offset(s8_x, s8_mem_y, s8_tiles_per_row, swizzle);

               memcpy(dst + offset, s, dst_cpp_used);
               s8_dst[s8_offset] = s[src_s8_pos];

               s += src_cpp;
               x += tex->bo_cpp;
               s8_x++;
            }

            src += xfer->base.stride;
            mem_y++;
            s8_mem_y++;
         }
      }
   }
   else {
      assert(tex->bo_format == PIPE_FORMAT_S8_UINT);

      /* stencil-only: one byte per texel */
      for (slice = 0; slice < box->depth; slice++) {
         unsigned mem_x, mem_y;
         const uint8_t *src;
         int i, j;

         tex_get_box_origin(tex, xfer->base.level, slice,
                            box, &mem_x, &mem_y);

         src = xfer->staging_sys + xfer->base.layer_stride * slice;

         for (i = 0; i < box->height; i++) {
            unsigned x = mem_x;
            const uint8_t *s = src;

            for (j = 0; j < box->width; j++) {
               const unsigned offset =
                  tile_offset(x, mem_y, tiles_per_row, swizzle);

               dst[offset] = *s;

               s++;
               x++;
            }

            src += xfer->base.stride;
            mem_y++;
         }
      }
   }
}
 
/**
 * Write the system staging buffer back to the texture bo, converting from
 * the state-tracker format to the bo format when they differ.  The bo
 * must already be mapped with a linear view.
 */
static void
tex_staging_sys_convert_write(struct ilo_context *ilo,
                              struct ilo_texture *tex,
                              const struct ilo_transfer *xfer)
{
   const struct pipe_box *box = &xfer->base.box;
   unsigned dst_slice_stride;
   void *dst;
   int slice;

   /* note: arithmetic on void * below is a GNU extension */
   dst = intel_bo_get_virtual(tex->bo);
   dst += tex_get_box_offset(tex, xfer->base.level, box);

   /* slice stride is not always available */
   if (box->depth > 1)
      dst_slice_stride = tex_get_slice_stride(tex, xfer->base.level);
   else
      dst_slice_stride = 0;

   /* formats match: a plain box copy suffices */
   if (unlikely(tex->bo_format == tex->base.format)) {
      util_copy_box(dst, tex->bo_format, tex->bo_stride, dst_slice_stride,
            0, 0, 0, box->width, box->height, box->depth,
            xfer->staging_sys, xfer->base.stride, xfer->base.layer_stride,
            0, 0, 0);
      return;
   }

   switch (tex->base.format) {
   case PIPE_FORMAT_ETC1_RGB8:
      assert(tex->bo_format == PIPE_FORMAT_R8G8B8X8_UNORM);

      /* decompress ETC1 into RGBX8888, slice by slice */
      for (slice = 0; slice < box->depth; slice++) {
         const void *src =
            xfer->staging_sys + xfer->base.layer_stride * slice;

         util_format_etc1_rgb8_unpack_rgba_8unorm(dst,
               tex->bo_stride, src, xfer->base.stride,
               box->width, box->height);

         dst += dst_slice_stride;
      }
      break;
   default:
      assert(!"unable to convert the staging data");
      break;
   }
}
 
/**
 * Map the texture bo (and the separate stencil bo, if any) for moving
 * data between the bo and the system staging buffer.
 *
 * \param for_read_back  true when the mapping will only be read
 * \param linear_view    true when the caller treats the mapping as linear
 *                       (a tiled bo then needs a GTT map)
 * \return true on success; on failure, nothing remains mapped
 */
static bool
tex_staging_sys_map_bo(const struct ilo_context *ilo,
                       const struct ilo_texture *tex,
                       bool for_read_back, bool linear_view)
{
   const bool prefer_cpu = (ilo->dev->has_llc || for_read_back);
   int err;

   /* CPU map unless a tiled bo must appear linear, which needs the GTT */
   if (prefer_cpu && (tex->tiling == INTEL_TILING_NONE || !linear_view))
      err = intel_bo_map(tex->bo, !for_read_back);
   else
      err = intel_bo_map_gtt(tex->bo);

   /*
    * BUGFIX: bail out when the primary map fails.  Previously err was
    * overwritten by the separate-stencil map below, so a failed primary
    * map could be reported as success.
    */
   if (err)
      return false;

   if (!tex->separate_s8)
      return true;

   err = intel_bo_map(tex->separate_s8->bo, !for_read_back);
   if (err)
      intel_bo_unmap(tex->bo);

   return !err;
}
 
static void
tex_staging_sys_unmap_bo(const struct ilo_context *ilo,
const struct ilo_texture *tex)
{
if (tex->separate_s8)
intel_bo_unmap(tex->separate_s8->bo);
 
intel_bo_unmap(tex->bo);
}
 
/**
 * Tear down a software-staged mapping.  For write transfers, move the
 * staging data back into the bo first (re-tiling ZS or converting the
 * format), then free the staging buffer.
 */
static void
tex_staging_sys_unmap(struct ilo_context *ilo,
                      struct ilo_texture *tex,
                      struct ilo_transfer *xfer)
{
   bool success;

   /* read-only transfer: nothing to write back */
   if (!(xfer->base.usage & PIPE_TRANSFER_WRITE)) {
      FREE(xfer->staging_sys);
      return;
   }

   switch (xfer->method) {
   case ILO_TRANSFER_MAP_SW_CONVERT:
      success = tex_staging_sys_map_bo(ilo, tex, false, true);
      if (success) {
         tex_staging_sys_convert_write(ilo, tex, xfer);
         tex_staging_sys_unmap_bo(ilo, tex);
      }
      break;
   case ILO_TRANSFER_MAP_SW_ZS:
      success = tex_staging_sys_map_bo(ilo, tex, false, false);
      if (success) {
         tex_staging_sys_zs_write(ilo, tex, xfer);
         tex_staging_sys_unmap_bo(ilo, tex);
      }
      break;
   default:
      assert(!"unknown mapping method");
      success = false;
      break;
   }

   /* on failure the application's writes are silently dropped */
   if (!success)
      ilo_err("failed to map resource for moving staging data\n");

   FREE(xfer->staging_sys);
}
 
/**
 * Set up a software-staged mapping: allocate a tightly-packed system
 * buffer in the state-tracker format and, when the usage requires it,
 * read the current resource contents back into it.
 */
static bool
tex_staging_sys_map(struct ilo_context *ilo,
                    struct ilo_texture *tex,
                    struct ilo_transfer *xfer)
{
   const struct pipe_box *box = &xfer->base.box;
   const size_t stride = util_format_get_stride(tex->base.format, box->width);
   const size_t size =
      util_format_get_2d_size(tex->base.format, stride, box->height);
   bool read_back = false, success;

   xfer->staging_sys = MALLOC(size * box->depth);
   if (!xfer->staging_sys)
      return false;

   xfer->base.stride = stride;
   xfer->base.layer_stride = size;
   xfer->ptr = xfer->staging_sys;

   /* see if we need to read the resource back */
   if (xfer->base.usage & PIPE_TRANSFER_READ) {
      read_back = true;
   }
   else if (xfer->base.usage & PIPE_TRANSFER_WRITE) {
      const unsigned discard_flags =
         (PIPE_TRANSFER_DISCARD_RANGE | PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE);

      /* a non-discarding write must preserve the untouched texels */
      if (!(xfer->base.usage & discard_flags))
         read_back = true;
   }

   if (!read_back)
      return true;

   switch (xfer->method) {
   case ILO_TRANSFER_MAP_SW_CONVERT:
      assert(!"no on-the-fly format conversion for mapping");
      success = false;
      break;
   case ILO_TRANSFER_MAP_SW_ZS:
      success = tex_staging_sys_map_bo(ilo, tex, true, false);
      if (success) {
         tex_staging_sys_zs_read(ilo, tex, xfer);
         tex_staging_sys_unmap_bo(ilo, tex);
      }
      break;
   default:
      assert(!"unknown mapping method");
      success = false;
      break;
   }

   /* NOTE(review): staging_sys appears to leak when read-back fails —
    * the failure path in ilo_transfer_map() does not free it; verify */
   return success;
}
 
/**
 * Undo tex_direct_map(): direct mappings have no staging copy, so a
 * plain unmap suffices.
 */
static void
tex_direct_unmap(struct ilo_context *ilo,
                 struct ilo_texture *tex,
                 struct ilo_transfer *xfer)
{
   intel_bo_unmap(tex->bo);
}
 
/**
 * Map the texture bo directly (CPU, GTT, or unsynchronized, per
 * xfer->method) and fill in the transfer strides and pointer.
 */
static bool
tex_direct_map(struct ilo_context *ilo,
               struct ilo_texture *tex,
               struct ilo_transfer *xfer)
{
   if (!map_bo_for_transfer(ilo, tex->bo, xfer))
      return false;

   /* note that stride is for a block row, not a texel row */
   xfer->base.stride = tex->bo_stride;

   /* slice stride is not always available */
   if (xfer->base.box.depth > 1)
      xfer->base.layer_stride = tex_get_slice_stride(tex, xfer->base.level);
   else
      xfer->base.layer_stride = 0;

   /* note: arithmetic on the void pointer is a GNU extension */
   xfer->ptr = intel_bo_get_virtual(tex->bo);
   xfer->ptr += tex_get_box_offset(tex, xfer->base.level, &xfer->base.box);

   return true;
}
 
/**
 * Map a texture transfer, dispatching to the direct or software-staged
 * path depending on the chosen method.
 */
static bool
tex_map(struct ilo_context *ilo, struct ilo_transfer *xfer)
{
   struct ilo_texture *tex = ilo_texture(xfer->base.resource);

   if (!choose_transfer_method(ilo, xfer))
      return false;

   switch (xfer->method) {
   case ILO_TRANSFER_MAP_CPU:
   case ILO_TRANSFER_MAP_GTT:
   case ILO_TRANSFER_MAP_UNSYNC:
      return tex_direct_map(ilo, tex, xfer);
   case ILO_TRANSFER_MAP_SW_CONVERT:
   case ILO_TRANSFER_MAP_SW_ZS:
      return tex_staging_sys_map(ilo, tex, xfer);
   default:
      assert(!"unknown mapping method");
      return false;
   }
}
 
/**
 * Unmap a texture transfer, dispatching to the path that matches the
 * mapping method chosen in tex_map().
 */
static void
tex_unmap(struct ilo_context *ilo, struct ilo_transfer *xfer)
{
   struct ilo_texture *tex = ilo_texture(xfer->base.resource);

   if (xfer->method == ILO_TRANSFER_MAP_SW_CONVERT ||
       xfer->method == ILO_TRANSFER_MAP_SW_ZS) {
      tex_staging_sys_unmap(ilo, tex, xfer);
   }
   else if (xfer->method == ILO_TRANSFER_MAP_CPU ||
            xfer->method == ILO_TRANSFER_MAP_GTT ||
            xfer->method == ILO_TRANSFER_MAP_UNSYNC) {
      tex_direct_unmap(ilo, tex, xfer);
   }
   else {
      assert(!"unknown mapping method");
   }
}
 
/**
 * Map a buffer resource.  Buffers are always mapped directly; the box is
 * expected to be 1D (x/width only).
 */
static bool
buf_map(struct ilo_context *ilo, struct ilo_transfer *xfer)
{
   struct ilo_buffer *buf = ilo_buffer(xfer->base.resource);

   if (!choose_transfer_method(ilo, xfer))
      return false;

   if (!map_bo_for_transfer(ilo, buf->bo, xfer))
      return false;

   /* buffer transfers are 1D */
   assert(xfer->base.level == 0);
   assert(xfer->base.box.y == 0);
   assert(xfer->base.box.z == 0);
   assert(xfer->base.box.height == 1);
   assert(xfer->base.box.depth == 1);

   xfer->base.stride = 0;
   xfer->base.layer_stride = 0;

   /* note: arithmetic on the void pointer is a GNU extension */
   xfer->ptr = intel_bo_get_virtual(buf->bo);
   xfer->ptr += xfer->base.box.x;

   return true;
}
 
/**
 * Unmap a buffer transfer mapped by buf_map().
 */
static void
buf_unmap(struct ilo_context *ilo, struct ilo_transfer *xfer)
{
   intel_bo_unmap(ilo_buffer(xfer->base.resource)->bo);
}
 
/**
 * Write \p size bytes of \p data at \p offset into the buffer via
 * pwrite(), renaming the bo first when the whole resource may be
 * discarded so a busy bo does not stall the write.
 */
static void
buf_pwrite(struct ilo_context *ilo, struct ilo_buffer *buf,
           unsigned usage, int offset, int size, const void *data)
{
   bool need_flush;

   /* see if we can avoid stalling */
   if (is_bo_busy(ilo, buf->bo, &need_flush)) {
      bool will_stall = true;

      if (usage & PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE) {
         /* old data not needed so discard the old bo to avoid stalling */
         if (ilo_buffer_alloc_bo(buf)) {
            /* anything bound to the old bo is now stale */
            ilo_mark_states_with_resource_dirty(ilo, &buf->base);
            will_stall = false;
         }
      }
      else {
         /*
          * We could allocate a temporary bo to hold the data and emit
          * pipelined copy blit to move them to buf->bo.  But for now, do
          * nothing.
          */
      }

      /* flush to make bo busy (so that pwrite() stalls as it should be) */
      if (will_stall && need_flush)
         ilo_cp_flush(ilo->cp);
   }

   intel_bo_pwrite(buf->bo, offset, size, data);
}
 
/**
 * No-op implementation of pipe_context::transfer_flush_region.
 *
 * NOTE(review): PIPE_TRANSFER_FLUSH_EXPLICIT is not optimized; see the
 * "For now, do nothing" comment in choose_transfer_method().
 */
static void
ilo_transfer_flush_region(struct pipe_context *pipe,
                          struct pipe_transfer *transfer,
                          const struct pipe_box *box)
{
}
 
/**
 * Implement pipe_context::transfer_unmap: finish the transfer, drop the
 * resource reference taken in ilo_transfer_map(), and return the transfer
 * object to the slab pool.
 */
static void
ilo_transfer_unmap(struct pipe_context *pipe,
                   struct pipe_transfer *transfer)
{
   struct ilo_context *ilo = ilo_context(pipe);
   struct ilo_transfer *xfer = ilo_transfer(transfer);
   const bool is_buffer = (xfer->base.resource->target == PIPE_BUFFER);

   if (is_buffer)
      buf_unmap(ilo, xfer);
   else
      tex_unmap(ilo, xfer);

   pipe_resource_reference(&xfer->base.resource, NULL);

   util_slab_free(&ilo->transfer_mempool, xfer);
}
 
/**
 * Implement pipe_context::transfer_map.  Allocates an ilo_transfer from
 * the context's slab pool, holds a reference on \p res for the lifetime
 * of the mapping, and returns a pointer to the mapped (or staged) data.
 * Returns NULL and sets *transfer to NULL on failure.
 */
static void *
ilo_transfer_map(struct pipe_context *pipe,
                 struct pipe_resource *res,
                 unsigned level,
                 unsigned usage,
                 const struct pipe_box *box,
                 struct pipe_transfer **transfer)
{
   struct ilo_context *ilo = ilo_context(pipe);
   struct ilo_transfer *xfer;
   bool success;

   xfer = util_slab_alloc(&ilo->transfer_mempool);
   if (!xfer) {
      *transfer = NULL;
      return NULL;
   }

   xfer->base.resource = NULL;
   pipe_resource_reference(&xfer->base.resource, res);
   xfer->base.level = level;
   xfer->base.usage = usage;
   xfer->base.box = *box;

   if (res->target == PIPE_BUFFER)
      success = buf_map(ilo, xfer);
   else
      success = tex_map(ilo, xfer);

   if (!success) {
      pipe_resource_reference(&xfer->base.resource, NULL);
      /*
       * BUGFIX: xfer comes from the slab allocator, so it must go back to
       * the slab (as ilo_transfer_unmap() does).  The previous FREE(xfer)
       * mixed allocators and corrupted the slab pool.
       */
      util_slab_free(&ilo->transfer_mempool, xfer);
      *transfer = NULL;
      return NULL;
   }

   *transfer = &xfer->base;

   return xfer->ptr;
}
 
/**
 * Implement pipe_context::transfer_inline_write.
 *
 * Synchronized buffer writes take the buf_pwrite() fast path, which can
 * rename the bo to avoid stalling; everything else falls back to the
 * generic map/copy/unmap helper.
 */
static void
ilo_transfer_inline_write(struct pipe_context *pipe,
                          struct pipe_resource *res,
                          unsigned level,
                          unsigned usage,
                          const struct pipe_box *box,
                          const void *data,
                          unsigned stride,
                          unsigned layer_stride)
{
   if (likely(res->target == PIPE_BUFFER) &&
       !(usage & PIPE_TRANSFER_UNSYNCHRONIZED)) {
      /* they should specify just an offset and a size */
      assert(level == 0);
      assert(box->y == 0);
      assert(box->z == 0);
      assert(box->height == 1);
      assert(box->depth == 1);

      buf_pwrite(ilo_context(pipe), ilo_buffer(res),
            usage, box->x, box->width, data);
   }
   else {
      u_default_transfer_inline_write(pipe, res,
            level, usage, box, data, stride, layer_stride);
   }
}
 
/**
* Initialize transfer-related functions.
*/
/**
 * Hook the transfer-related pipe_context callbacks up to this driver's
 * implementations. Called once during context creation.
 */
void
ilo_init_transfer_functions(struct ilo_context *ilo)
{
   ilo->base.transfer_map = ilo_transfer_map;
   ilo->base.transfer_flush_region = ilo_transfer_flush_region;
   ilo->base.transfer_unmap = ilo_transfer_unmap;
   ilo->base.transfer_inline_write = ilo_transfer_inline_write;
}
/contrib/sdk/sources/Mesa/src/gallium/drivers/ilo/ilo_transfer.h
0,0 → 1,66
/*
* Mesa 3-D graphics library
*
* Copyright (C) 2012-2013 LunarG, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*
* Authors:
* Chia-I Wu <olv@lunarg.com>
*/
 
#ifndef ILO_TRANSFER_H
#define ILO_TRANSFER_H
 
#include "pipe/p_state.h"
 
#include "ilo_common.h"
 
/* How a transfer maps the underlying bo into CPU-visible memory. */
enum ilo_transfer_map_method {
   /* map() / map_gtt() / map_unsynchronized() */
   ILO_TRANSFER_MAP_CPU,    /* direct CPU map of the bo */
   ILO_TRANSFER_MAP_GTT,    /* map through the GTT (detiled by hardware) */
   ILO_TRANSFER_MAP_UNSYNC, /* unsynchronized map; no stall, caller beware */

   /* use staging system buffer */
   ILO_TRANSFER_MAP_SW_CONVERT, /* software format conversion via staging */
   ILO_TRANSFER_MAP_SW_ZS,      /* software depth/stencil (un)packing via staging */
};
 
/* Driver-private transfer object; extends pipe_transfer. */
struct ilo_transfer {
   struct pipe_transfer base;       /* must be first: cast target of ilo_transfer() */

   enum ilo_transfer_map_method method; /* how the bo was (or will be) mapped */
   void *ptr;                       /* CPU pointer handed back to the caller */

   void *staging_sys;               /* system-memory staging buffer for SW methods */
};
 
struct ilo_context;
 
/**
 * Downcast a pipe_transfer to the driver's ilo_transfer.
 *
 * Valid because every transfer this driver creates embeds pipe_transfer
 * as the first member of struct ilo_transfer.
 */
static inline struct ilo_transfer *
ilo_transfer(struct pipe_transfer *transfer)
{
   return (struct ilo_transfer *) transfer;
}
 
void
ilo_init_transfer_functions(struct ilo_context *ilo);
 
#endif /* ILO_TRANSFER_H */
/contrib/sdk/sources/Mesa/src/gallium/drivers/ilo/ilo_video.c
0,0 → 1,65
/*
* Mesa 3-D graphics library
*
* Copyright (C) 2012-2013 LunarG, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*
* Authors:
* Chia-I Wu <olv@lunarg.com>
*/
 
#include "vl/vl_decoder.h"
#include "vl/vl_video_buffer.h"
 
#include "ilo_context.h"
#include "ilo_video.h"
 
/*
* Nothing here. We could make use of the video codec engine someday.
*/
 
/**
 * pipe_context::create_video_decoder callback.
 *
 * No hardware video support; delegate entirely to the shader-based
 * generic decoder from the vl (video library) helpers.
 */
static struct pipe_video_decoder *
ilo_create_video_decoder(struct pipe_context *pipe,
                         enum pipe_video_profile profile,
                         enum pipe_video_entrypoint entrypoint,
                         enum pipe_video_chroma_format chroma_format,
                         unsigned width, unsigned height, unsigned max_references,
                         bool expect_chunked_decode)
{
   return vl_create_decoder(pipe, profile, entrypoint, chroma_format,
         width, height, max_references, expect_chunked_decode);
}
 
/**
 * pipe_context::create_video_buffer callback.
 *
 * Delegates to the generic vl video buffer implementation.
 */
static struct pipe_video_buffer *
ilo_create_video_buffer(struct pipe_context *pipe,
                        const struct pipe_video_buffer *templ)
{
   return vl_video_buffer_create(pipe, templ);
}
 
/**
* Initialize video-related functions.
*/
/* Hook the video pipe_context callbacks up to the generic vl-backed
 * implementations above. Called once during context creation.
 */
void
ilo_init_video_functions(struct ilo_context *ilo)
{
   ilo->base.create_video_decoder = ilo_create_video_decoder;
   ilo->base.create_video_buffer = ilo_create_video_buffer;
}
/contrib/sdk/sources/Mesa/src/gallium/drivers/ilo/ilo_video.h
0,0 → 1,38
/*
* Mesa 3-D graphics library
*
* Copyright (C) 2012-2013 LunarG, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*
* Authors:
* Chia-I Wu <olv@lunarg.com>
*/
 
#ifndef ILO_VIDEO_H
#define ILO_VIDEO_H
 
#include "ilo_common.h"
 
struct ilo_context;
 
void
ilo_init_video_functions(struct ilo_context *ilo);
 
#endif /* ILO_VIDEO_H */
/contrib/sdk/sources/Mesa/src/gallium/drivers/ilo/include/brw_defines.h
0,0 → 1,1728
/*
Copyright (C) Intel Corp. 2006. All Rights Reserved.
Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
develop this 3D driver.
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice (including the
next paragraph) shall be included in all copies or substantial
portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
**********************************************************************/
/*
* Authors:
* Keith Whitwell <keith@tungstengraphics.com>
*/
 
#define INTEL_MASK(high, low) (((1<<((high)-(low)+1))-1)<<(low))
#define SET_FIELD(value, field) (((value) << field ## _SHIFT) & field ## _MASK)
#define GET_FIELD(word, field) (((word) & field ## _MASK) >> field ## _SHIFT)
 
#ifndef BRW_DEFINES_H
#define BRW_DEFINES_H
 
/* 3D state:
*/
#define PIPE_CONTROL_NOWRITE 0x00
#define PIPE_CONTROL_WRITEIMMEDIATE 0x01
#define PIPE_CONTROL_WRITEDEPTH 0x02
#define PIPE_CONTROL_WRITETIMESTAMP 0x03
 
#define PIPE_CONTROL_GTTWRITE_PROCESS_LOCAL 0x00
#define PIPE_CONTROL_GTTWRITE_GLOBAL 0x01
 
#define CMD_3D_PRIM 0x7b00 /* 3DPRIMITIVE */
/* DW0 */
# define GEN4_3DPRIM_TOPOLOGY_TYPE_SHIFT 10
# define GEN4_3DPRIM_VERTEXBUFFER_ACCESS_SEQUENTIAL (0 << 15)
# define GEN4_3DPRIM_VERTEXBUFFER_ACCESS_RANDOM (1 << 15)
/* DW1 */
# define GEN7_3DPRIM_VERTEXBUFFER_ACCESS_SEQUENTIAL (0 << 8)
# define GEN7_3DPRIM_VERTEXBUFFER_ACCESS_RANDOM (1 << 8)
 
#define _3DPRIM_POINTLIST 0x01
#define _3DPRIM_LINELIST 0x02
#define _3DPRIM_LINESTRIP 0x03
#define _3DPRIM_TRILIST 0x04
#define _3DPRIM_TRISTRIP 0x05
#define _3DPRIM_TRIFAN 0x06
#define _3DPRIM_QUADLIST 0x07
#define _3DPRIM_QUADSTRIP 0x08
#define _3DPRIM_LINELIST_ADJ 0x09
#define _3DPRIM_LINESTRIP_ADJ 0x0A
#define _3DPRIM_TRILIST_ADJ 0x0B
#define _3DPRIM_TRISTRIP_ADJ 0x0C
#define _3DPRIM_TRISTRIP_REVERSE 0x0D
#define _3DPRIM_POLYGON 0x0E
#define _3DPRIM_RECTLIST 0x0F
#define _3DPRIM_LINELOOP 0x10
#define _3DPRIM_POINTLIST_BF 0x11
#define _3DPRIM_LINESTRIP_CONT 0x12
#define _3DPRIM_LINESTRIP_BF 0x13
#define _3DPRIM_LINESTRIP_CONT_BF 0x14
#define _3DPRIM_TRIFAN_NOSTIPPLE 0x15
 
#define BRW_ANISORATIO_2 0
#define BRW_ANISORATIO_4 1
#define BRW_ANISORATIO_6 2
#define BRW_ANISORATIO_8 3
#define BRW_ANISORATIO_10 4
#define BRW_ANISORATIO_12 5
#define BRW_ANISORATIO_14 6
#define BRW_ANISORATIO_16 7
 
#define BRW_BLENDFACTOR_ONE 0x1
#define BRW_BLENDFACTOR_SRC_COLOR 0x2
#define BRW_BLENDFACTOR_SRC_ALPHA 0x3
#define BRW_BLENDFACTOR_DST_ALPHA 0x4
#define BRW_BLENDFACTOR_DST_COLOR 0x5
#define BRW_BLENDFACTOR_SRC_ALPHA_SATURATE 0x6
#define BRW_BLENDFACTOR_CONST_COLOR 0x7
#define BRW_BLENDFACTOR_CONST_ALPHA 0x8
#define BRW_BLENDFACTOR_SRC1_COLOR 0x9
#define BRW_BLENDFACTOR_SRC1_ALPHA 0x0A
#define BRW_BLENDFACTOR_ZERO 0x11
#define BRW_BLENDFACTOR_INV_SRC_COLOR 0x12
#define BRW_BLENDFACTOR_INV_SRC_ALPHA 0x13
#define BRW_BLENDFACTOR_INV_DST_ALPHA 0x14
#define BRW_BLENDFACTOR_INV_DST_COLOR 0x15
#define BRW_BLENDFACTOR_INV_CONST_COLOR 0x17
#define BRW_BLENDFACTOR_INV_CONST_ALPHA 0x18
#define BRW_BLENDFACTOR_INV_SRC1_COLOR 0x19
#define BRW_BLENDFACTOR_INV_SRC1_ALPHA 0x1A
 
#define BRW_BLENDFUNCTION_ADD 0
#define BRW_BLENDFUNCTION_SUBTRACT 1
#define BRW_BLENDFUNCTION_REVERSE_SUBTRACT 2
#define BRW_BLENDFUNCTION_MIN 3
#define BRW_BLENDFUNCTION_MAX 4
 
#define BRW_ALPHATEST_FORMAT_UNORM8 0
#define BRW_ALPHATEST_FORMAT_FLOAT32 1
 
#define BRW_CHROMAKEY_KILL_ON_ANY_MATCH 0
#define BRW_CHROMAKEY_REPLACE_BLACK 1
 
#define BRW_CLIP_API_OGL 0
#define BRW_CLIP_API_DX 1
 
#define BRW_CLIPMODE_NORMAL 0
#define BRW_CLIPMODE_CLIP_ALL 1
#define BRW_CLIPMODE_CLIP_NON_REJECTED 2
#define BRW_CLIPMODE_REJECT_ALL 3
#define BRW_CLIPMODE_ACCEPT_ALL 4
#define BRW_CLIPMODE_KERNEL_CLIP 5
 
#define BRW_CLIP_NDCSPACE 0
#define BRW_CLIP_SCREENSPACE 1
 
#define BRW_COMPAREFUNCTION_ALWAYS 0
#define BRW_COMPAREFUNCTION_NEVER 1
#define BRW_COMPAREFUNCTION_LESS 2
#define BRW_COMPAREFUNCTION_EQUAL 3
#define BRW_COMPAREFUNCTION_LEQUAL 4
#define BRW_COMPAREFUNCTION_GREATER 5
#define BRW_COMPAREFUNCTION_NOTEQUAL 6
#define BRW_COMPAREFUNCTION_GEQUAL 7
 
#define BRW_COVERAGE_PIXELS_HALF 0
#define BRW_COVERAGE_PIXELS_1 1
#define BRW_COVERAGE_PIXELS_2 2
#define BRW_COVERAGE_PIXELS_4 3
 
#define BRW_CULLMODE_BOTH 0
#define BRW_CULLMODE_NONE 1
#define BRW_CULLMODE_FRONT 2
#define BRW_CULLMODE_BACK 3
 
#define BRW_DEFAULTCOLOR_R8G8B8A8_UNORM 0
#define BRW_DEFAULTCOLOR_R32G32B32A32_FLOAT 1
 
#define BRW_DEPTHFORMAT_D32_FLOAT_S8X24_UINT 0
#define BRW_DEPTHFORMAT_D32_FLOAT 1
#define BRW_DEPTHFORMAT_D24_UNORM_S8_UINT 2
#define BRW_DEPTHFORMAT_D24_UNORM_X8_UINT 3 /* GEN5 */
#define BRW_DEPTHFORMAT_D16_UNORM 5
 
#define BRW_FLOATING_POINT_IEEE_754 0
#define BRW_FLOATING_POINT_NON_IEEE_754 1
 
#define BRW_FRONTWINDING_CW 0
#define BRW_FRONTWINDING_CCW 1
 
#define BRW_SPRITE_POINT_ENABLE 16
 
#define BRW_CUT_INDEX_ENABLE (1 << 10)
 
#define BRW_INDEX_BYTE 0
#define BRW_INDEX_WORD 1
#define BRW_INDEX_DWORD 2
 
#define BRW_LOGICOPFUNCTION_CLEAR 0
#define BRW_LOGICOPFUNCTION_NOR 1
#define BRW_LOGICOPFUNCTION_AND_INVERTED 2
#define BRW_LOGICOPFUNCTION_COPY_INVERTED 3
#define BRW_LOGICOPFUNCTION_AND_REVERSE 4
#define BRW_LOGICOPFUNCTION_INVERT 5
#define BRW_LOGICOPFUNCTION_XOR 6
#define BRW_LOGICOPFUNCTION_NAND 7
#define BRW_LOGICOPFUNCTION_AND 8
#define BRW_LOGICOPFUNCTION_EQUIV 9
#define BRW_LOGICOPFUNCTION_NOOP 10
#define BRW_LOGICOPFUNCTION_OR_INVERTED 11
#define BRW_LOGICOPFUNCTION_COPY 12
#define BRW_LOGICOPFUNCTION_OR_REVERSE 13
#define BRW_LOGICOPFUNCTION_OR 14
#define BRW_LOGICOPFUNCTION_SET 15
 
#define BRW_MAPFILTER_NEAREST 0x0
#define BRW_MAPFILTER_LINEAR 0x1
#define BRW_MAPFILTER_ANISOTROPIC 0x2
 
#define BRW_MIPFILTER_NONE 0
#define BRW_MIPFILTER_NEAREST 1
#define BRW_MIPFILTER_LINEAR 3
 
#define BRW_ADDRESS_ROUNDING_ENABLE_U_MAG 0x20
#define BRW_ADDRESS_ROUNDING_ENABLE_U_MIN 0x10
#define BRW_ADDRESS_ROUNDING_ENABLE_V_MAG 0x08
#define BRW_ADDRESS_ROUNDING_ENABLE_V_MIN 0x04
#define BRW_ADDRESS_ROUNDING_ENABLE_R_MAG 0x02
#define BRW_ADDRESS_ROUNDING_ENABLE_R_MIN 0x01
 
#define BRW_POLYGON_FRONT_FACING 0
#define BRW_POLYGON_BACK_FACING 1
 
#define BRW_PREFILTER_ALWAYS 0x0
#define BRW_PREFILTER_NEVER 0x1
#define BRW_PREFILTER_LESS 0x2
#define BRW_PREFILTER_EQUAL 0x3
#define BRW_PREFILTER_LEQUAL 0x4
#define BRW_PREFILTER_GREATER 0x5
#define BRW_PREFILTER_NOTEQUAL 0x6
#define BRW_PREFILTER_GEQUAL 0x7
 
#define BRW_PROVOKING_VERTEX_0 0
#define BRW_PROVOKING_VERTEX_1 1
#define BRW_PROVOKING_VERTEX_2 2
 
#define BRW_RASTRULE_UPPER_LEFT 0
#define BRW_RASTRULE_UPPER_RIGHT 1
/* These are listed as "Reserved, but not seen as useful"
* in Intel documentation (page 212, "Point Rasterization Rule",
* section 7.4 "SF Pipeline State Summary", of document
* "Intel® 965 Express Chipset Family and Intel® G35 Express
* Chipset Graphics Controller Programmer's Reference Manual,
* Volume 2: 3D/Media", Revision 1.0b as of January 2008,
* available at
* http://intellinuxgraphics.org/documentation.html
* at the time of this writing).
*
* These appear to be supported on at least some
* i965-family devices, and the BRW_RASTRULE_LOWER_RIGHT
* is useful when using OpenGL to render to a FBO
* (which has the pixel coordinate Y orientation inverted
* with respect to the normal OpenGL pixel coordinate system).
*/
#define BRW_RASTRULE_LOWER_LEFT 2
#define BRW_RASTRULE_LOWER_RIGHT 3
 
#define BRW_RENDERTARGET_CLAMPRANGE_UNORM 0
#define BRW_RENDERTARGET_CLAMPRANGE_SNORM 1
#define BRW_RENDERTARGET_CLAMPRANGE_FORMAT 2
 
#define BRW_STENCILOP_KEEP 0
#define BRW_STENCILOP_ZERO 1
#define BRW_STENCILOP_REPLACE 2
#define BRW_STENCILOP_INCRSAT 3
#define BRW_STENCILOP_DECRSAT 4
#define BRW_STENCILOP_INCR 5
#define BRW_STENCILOP_DECR 6
#define BRW_STENCILOP_INVERT 7
 
/* Surface state DW0 */
#define BRW_SURFACE_RC_READ_WRITE (1 << 8)
#define BRW_SURFACE_MIPLAYOUT_SHIFT 10
#define BRW_SURFACE_MIPMAPLAYOUT_BELOW 0
#define BRW_SURFACE_MIPMAPLAYOUT_RIGHT 1
#define BRW_SURFACE_CUBEFACE_ENABLES 0x3f
#define BRW_SURFACE_BLEND_ENABLED (1 << 13)
#define BRW_SURFACE_WRITEDISABLE_B_SHIFT 14
#define BRW_SURFACE_WRITEDISABLE_G_SHIFT 15
#define BRW_SURFACE_WRITEDISABLE_R_SHIFT 16
#define BRW_SURFACE_WRITEDISABLE_A_SHIFT 17
 
#define BRW_SURFACEFORMAT_R32G32B32A32_FLOAT 0x000
#define BRW_SURFACEFORMAT_R32G32B32A32_SINT 0x001
#define BRW_SURFACEFORMAT_R32G32B32A32_UINT 0x002
#define BRW_SURFACEFORMAT_R32G32B32A32_UNORM 0x003
#define BRW_SURFACEFORMAT_R32G32B32A32_SNORM 0x004
#define BRW_SURFACEFORMAT_R64G64_FLOAT 0x005
#define BRW_SURFACEFORMAT_R32G32B32X32_FLOAT 0x006
#define BRW_SURFACEFORMAT_R32G32B32A32_SSCALED 0x007
#define BRW_SURFACEFORMAT_R32G32B32A32_USCALED 0x008
#define BRW_SURFACEFORMAT_R32G32B32A32_SFIXED 0x020
#define BRW_SURFACEFORMAT_R64G64_PASSTHRU 0x021
#define BRW_SURFACEFORMAT_R32G32B32_FLOAT 0x040
#define BRW_SURFACEFORMAT_R32G32B32_SINT 0x041
#define BRW_SURFACEFORMAT_R32G32B32_UINT 0x042
#define BRW_SURFACEFORMAT_R32G32B32_UNORM 0x043
#define BRW_SURFACEFORMAT_R32G32B32_SNORM 0x044
#define BRW_SURFACEFORMAT_R32G32B32_SSCALED 0x045
#define BRW_SURFACEFORMAT_R32G32B32_USCALED 0x046
#define BRW_SURFACEFORMAT_R32G32B32_SFIXED 0x050
#define BRW_SURFACEFORMAT_R16G16B16A16_UNORM 0x080
#define BRW_SURFACEFORMAT_R16G16B16A16_SNORM 0x081
#define BRW_SURFACEFORMAT_R16G16B16A16_SINT 0x082
#define BRW_SURFACEFORMAT_R16G16B16A16_UINT 0x083
#define BRW_SURFACEFORMAT_R16G16B16A16_FLOAT 0x084
#define BRW_SURFACEFORMAT_R32G32_FLOAT 0x085
#define BRW_SURFACEFORMAT_R32G32_SINT 0x086
#define BRW_SURFACEFORMAT_R32G32_UINT 0x087
#define BRW_SURFACEFORMAT_R32_FLOAT_X8X24_TYPELESS 0x088
#define BRW_SURFACEFORMAT_X32_TYPELESS_G8X24_UINT 0x089
#define BRW_SURFACEFORMAT_L32A32_FLOAT 0x08A
#define BRW_SURFACEFORMAT_R32G32_UNORM 0x08B
#define BRW_SURFACEFORMAT_R32G32_SNORM 0x08C
#define BRW_SURFACEFORMAT_R64_FLOAT 0x08D
#define BRW_SURFACEFORMAT_R16G16B16X16_UNORM 0x08E
#define BRW_SURFACEFORMAT_R16G16B16X16_FLOAT 0x08F
#define BRW_SURFACEFORMAT_A32X32_FLOAT 0x090
#define BRW_SURFACEFORMAT_L32X32_FLOAT 0x091
#define BRW_SURFACEFORMAT_I32X32_FLOAT 0x092
#define BRW_SURFACEFORMAT_R16G16B16A16_SSCALED 0x093
#define BRW_SURFACEFORMAT_R16G16B16A16_USCALED 0x094
#define BRW_SURFACEFORMAT_R32G32_SSCALED 0x095
#define BRW_SURFACEFORMAT_R32G32_USCALED 0x096
#define BRW_SURFACEFORMAT_R32G32_SFIXED 0x0A0
#define BRW_SURFACEFORMAT_R64_PASSTHRU 0x0A1
#define BRW_SURFACEFORMAT_B8G8R8A8_UNORM 0x0C0
#define BRW_SURFACEFORMAT_B8G8R8A8_UNORM_SRGB 0x0C1
#define BRW_SURFACEFORMAT_R10G10B10A2_UNORM 0x0C2
#define BRW_SURFACEFORMAT_R10G10B10A2_UNORM_SRGB 0x0C3
#define BRW_SURFACEFORMAT_R10G10B10A2_UINT 0x0C4
#define BRW_SURFACEFORMAT_R10G10B10_SNORM_A2_UNORM 0x0C5
#define BRW_SURFACEFORMAT_R8G8B8A8_UNORM 0x0C7
#define BRW_SURFACEFORMAT_R8G8B8A8_UNORM_SRGB 0x0C8
#define BRW_SURFACEFORMAT_R8G8B8A8_SNORM 0x0C9
#define BRW_SURFACEFORMAT_R8G8B8A8_SINT 0x0CA
#define BRW_SURFACEFORMAT_R8G8B8A8_UINT 0x0CB
#define BRW_SURFACEFORMAT_R16G16_UNORM 0x0CC
#define BRW_SURFACEFORMAT_R16G16_SNORM 0x0CD
#define BRW_SURFACEFORMAT_R16G16_SINT 0x0CE
#define BRW_SURFACEFORMAT_R16G16_UINT 0x0CF
#define BRW_SURFACEFORMAT_R16G16_FLOAT 0x0D0
#define BRW_SURFACEFORMAT_B10G10R10A2_UNORM 0x0D1
#define BRW_SURFACEFORMAT_B10G10R10A2_UNORM_SRGB 0x0D2
#define BRW_SURFACEFORMAT_R11G11B10_FLOAT 0x0D3
#define BRW_SURFACEFORMAT_R32_SINT 0x0D6
#define BRW_SURFACEFORMAT_R32_UINT 0x0D7
#define BRW_SURFACEFORMAT_R32_FLOAT 0x0D8
#define BRW_SURFACEFORMAT_R24_UNORM_X8_TYPELESS 0x0D9
#define BRW_SURFACEFORMAT_X24_TYPELESS_G8_UINT 0x0DA
#define BRW_SURFACEFORMAT_L16A16_UNORM 0x0DF
#define BRW_SURFACEFORMAT_I24X8_UNORM 0x0E0
#define BRW_SURFACEFORMAT_L24X8_UNORM 0x0E1
#define BRW_SURFACEFORMAT_A24X8_UNORM 0x0E2
#define BRW_SURFACEFORMAT_I32_FLOAT 0x0E3
#define BRW_SURFACEFORMAT_L32_FLOAT 0x0E4
#define BRW_SURFACEFORMAT_A32_FLOAT 0x0E5
#define BRW_SURFACEFORMAT_B8G8R8X8_UNORM 0x0E9
#define BRW_SURFACEFORMAT_B8G8R8X8_UNORM_SRGB 0x0EA
#define BRW_SURFACEFORMAT_R8G8B8X8_UNORM 0x0EB
#define BRW_SURFACEFORMAT_R8G8B8X8_UNORM_SRGB 0x0EC
#define BRW_SURFACEFORMAT_R9G9B9E5_SHAREDEXP 0x0ED
#define BRW_SURFACEFORMAT_B10G10R10X2_UNORM 0x0EE
#define BRW_SURFACEFORMAT_L16A16_FLOAT 0x0F0
#define BRW_SURFACEFORMAT_R32_UNORM 0x0F1
#define BRW_SURFACEFORMAT_R32_SNORM 0x0F2
#define BRW_SURFACEFORMAT_R10G10B10X2_USCALED 0x0F3
#define BRW_SURFACEFORMAT_R8G8B8A8_SSCALED 0x0F4
#define BRW_SURFACEFORMAT_R8G8B8A8_USCALED 0x0F5
#define BRW_SURFACEFORMAT_R16G16_SSCALED 0x0F6
#define BRW_SURFACEFORMAT_R16G16_USCALED 0x0F7
#define BRW_SURFACEFORMAT_R32_SSCALED 0x0F8
#define BRW_SURFACEFORMAT_R32_USCALED 0x0F9
#define BRW_SURFACEFORMAT_B5G6R5_UNORM 0x100
#define BRW_SURFACEFORMAT_B5G6R5_UNORM_SRGB 0x101
#define BRW_SURFACEFORMAT_B5G5R5A1_UNORM 0x102
#define BRW_SURFACEFORMAT_B5G5R5A1_UNORM_SRGB 0x103
#define BRW_SURFACEFORMAT_B4G4R4A4_UNORM 0x104
#define BRW_SURFACEFORMAT_B4G4R4A4_UNORM_SRGB 0x105
#define BRW_SURFACEFORMAT_R8G8_UNORM 0x106
#define BRW_SURFACEFORMAT_R8G8_SNORM 0x107
#define BRW_SURFACEFORMAT_R8G8_SINT 0x108
#define BRW_SURFACEFORMAT_R8G8_UINT 0x109
#define BRW_SURFACEFORMAT_R16_UNORM 0x10A
#define BRW_SURFACEFORMAT_R16_SNORM 0x10B
#define BRW_SURFACEFORMAT_R16_SINT 0x10C
#define BRW_SURFACEFORMAT_R16_UINT 0x10D
#define BRW_SURFACEFORMAT_R16_FLOAT 0x10E
#define BRW_SURFACEFORMAT_A8P8_UNORM_PALETTE0 0x10F
#define BRW_SURFACEFORMAT_A8P8_UNORM_PALETTE1 0x110
#define BRW_SURFACEFORMAT_I16_UNORM 0x111
#define BRW_SURFACEFORMAT_L16_UNORM 0x112
#define BRW_SURFACEFORMAT_A16_UNORM 0x113
#define BRW_SURFACEFORMAT_L8A8_UNORM 0x114
#define BRW_SURFACEFORMAT_I16_FLOAT 0x115
#define BRW_SURFACEFORMAT_L16_FLOAT 0x116
#define BRW_SURFACEFORMAT_A16_FLOAT 0x117
#define BRW_SURFACEFORMAT_L8A8_UNORM_SRGB 0x118
#define BRW_SURFACEFORMAT_R5G5_SNORM_B6_UNORM 0x119
#define BRW_SURFACEFORMAT_B5G5R5X1_UNORM 0x11A
#define BRW_SURFACEFORMAT_B5G5R5X1_UNORM_SRGB 0x11B
#define BRW_SURFACEFORMAT_R8G8_SSCALED 0x11C
#define BRW_SURFACEFORMAT_R8G8_USCALED 0x11D
#define BRW_SURFACEFORMAT_R16_SSCALED 0x11E
#define BRW_SURFACEFORMAT_R16_USCALED 0x11F
#define BRW_SURFACEFORMAT_P8A8_UNORM_PALETTE0 0x122
#define BRW_SURFACEFORMAT_P8A8_UNORM_PALETTE1 0x123
#define BRW_SURFACEFORMAT_A1B5G5R5_UNORM 0x124
#define BRW_SURFACEFORMAT_A4B4G4R4_UNORM 0x125
#define BRW_SURFACEFORMAT_L8A8_UINT 0x126
#define BRW_SURFACEFORMAT_L8A8_SINT 0x127
#define BRW_SURFACEFORMAT_R8_UNORM 0x140
#define BRW_SURFACEFORMAT_R8_SNORM 0x141
#define BRW_SURFACEFORMAT_R8_SINT 0x142
#define BRW_SURFACEFORMAT_R8_UINT 0x143
#define BRW_SURFACEFORMAT_A8_UNORM 0x144
#define BRW_SURFACEFORMAT_I8_UNORM 0x145
#define BRW_SURFACEFORMAT_L8_UNORM 0x146
#define BRW_SURFACEFORMAT_P4A4_UNORM 0x147
#define BRW_SURFACEFORMAT_A4P4_UNORM 0x148
#define BRW_SURFACEFORMAT_R8_SSCALED 0x149
#define BRW_SURFACEFORMAT_R8_USCALED 0x14A
#define BRW_SURFACEFORMAT_P8_UNORM_PALETTE0 0x14B
#define BRW_SURFACEFORMAT_L8_UNORM_SRGB 0x14C
#define BRW_SURFACEFORMAT_P8_UNORM_PALETTE1 0x14D
#define BRW_SURFACEFORMAT_P4A4_UNORM_PALETTE1 0x14E
#define BRW_SURFACEFORMAT_A4P4_UNORM_PALETTE1 0x14F
#define BRW_SURFACEFORMAT_Y8_SNORM 0x150
#define BRW_SURFACEFORMAT_L8_UINT 0x152
#define BRW_SURFACEFORMAT_L8_SINT 0x153
#define BRW_SURFACEFORMAT_I8_UINT 0x154
#define BRW_SURFACEFORMAT_I8_SINT 0x155
#define BRW_SURFACEFORMAT_DXT1_RGB_SRGB 0x180
#define BRW_SURFACEFORMAT_R1_UINT 0x181
#define BRW_SURFACEFORMAT_YCRCB_NORMAL 0x182
#define BRW_SURFACEFORMAT_YCRCB_SWAPUVY 0x183
#define BRW_SURFACEFORMAT_P2_UNORM_PALETTE0 0x184
#define BRW_SURFACEFORMAT_P2_UNORM_PALETTE1 0x185
#define BRW_SURFACEFORMAT_BC1_UNORM 0x186
#define BRW_SURFACEFORMAT_BC2_UNORM 0x187
#define BRW_SURFACEFORMAT_BC3_UNORM 0x188
#define BRW_SURFACEFORMAT_BC4_UNORM 0x189
#define BRW_SURFACEFORMAT_BC5_UNORM 0x18A
#define BRW_SURFACEFORMAT_BC1_UNORM_SRGB 0x18B
#define BRW_SURFACEFORMAT_BC2_UNORM_SRGB 0x18C
#define BRW_SURFACEFORMAT_BC3_UNORM_SRGB 0x18D
#define BRW_SURFACEFORMAT_MONO8 0x18E
#define BRW_SURFACEFORMAT_YCRCB_SWAPUV 0x18F
#define BRW_SURFACEFORMAT_YCRCB_SWAPY 0x190
#define BRW_SURFACEFORMAT_DXT1_RGB 0x191
#define BRW_SURFACEFORMAT_FXT1 0x192
#define BRW_SURFACEFORMAT_R8G8B8_UNORM 0x193
#define BRW_SURFACEFORMAT_R8G8B8_SNORM 0x194
#define BRW_SURFACEFORMAT_R8G8B8_SSCALED 0x195
#define BRW_SURFACEFORMAT_R8G8B8_USCALED 0x196
#define BRW_SURFACEFORMAT_R64G64B64A64_FLOAT 0x197
#define BRW_SURFACEFORMAT_R64G64B64_FLOAT 0x198
#define BRW_SURFACEFORMAT_BC4_SNORM 0x199
#define BRW_SURFACEFORMAT_BC5_SNORM 0x19A
#define BRW_SURFACEFORMAT_R16G16B16_FLOAT 0x19B
#define BRW_SURFACEFORMAT_R16G16B16_UNORM 0x19C
#define BRW_SURFACEFORMAT_R16G16B16_SNORM 0x19D
#define BRW_SURFACEFORMAT_R16G16B16_SSCALED 0x19E
#define BRW_SURFACEFORMAT_R16G16B16_USCALED 0x19F
#define BRW_SURFACEFORMAT_BC6H_SF16 0x1A1
#define BRW_SURFACEFORMAT_BC7_UNORM 0x1A2
#define BRW_SURFACEFORMAT_BC7_UNORM_SRGB 0x1A3
#define BRW_SURFACEFORMAT_BC6H_UF16 0x1A4
#define BRW_SURFACEFORMAT_PLANAR_420_8 0x1A5
#define BRW_SURFACEFORMAT_R8G8B8_UNORM_SRGB 0x1A8
#define BRW_SURFACEFORMAT_ETC1_RGB8 0x1A9
#define BRW_SURFACEFORMAT_ETC2_RGB8 0x1AA
#define BRW_SURFACEFORMAT_EAC_R11 0x1AB
#define BRW_SURFACEFORMAT_EAC_RG11 0x1AC
#define BRW_SURFACEFORMAT_EAC_SIGNED_R11 0x1AD
#define BRW_SURFACEFORMAT_EAC_SIGNED_RG11 0x1AE
#define BRW_SURFACEFORMAT_ETC2_SRGB8 0x1AF
#define BRW_SURFACEFORMAT_R16G16B16_UINT 0x1B0
#define BRW_SURFACEFORMAT_R16G16B16_SINT 0x1B1
#define BRW_SURFACEFORMAT_R32_SFIXED 0x1B2
#define BRW_SURFACEFORMAT_R10G10B10A2_SNORM 0x1B3
#define BRW_SURFACEFORMAT_R10G10B10A2_USCALED 0x1B4
#define BRW_SURFACEFORMAT_R10G10B10A2_SSCALED 0x1B5
#define BRW_SURFACEFORMAT_R10G10B10A2_SINT 0x1B6
#define BRW_SURFACEFORMAT_B10G10R10A2_SNORM 0x1B7
#define BRW_SURFACEFORMAT_B10G10R10A2_USCALED 0x1B8
#define BRW_SURFACEFORMAT_B10G10R10A2_SSCALED 0x1B9
#define BRW_SURFACEFORMAT_B10G10R10A2_UINT 0x1BA
#define BRW_SURFACEFORMAT_B10G10R10A2_SINT 0x1BB
#define BRW_SURFACEFORMAT_R64G64B64A64_PASSTHRU 0x1BC
#define BRW_SURFACEFORMAT_R64G64B64_PASSTHRU 0x1BD
#define BRW_SURFACEFORMAT_ETC2_RGB8_PTA 0x1C0
#define BRW_SURFACEFORMAT_ETC2_SRGB8_PTA 0x1C1
#define BRW_SURFACEFORMAT_ETC2_EAC_RGBA8 0x1C2
#define BRW_SURFACEFORMAT_ETC2_EAC_SRGB8_A8 0x1C3
#define BRW_SURFACEFORMAT_R8G8B8_UINT 0x1C8
#define BRW_SURFACEFORMAT_R8G8B8_SINT 0x1C9
#define BRW_SURFACEFORMAT_RAW 0x1FF
#define BRW_SURFACE_FORMAT_SHIFT 18
#define BRW_SURFACE_FORMAT_MASK INTEL_MASK(26, 18)
 
#define BRW_SURFACERETURNFORMAT_FLOAT32 0
#define BRW_SURFACERETURNFORMAT_S1 1
 
#define BRW_SURFACE_TYPE_SHIFT 29
#define BRW_SURFACE_TYPE_MASK INTEL_MASK(31, 29)
#define BRW_SURFACE_1D 0
#define BRW_SURFACE_2D 1
#define BRW_SURFACE_3D 2
#define BRW_SURFACE_CUBE 3
#define BRW_SURFACE_BUFFER 4
#define BRW_SURFACE_NULL 7
 
#define GEN7_SURFACE_IS_ARRAY (1 << 28)
#define GEN7_SURFACE_VALIGN_2 (0 << 16)
#define GEN7_SURFACE_VALIGN_4 (1 << 16)
#define GEN7_SURFACE_HALIGN_4 (0 << 15)
#define GEN7_SURFACE_HALIGN_8 (1 << 15)
#define GEN7_SURFACE_TILING_NONE (0 << 13)
#define GEN7_SURFACE_TILING_X (2 << 13)
#define GEN7_SURFACE_TILING_Y (3 << 13)
#define GEN7_SURFACE_ARYSPC_FULL (0 << 10)
#define GEN7_SURFACE_ARYSPC_LOD0 (1 << 10)
 
/* Surface state DW2 */
#define BRW_SURFACE_HEIGHT_SHIFT 19
#define BRW_SURFACE_HEIGHT_MASK INTEL_MASK(31, 19)
#define BRW_SURFACE_WIDTH_SHIFT 6
#define BRW_SURFACE_WIDTH_MASK INTEL_MASK(18, 6)
#define BRW_SURFACE_LOD_SHIFT 2
#define BRW_SURFACE_LOD_MASK INTEL_MASK(5, 2)
#define GEN7_SURFACE_HEIGHT_SHIFT 16
#define GEN7_SURFACE_HEIGHT_MASK INTEL_MASK(29, 16)
#define GEN7_SURFACE_WIDTH_SHIFT 0
#define GEN7_SURFACE_WIDTH_MASK INTEL_MASK(13, 0)
 
/* Surface state DW3 */
#define BRW_SURFACE_DEPTH_SHIFT 21
#define BRW_SURFACE_DEPTH_MASK INTEL_MASK(31, 21)
#define BRW_SURFACE_PITCH_SHIFT 3
#define BRW_SURFACE_PITCH_MASK INTEL_MASK(19, 3)
#define BRW_SURFACE_TILED (1 << 1)
#define BRW_SURFACE_TILED_Y (1 << 0)
 
/* Surface state DW4 */
#define BRW_SURFACE_MIN_LOD_SHIFT 28
#define BRW_SURFACE_MIN_LOD_MASK INTEL_MASK(31, 28)
#define BRW_SURFACE_MULTISAMPLECOUNT_1 (0 << 4)
#define BRW_SURFACE_MULTISAMPLECOUNT_4 (2 << 4)
#define GEN7_SURFACE_MULTISAMPLECOUNT_1 (0 << 3)
#define GEN7_SURFACE_MULTISAMPLECOUNT_4 (2 << 3)
#define GEN7_SURFACE_MULTISAMPLECOUNT_8 (3 << 3)
#define GEN7_SURFACE_MSFMT_MSS (0 << 6)
#define GEN7_SURFACE_MSFMT_DEPTH_STENCIL (1 << 6)
 
/* Surface state DW5 */
#define BRW_SURFACE_X_OFFSET_SHIFT 25
#define BRW_SURFACE_X_OFFSET_MASK INTEL_MASK(31, 25)
#define BRW_SURFACE_VERTICAL_ALIGN_ENABLE (1 << 24)
#define BRW_SURFACE_Y_OFFSET_SHIFT 20
#define BRW_SURFACE_Y_OFFSET_MASK INTEL_MASK(23, 20)
#define GEN7_SURFACE_MIN_LOD_SHIFT 4
#define GEN7_SURFACE_MIN_LOD_MASK INTEL_MASK(7, 4)
 
/* Surface state DW6 */
#define GEN7_SURFACE_MCS_ENABLE (1 << 0)
#define GEN7_SURFACE_MCS_PITCH_SHIFT 3
#define GEN7_SURFACE_MCS_PITCH_MASK INTEL_MASK(11, 3)
 
/* Surface state DW7 */
#define GEN7_SURFACE_SCS_R_SHIFT 25
#define GEN7_SURFACE_SCS_R_MASK INTEL_MASK(27, 25)
#define GEN7_SURFACE_SCS_G_SHIFT 22
#define GEN7_SURFACE_SCS_G_MASK INTEL_MASK(24, 22)
#define GEN7_SURFACE_SCS_B_SHIFT 19
#define GEN7_SURFACE_SCS_B_MASK INTEL_MASK(21, 19)
#define GEN7_SURFACE_SCS_A_SHIFT 16
#define GEN7_SURFACE_SCS_A_MASK INTEL_MASK(18, 16)
 
/* The actual swizzle values/what channel to use */
#define HSW_SCS_ZERO 0
#define HSW_SCS_ONE 1
#define HSW_SCS_RED 4
#define HSW_SCS_GREEN 5
#define HSW_SCS_BLUE 6
#define HSW_SCS_ALPHA 7
 
#define BRW_TEXCOORDMODE_WRAP 0
#define BRW_TEXCOORDMODE_MIRROR 1
#define BRW_TEXCOORDMODE_CLAMP 2
#define BRW_TEXCOORDMODE_CUBE 3
#define BRW_TEXCOORDMODE_CLAMP_BORDER 4
#define BRW_TEXCOORDMODE_MIRROR_ONCE 5
 
#define BRW_THREAD_PRIORITY_NORMAL 0
#define BRW_THREAD_PRIORITY_HIGH 1
 
#define BRW_TILEWALK_XMAJOR 0
#define BRW_TILEWALK_YMAJOR 1
 
#define BRW_VERTEX_SUBPIXEL_PRECISION_8BITS 0
#define BRW_VERTEX_SUBPIXEL_PRECISION_4BITS 1
 
/* Execution Unit (EU) defines
*/
 
#define BRW_ALIGN_1 0
#define BRW_ALIGN_16 1
 
#define BRW_ADDRESS_DIRECT 0
#define BRW_ADDRESS_REGISTER_INDIRECT_REGISTER 1
 
#define BRW_CHANNEL_X 0
#define BRW_CHANNEL_Y 1
#define BRW_CHANNEL_Z 2
#define BRW_CHANNEL_W 3
 
/* Instruction compression control field (pre-Gen6 encoding). */
enum brw_compression {
   BRW_COMPRESSION_NONE = 0,       /* no compression; execute as-is */
   BRW_COMPRESSION_2NDHALF = 1,    /* operate on the second half of the channels */
   BRW_COMPRESSION_COMPRESSED = 2, /* compressed instruction (full SIMD16) */
};
 
#define GEN6_COMPRESSION_1Q 0
#define GEN6_COMPRESSION_2Q 1
#define GEN6_COMPRESSION_3Q 2
#define GEN6_COMPRESSION_4Q 3
#define GEN6_COMPRESSION_1H 0
#define GEN6_COMPRESSION_2H 2
 
#define BRW_CONDITIONAL_NONE 0
#define BRW_CONDITIONAL_Z 1
#define BRW_CONDITIONAL_NZ 2
#define BRW_CONDITIONAL_EQ 1 /* Z */
#define BRW_CONDITIONAL_NEQ 2 /* NZ */
#define BRW_CONDITIONAL_G 3
#define BRW_CONDITIONAL_GE 4
#define BRW_CONDITIONAL_L 5
#define BRW_CONDITIONAL_LE 6
#define BRW_CONDITIONAL_R 7
#define BRW_CONDITIONAL_O 8
#define BRW_CONDITIONAL_U 9
 
#define BRW_DEBUG_NONE 0
#define BRW_DEBUG_BREAKPOINT 1
 
#define BRW_DEPENDENCY_NORMAL 0
#define BRW_DEPENDENCY_NOTCLEARED 1
#define BRW_DEPENDENCY_NOTCHECKED 2
#define BRW_DEPENDENCY_DISABLE 3
 
#define BRW_EXECUTE_1 0
#define BRW_EXECUTE_2 1
#define BRW_EXECUTE_4 2
#define BRW_EXECUTE_8 3
#define BRW_EXECUTE_16 4
#define BRW_EXECUTE_32 5
 
#define BRW_HORIZONTAL_STRIDE_0 0
#define BRW_HORIZONTAL_STRIDE_1 1
#define BRW_HORIZONTAL_STRIDE_2 2
#define BRW_HORIZONTAL_STRIDE_4 3
 
#define BRW_INSTRUCTION_NORMAL 0
#define BRW_INSTRUCTION_SATURATE 1
 
#define BRW_MASK_ENABLE 0
#define BRW_MASK_DISABLE 1
 
/** @{
*
* Gen6 has replaced "mask enable/disable" with WECtrl, which is
* effectively the same but much simpler to think about. Now, there
* are two contributors ANDed together to whether channels are
* executed: The predication on the instruction, and the channel write
* enable.
*/
/**
* This is the default value. It means that a channel's write enable is set
* if the per-channel IP is pointing at this instruction.
*/
#define BRW_WE_NORMAL 0
/**
* This is used like BRW_MASK_DISABLE, and causes all channels to have
* their write enable set. Note that predication still contributes to
* whether the channel actually gets written.
*/
#define BRW_WE_ALL 1
/** @} */
 
/**
 * Instruction opcodes.  Values below 128 are the raw EU opcode encodings
 * (gaps are unassigned encodings); FS_OPCODE_FB_WRITE (128) and everything
 * after it are compiler-internal virtual opcodes that get lowered to real
 * instructions during code generation.
 */
enum opcode {
/* These are the actual hardware opcodes. */
BRW_OPCODE_MOV = 1,
BRW_OPCODE_SEL = 2,
BRW_OPCODE_NOT = 4,
BRW_OPCODE_AND = 5,
BRW_OPCODE_OR = 6,
BRW_OPCODE_XOR = 7,
BRW_OPCODE_SHR = 8,
BRW_OPCODE_SHL = 9,
BRW_OPCODE_RSR = 10,
BRW_OPCODE_RSL = 11,
BRW_OPCODE_ASR = 12,
BRW_OPCODE_CMP = 16,
BRW_OPCODE_CMPN = 17,
BRW_OPCODE_F32TO16 = 19,
BRW_OPCODE_F16TO32 = 20,
BRW_OPCODE_BFREV = 23,
BRW_OPCODE_BFE = 24,
BRW_OPCODE_BFI1 = 25,
BRW_OPCODE_BFI2 = 26,
/* Flow-control opcodes. */
BRW_OPCODE_JMPI = 32,
BRW_OPCODE_IF = 34,
BRW_OPCODE_IFF = 35,
BRW_OPCODE_ELSE = 36,
BRW_OPCODE_ENDIF = 37,
BRW_OPCODE_DO = 38,
BRW_OPCODE_WHILE = 39,
BRW_OPCODE_BREAK = 40,
BRW_OPCODE_CONTINUE = 41,
BRW_OPCODE_HALT = 42,
BRW_OPCODE_MSAVE = 44,
BRW_OPCODE_MRESTORE = 45,
BRW_OPCODE_PUSH = 46,
BRW_OPCODE_POP = 47,
BRW_OPCODE_WAIT = 48,
BRW_OPCODE_SEND = 49,
BRW_OPCODE_SENDC = 50,
BRW_OPCODE_MATH = 56,
/* Arithmetic opcodes. */
BRW_OPCODE_ADD = 64,
BRW_OPCODE_MUL = 65,
BRW_OPCODE_AVG = 66,
BRW_OPCODE_FRC = 67,
BRW_OPCODE_RNDU = 68,
BRW_OPCODE_RNDD = 69,
BRW_OPCODE_RNDE = 70,
BRW_OPCODE_RNDZ = 71,
BRW_OPCODE_MAC = 72,
BRW_OPCODE_MACH = 73,
BRW_OPCODE_LZD = 74,
BRW_OPCODE_FBH = 75,
BRW_OPCODE_FBL = 76,
BRW_OPCODE_CBIT = 77,
BRW_OPCODE_SAD2 = 80,
BRW_OPCODE_SADA2 = 81,
BRW_OPCODE_DP4 = 84,
BRW_OPCODE_DPH = 85,
BRW_OPCODE_DP3 = 86,
BRW_OPCODE_DP2 = 87,
BRW_OPCODE_DPA2 = 88,
BRW_OPCODE_LINE = 89,
BRW_OPCODE_PLN = 90,
BRW_OPCODE_MAD = 91,
BRW_OPCODE_LRP = 92,
BRW_OPCODE_NOP = 126,
 
/* These are compiler backend opcodes that get translated into other
 * instructions.
 */
FS_OPCODE_FB_WRITE = 128,
/* Math-unit virtual opcodes; lowered to BRW_OPCODE_MATH (or Gen4/5
 * math SEND messages) during code generation.
 */
SHADER_OPCODE_RCP,
SHADER_OPCODE_RSQ,
SHADER_OPCODE_SQRT,
SHADER_OPCODE_EXP2,
SHADER_OPCODE_LOG2,
SHADER_OPCODE_POW,
SHADER_OPCODE_INT_QUOTIENT,
SHADER_OPCODE_INT_REMAINDER,
SHADER_OPCODE_SIN,
SHADER_OPCODE_COS,
 
/* Texturing virtual opcodes; lowered to sampler SEND messages. */
SHADER_OPCODE_TEX,
SHADER_OPCODE_TXD,
SHADER_OPCODE_TXF,
SHADER_OPCODE_TXL,
SHADER_OPCODE_TXS,
FS_OPCODE_TXB,
SHADER_OPCODE_TXF_MS,
SHADER_OPCODE_LOD,
 
SHADER_OPCODE_SHADER_TIME_ADD,
 
/* Fragment-shader-only virtual opcodes. */
FS_OPCODE_DDX,
FS_OPCODE_DDY,
FS_OPCODE_PIXEL_X,
FS_OPCODE_PIXEL_Y,
FS_OPCODE_CINTERP,
FS_OPCODE_LINTERP,
FS_OPCODE_SPILL,
FS_OPCODE_UNSPILL,
FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD,
FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD_GEN7,
FS_OPCODE_VARYING_PULL_CONSTANT_LOAD,
FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_GEN7,
FS_OPCODE_MOV_DISPATCH_TO_FLAGS,
FS_OPCODE_DISCARD_JUMP,
FS_OPCODE_SET_SIMD4X2_OFFSET,
FS_OPCODE_PACK_HALF_2x16_SPLIT,
FS_OPCODE_UNPACK_HALF_2x16_SPLIT_X,
FS_OPCODE_UNPACK_HALF_2x16_SPLIT_Y,
FS_OPCODE_PLACEHOLDER_HALT,
 
/* Vertex-shader-only virtual opcodes. */
VS_OPCODE_URB_WRITE,
VS_OPCODE_SCRATCH_READ,
VS_OPCODE_SCRATCH_WRITE,
VS_OPCODE_PULL_CONSTANT_LOAD,
VS_OPCODE_PULL_CONSTANT_LOAD_GEN7,
};
 
#define BRW_PREDICATE_NONE 0
#define BRW_PREDICATE_NORMAL 1
#define BRW_PREDICATE_ALIGN1_ANYV 2
#define BRW_PREDICATE_ALIGN1_ALLV 3
#define BRW_PREDICATE_ALIGN1_ANY2H 4
#define BRW_PREDICATE_ALIGN1_ALL2H 5
#define BRW_PREDICATE_ALIGN1_ANY4H 6
#define BRW_PREDICATE_ALIGN1_ALL4H 7
#define BRW_PREDICATE_ALIGN1_ANY8H 8
#define BRW_PREDICATE_ALIGN1_ALL8H 9
#define BRW_PREDICATE_ALIGN1_ANY16H 10
#define BRW_PREDICATE_ALIGN1_ALL16H 11
#define BRW_PREDICATE_ALIGN16_REPLICATE_X 2
#define BRW_PREDICATE_ALIGN16_REPLICATE_Y 3
#define BRW_PREDICATE_ALIGN16_REPLICATE_Z 4
#define BRW_PREDICATE_ALIGN16_REPLICATE_W 5
#define BRW_PREDICATE_ALIGN16_ANY4H 6
#define BRW_PREDICATE_ALIGN16_ALL4H 7
 
#define BRW_ARCHITECTURE_REGISTER_FILE 0
#define BRW_GENERAL_REGISTER_FILE 1
#define BRW_MESSAGE_REGISTER_FILE 2
#define BRW_IMMEDIATE_VALUE 3
 
/* Register data-type field encodings.  VF deliberately shares encoding 5
 * with B, and V shares encoding 6 with HF: per the inline notes below the
 * packed-vector forms appear to be immediate-only, so the encodings can
 * overlap with register-file types (NOTE(review): the original "?" comment
 * suggests this was unconfirmed — verify against the PRM before relying
 * on it).
 */
#define BRW_REGISTER_TYPE_UD 0
#define BRW_REGISTER_TYPE_D 1
#define BRW_REGISTER_TYPE_UW 2
#define BRW_REGISTER_TYPE_W 3
#define BRW_REGISTER_TYPE_UB 4
#define BRW_REGISTER_TYPE_B 5
#define BRW_REGISTER_TYPE_VF 5 /* packed float vector, immediates only? */
#define BRW_REGISTER_TYPE_HF 6
#define BRW_REGISTER_TYPE_V 6 /* packed int vector, immediates only, uword dest only */
#define BRW_REGISTER_TYPE_F 7
 
/* SNB adds 3-src instructions (MAD and LRP) that only operate on floats, so
* the types were implied. IVB adds BFE and BFI2 that operate on doublewords
* and unsigned doublewords, so a new field is also available in the da3src
* struct (part of struct brw_instruction.bits1 in brw_structs.h) to select
* dst and shared-src types. The values are different from BRW_REGISTER_TYPE_*.
*/
#define BRW_3SRC_TYPE_F 0
#define BRW_3SRC_TYPE_D 1
#define BRW_3SRC_TYPE_UD 2
#define BRW_3SRC_TYPE_DF 3
 
#define BRW_ARF_NULL 0x00
#define BRW_ARF_ADDRESS 0x10
#define BRW_ARF_ACCUMULATOR 0x20
#define BRW_ARF_FLAG 0x30
#define BRW_ARF_MASK 0x40
#define BRW_ARF_MASK_STACK 0x50
#define BRW_ARF_MASK_STACK_DEPTH 0x60
#define BRW_ARF_STATE 0x70
#define BRW_ARF_CONTROL 0x80
#define BRW_ARF_NOTIFICATION_COUNT 0x90
#define BRW_ARF_IP 0xA0
#define BRW_ARF_TDR 0xB0
#define BRW_ARF_TIMESTAMP 0xC0
 
#define BRW_MRF_COMPR4 (1 << 7)
 
#define BRW_AMASK 0
#define BRW_IMASK 1
#define BRW_LMASK 2
#define BRW_CMASK 3
 
 
 
#define BRW_THREAD_NORMAL 0
#define BRW_THREAD_ATOMIC 1
#define BRW_THREAD_SWITCH 2
 
#define BRW_VERTICAL_STRIDE_0 0
#define BRW_VERTICAL_STRIDE_1 1
#define BRW_VERTICAL_STRIDE_2 2
#define BRW_VERTICAL_STRIDE_4 3
#define BRW_VERTICAL_STRIDE_8 4
#define BRW_VERTICAL_STRIDE_16 5
#define BRW_VERTICAL_STRIDE_32 6
#define BRW_VERTICAL_STRIDE_64 7
#define BRW_VERTICAL_STRIDE_128 8
#define BRW_VERTICAL_STRIDE_256 9
#define BRW_VERTICAL_STRIDE_ONE_DIMENSIONAL 0xF
 
#define BRW_WIDTH_1 0
#define BRW_WIDTH_2 1
#define BRW_WIDTH_4 2
#define BRW_WIDTH_8 3
#define BRW_WIDTH_16 4
 
#define BRW_STATELESS_BUFFER_BOUNDARY_1K 0
#define BRW_STATELESS_BUFFER_BOUNDARY_2K 1
#define BRW_STATELESS_BUFFER_BOUNDARY_4K 2
#define BRW_STATELESS_BUFFER_BOUNDARY_8K 3
#define BRW_STATELESS_BUFFER_BOUNDARY_16K 4
#define BRW_STATELESS_BUFFER_BOUNDARY_32K 5
#define BRW_STATELESS_BUFFER_BOUNDARY_64K 6
#define BRW_STATELESS_BUFFER_BOUNDARY_128K 7
#define BRW_STATELESS_BUFFER_BOUNDARY_256K 8
#define BRW_STATELESS_BUFFER_BOUNDARY_512K 9
#define BRW_STATELESS_BUFFER_BOUNDARY_1M 10
#define BRW_STATELESS_BUFFER_BOUNDARY_2M 11
 
#define BRW_POLYGON_FACING_FRONT 0
#define BRW_POLYGON_FACING_BACK 1
 
/**
* Message target: Shared Function ID for where to SEND a message.
*
* These are enumerated in the ISA reference under "send - Send Message".
* In particular, see the following tables:
* - G45 PRM, Volume 4, Table 14-15 "Message Descriptor Definition"
* - Sandybridge PRM, Volume 4 Part 2, Table 8-16 "Extended Message Descriptor"
* - BSpec, Volume 1a (GPU Overview) / Graphics Processing Engine (GPE) /
* Overview / GPE Function IDs
*/
enum brw_message_target {
BRW_SFID_NULL = 0,
BRW_SFID_MATH = 1, /* Only valid on Gen4-5 */
BRW_SFID_SAMPLER = 2,
BRW_SFID_MESSAGE_GATEWAY = 3,
BRW_SFID_DATAPORT_READ = 4,
BRW_SFID_DATAPORT_WRITE = 5,
BRW_SFID_URB = 6,
BRW_SFID_THREAD_SPAWNER = 7,
 
/* Gen6 reuses SFIDs 4/5 for per-cache dataports (replacing the Gen4/5
 * read/write split above) and adds a constant-cache dataport.
 */
GEN6_SFID_DATAPORT_SAMPLER_CACHE = 4,
GEN6_SFID_DATAPORT_RENDER_CACHE = 5,
GEN6_SFID_DATAPORT_CONSTANT_CACHE = 9,
 
/* Gen7 adds a general data-cache dataport; Haswell adds a second one. */
GEN7_SFID_DATAPORT_DATA_CACHE = 10,
HSW_SFID_DATAPORT_DATA_CACHE_1 = 12,
};
 
#define GEN7_MESSAGE_TARGET_DP_DATA_CACHE 10
 
#define BRW_SAMPLER_RETURN_FORMAT_FLOAT32 0
#define BRW_SAMPLER_RETURN_FORMAT_UINT32 2
#define BRW_SAMPLER_RETURN_FORMAT_SINT32 3
 
#define BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE 0
#define BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE 0
#define BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS 0
#define BRW_SAMPLER_MESSAGE_SIMD8_KILLPIX 1
#define BRW_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_LOD 1
#define BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_LOD 1
#define BRW_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_GRADIENTS 2
#define BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_GRADIENTS 2
#define BRW_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_COMPARE 0
#define BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_COMPARE 2
#define BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_BIAS_COMPARE 0
#define BRW_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_LOD_COMPARE 1
#define BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_LOD_COMPARE 1
#define BRW_SAMPLER_MESSAGE_SIMD4X2_RESINFO 2
#define BRW_SAMPLER_MESSAGE_SIMD16_RESINFO 2
#define BRW_SAMPLER_MESSAGE_SIMD4X2_LD 3
#define BRW_SAMPLER_MESSAGE_SIMD8_LD 3
#define BRW_SAMPLER_MESSAGE_SIMD16_LD 3
 
#define GEN5_SAMPLER_MESSAGE_SAMPLE 0
#define GEN5_SAMPLER_MESSAGE_SAMPLE_BIAS 1
#define GEN5_SAMPLER_MESSAGE_SAMPLE_LOD 2
#define GEN5_SAMPLER_MESSAGE_SAMPLE_COMPARE 3
#define GEN5_SAMPLER_MESSAGE_SAMPLE_DERIVS 4
#define GEN5_SAMPLER_MESSAGE_SAMPLE_BIAS_COMPARE 5
#define GEN5_SAMPLER_MESSAGE_SAMPLE_LOD_COMPARE 6
#define GEN5_SAMPLER_MESSAGE_SAMPLE_LD 7
#define GEN5_SAMPLER_MESSAGE_LOD 9
#define GEN5_SAMPLER_MESSAGE_SAMPLE_RESINFO 10
#define HSW_SAMPLER_MESSAGE_SAMPLE_DERIV_COMPARE 20
#define GEN7_SAMPLER_MESSAGE_SAMPLE_LD_MCS 29
#define GEN7_SAMPLER_MESSAGE_SAMPLE_LD2DMS 30
#define GEN7_SAMPLER_MESSAGE_SAMPLE_LD2DSS 31
 
/* for GEN5 only */
#define BRW_SAMPLER_SIMD_MODE_SIMD4X2 0
#define BRW_SAMPLER_SIMD_MODE_SIMD8 1
#define BRW_SAMPLER_SIMD_MODE_SIMD16 2
#define BRW_SAMPLER_SIMD_MODE_SIMD32_64 3
 
#define BRW_DATAPORT_OWORD_BLOCK_1_OWORDLOW 0
#define BRW_DATAPORT_OWORD_BLOCK_1_OWORDHIGH 1
#define BRW_DATAPORT_OWORD_BLOCK_2_OWORDS 2
#define BRW_DATAPORT_OWORD_BLOCK_4_OWORDS 3
#define BRW_DATAPORT_OWORD_BLOCK_8_OWORDS 4
 
#define BRW_DATAPORT_OWORD_DUAL_BLOCK_1OWORD 0
#define BRW_DATAPORT_OWORD_DUAL_BLOCK_4OWORDS 2
 
#define BRW_DATAPORT_DWORD_SCATTERED_BLOCK_8DWORDS 2
#define BRW_DATAPORT_DWORD_SCATTERED_BLOCK_16DWORDS 3
 
/* This one stays the same across generations. */
#define BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ 0
/* GEN4 */
#define BRW_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ 1
#define BRW_DATAPORT_READ_MESSAGE_MEDIA_BLOCK_READ 2
#define BRW_DATAPORT_READ_MESSAGE_DWORD_SCATTERED_READ 3
/* G45, GEN5 */
#define G45_DATAPORT_READ_MESSAGE_RENDER_UNORM_READ 1
#define G45_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ 2
#define G45_DATAPORT_READ_MESSAGE_AVC_LOOP_FILTER_READ 3
#define G45_DATAPORT_READ_MESSAGE_MEDIA_BLOCK_READ 4
#define G45_DATAPORT_READ_MESSAGE_DWORD_SCATTERED_READ 6
/* GEN6 */
#define GEN6_DATAPORT_READ_MESSAGE_RENDER_UNORM_READ 1
#define GEN6_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ 2
#define GEN6_DATAPORT_READ_MESSAGE_MEDIA_BLOCK_READ 4
#define GEN6_DATAPORT_READ_MESSAGE_OWORD_UNALIGN_BLOCK_READ 5
#define GEN6_DATAPORT_READ_MESSAGE_DWORD_SCATTERED_READ 6
 
#define BRW_DATAPORT_READ_TARGET_DATA_CACHE 0
#define BRW_DATAPORT_READ_TARGET_RENDER_CACHE 1
#define BRW_DATAPORT_READ_TARGET_SAMPLER_CACHE 2
 
#define BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE 0
#define BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE_REPLICATED 1
#define BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_DUAL_SOURCE_SUBSPAN01 2
#define BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_DUAL_SOURCE_SUBSPAN23 3
#define BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_SINGLE_SOURCE_SUBSPAN01 4
 
#define BRW_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE 0
#define BRW_DATAPORT_WRITE_MESSAGE_OWORD_DUAL_BLOCK_WRITE 1
#define BRW_DATAPORT_WRITE_MESSAGE_MEDIA_BLOCK_WRITE 2
#define BRW_DATAPORT_WRITE_MESSAGE_DWORD_SCATTERED_WRITE 3
#define BRW_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE 4
#define BRW_DATAPORT_WRITE_MESSAGE_STREAMED_VERTEX_BUFFER_WRITE 5
#define BRW_DATAPORT_WRITE_MESSAGE_FLUSH_RENDER_CACHE 7
 
/* GEN6 */
#define GEN6_DATAPORT_WRITE_MESSAGE_DWORD_ATOMIC_WRITE 7
#define GEN6_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE 8
#define GEN6_DATAPORT_WRITE_MESSAGE_OWORD_DUAL_BLOCK_WRITE 9
#define GEN6_DATAPORT_WRITE_MESSAGE_MEDIA_BLOCK_WRITE 10
#define GEN6_DATAPORT_WRITE_MESSAGE_DWORD_SCATTERED_WRITE 11
#define GEN6_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE 12
#define GEN6_DATAPORT_WRITE_MESSAGE_STREAMED_VB_WRITE 13
#define GEN6_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_UNORM_WRITE 14
 
/* GEN7 */
#define GEN7_DATAPORT_WRITE_MESSAGE_OWORD_DUAL_BLOCK_WRITE 10
#define GEN7_DATAPORT_DC_OWORD_BLOCK_READ 0
#define GEN7_DATAPORT_DC_UNALIGNED_OWORD_BLOCK_READ 1
#define GEN7_DATAPORT_DC_OWORD_DUAL_BLOCK_READ 2
#define GEN7_DATAPORT_DC_DWORD_SCATTERED_READ 3
#define GEN7_DATAPORT_DC_BYTE_SCATTERED_READ 4
#define GEN7_DATAPORT_DC_UNTYPED_SURFACE_READ 5
#define GEN7_DATAPORT_DC_UNTYPED_ATOMIC_OP 6
#define GEN7_DATAPORT_DC_MEMORY_FENCE 7
#define GEN7_DATAPORT_DC_OWORD_BLOCK_WRITE 8
#define GEN7_DATAPORT_DC_OWORD_DUAL_BLOCK_WRITE 10
#define GEN7_DATAPORT_DC_DWORD_SCATTERED_WRITE 11
#define GEN7_DATAPORT_DC_BYTE_SCATTERED_WRITE 12
#define GEN7_DATAPORT_DC_UNTYPED_SURFACE_WRITE 13
 
/* HSW */
#define HSW_DATAPORT_DC_PORT0_OWORD_BLOCK_READ 0
#define HSW_DATAPORT_DC_PORT0_UNALIGNED_OWORD_BLOCK_READ 1
#define HSW_DATAPORT_DC_PORT0_OWORD_DUAL_BLOCK_READ 2
#define HSW_DATAPORT_DC_PORT0_DWORD_SCATTERED_READ 3
#define HSW_DATAPORT_DC_PORT0_BYTE_SCATTERED_READ 4
#define HSW_DATAPORT_DC_PORT0_MEMORY_FENCE 7
#define HSW_DATAPORT_DC_PORT0_OWORD_BLOCK_WRITE 8
#define HSW_DATAPORT_DC_PORT0_OWORD_DUAL_BLOCK_WRITE 10
#define HSW_DATAPORT_DC_PORT0_DWORD_SCATTERED_WRITE 11
#define HSW_DATAPORT_DC_PORT0_BYTE_SCATTERED_WRITE 12
 
#define HSW_DATAPORT_DC_PORT1_UNTYPED_SURFACE_READ 1
#define HSW_DATAPORT_DC_PORT1_UNTYPED_ATOMIC_OP 2
#define HSW_DATAPORT_DC_PORT1_UNTYPED_ATOMIC_OP_SIMD4X2 3
#define HSW_DATAPORT_DC_PORT1_MEDIA_BLOCK_READ 4
#define HSW_DATAPORT_DC_PORT1_TYPED_SURFACE_READ 5
#define HSW_DATAPORT_DC_PORT1_TYPED_ATOMIC_OP 6
#define HSW_DATAPORT_DC_PORT1_TYPED_ATOMIC_OP_SIMD4X2 7
#define HSW_DATAPORT_DC_PORT1_UNTYPED_SURFACE_WRITE 9
#define HSW_DATAPORT_DC_PORT1_MEDIA_BLOCK_WRITE 10
#define HSW_DATAPORT_DC_PORT1_ATOMIC_COUNTER_OP 11
#define HSW_DATAPORT_DC_PORT1_ATOMIC_COUNTER_OP_SIMD4X2 12
#define HSW_DATAPORT_DC_PORT1_TYPED_SURFACE_WRITE 13
 
/* dataport atomic operations. */
#define BRW_AOP_AND 1
#define BRW_AOP_OR 2
#define BRW_AOP_XOR 3
#define BRW_AOP_MOV 4
#define BRW_AOP_INC 5
#define BRW_AOP_DEC 6
#define BRW_AOP_ADD 7
#define BRW_AOP_SUB 8
#define BRW_AOP_REVSUB 9
#define BRW_AOP_IMAX 10
#define BRW_AOP_IMIN 11
#define BRW_AOP_UMAX 12
#define BRW_AOP_UMIN 13
#define BRW_AOP_CMPWR 14
#define BRW_AOP_PREDEC 15
 
/* Function-field encodings for the extended math unit (used with
 * BRW_OPCODE_MATH / the Gen4-5 math SFID).  Availability by generation
 * is noted inline where it differs.
 */
#define BRW_MATH_FUNCTION_INV 1
#define BRW_MATH_FUNCTION_LOG 2
#define BRW_MATH_FUNCTION_EXP 3
#define BRW_MATH_FUNCTION_SQRT 4
#define BRW_MATH_FUNCTION_RSQ 5
#define BRW_MATH_FUNCTION_SIN 6
#define BRW_MATH_FUNCTION_COS 7
#define BRW_MATH_FUNCTION_SINCOS 8 /* gen4, gen5 */
#define BRW_MATH_FUNCTION_FDIV 9 /* gen6+ */
#define BRW_MATH_FUNCTION_POW 10
#define BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER 11
#define BRW_MATH_FUNCTION_INT_DIV_QUOTIENT 12
#define BRW_MATH_FUNCTION_INT_DIV_REMAINDER 13
 
#define BRW_MATH_INTEGER_UNSIGNED 0
#define BRW_MATH_INTEGER_SIGNED 1
 
#define BRW_MATH_PRECISION_FULL 0
#define BRW_MATH_PRECISION_PARTIAL 1
 
#define BRW_MATH_SATURATE_NONE 0
#define BRW_MATH_SATURATE_SATURATE 1
 
#define BRW_MATH_DATA_VECTOR 0
#define BRW_MATH_DATA_SCALAR 1
 
#define BRW_URB_OPCODE_WRITE 0
 
#define BRW_URB_SWIZZLE_NONE 0
#define BRW_URB_SWIZZLE_INTERLEAVE 1
#define BRW_URB_SWIZZLE_TRANSPOSE 2
 
#define BRW_SCRATCH_SPACE_SIZE_1K 0
#define BRW_SCRATCH_SPACE_SIZE_2K 1
#define BRW_SCRATCH_SPACE_SIZE_4K 2
#define BRW_SCRATCH_SPACE_SIZE_8K 3
#define BRW_SCRATCH_SPACE_SIZE_16K 4
#define BRW_SCRATCH_SPACE_SIZE_32K 5
#define BRW_SCRATCH_SPACE_SIZE_64K 6
#define BRW_SCRATCH_SPACE_SIZE_128K 7
#define BRW_SCRATCH_SPACE_SIZE_256K 8
#define BRW_SCRATCH_SPACE_SIZE_512K 9
#define BRW_SCRATCH_SPACE_SIZE_1M 10
#define BRW_SCRATCH_SPACE_SIZE_2M 11
 
 
#define CMD_URB_FENCE 0x6000
#define CMD_CS_URB_STATE 0x6001
#define CMD_CONST_BUFFER 0x6002
 
#define CMD_STATE_BASE_ADDRESS 0x6101
#define CMD_STATE_SIP 0x6102
#define CMD_PIPELINE_SELECT_965 0x6104
#define CMD_PIPELINE_SELECT_GM45 0x6904
 
#define _3DSTATE_PIPELINED_POINTERS 0x7800
#define _3DSTATE_BINDING_TABLE_POINTERS 0x7801
# define GEN6_BINDING_TABLE_MODIFY_VS (1 << 8)
# define GEN6_BINDING_TABLE_MODIFY_GS (1 << 9)
# define GEN6_BINDING_TABLE_MODIFY_PS (1 << 12)
 
#define _3DSTATE_BINDING_TABLE_POINTERS_VS 0x7826 /* GEN7+ */
#define _3DSTATE_BINDING_TABLE_POINTERS_HS 0x7827 /* GEN7+ */
#define _3DSTATE_BINDING_TABLE_POINTERS_DS 0x7828 /* GEN7+ */
#define _3DSTATE_BINDING_TABLE_POINTERS_GS 0x7829 /* GEN7+ */
#define _3DSTATE_BINDING_TABLE_POINTERS_PS 0x782A /* GEN7+ */
 
#define _3DSTATE_SAMPLER_STATE_POINTERS 0x7802 /* GEN6+ */
# define PS_SAMPLER_STATE_CHANGE (1 << 12)
# define GS_SAMPLER_STATE_CHANGE (1 << 9)
# define VS_SAMPLER_STATE_CHANGE (1 << 8)
/* DW1: VS */
/* DW2: GS */
/* DW3: PS */
 
#define _3DSTATE_SAMPLER_STATE_POINTERS_VS 0x782B /* GEN7+ */
#define _3DSTATE_SAMPLER_STATE_POINTERS_GS 0x782E /* GEN7+ */
#define _3DSTATE_SAMPLER_STATE_POINTERS_PS 0x782F /* GEN7+ */
 
#define _3DSTATE_VERTEX_BUFFERS 0x7808
# define BRW_VB0_INDEX_SHIFT 27
# define GEN6_VB0_INDEX_SHIFT 26
# define BRW_VB0_ACCESS_VERTEXDATA (0 << 26)
# define BRW_VB0_ACCESS_INSTANCEDATA (1 << 26)
# define GEN6_VB0_ACCESS_VERTEXDATA (0 << 20)
# define GEN6_VB0_ACCESS_INSTANCEDATA (1 << 20)
# define GEN7_VB0_ADDRESS_MODIFYENABLE (1 << 14)
# define BRW_VB0_PITCH_SHIFT 0
 
#define _3DSTATE_VERTEX_ELEMENTS 0x7809
# define BRW_VE0_INDEX_SHIFT 27
# define GEN6_VE0_INDEX_SHIFT 26
# define BRW_VE0_FORMAT_SHIFT 16
# define BRW_VE0_VALID (1 << 26)
# define GEN6_VE0_VALID (1 << 25)
# define GEN6_VE0_EDGE_FLAG_ENABLE (1 << 15)
# define BRW_VE0_SRC_OFFSET_SHIFT 0
# define BRW_VE1_COMPONENT_NOSTORE 0
# define BRW_VE1_COMPONENT_STORE_SRC 1
# define BRW_VE1_COMPONENT_STORE_0 2
# define BRW_VE1_COMPONENT_STORE_1_FLT 3
# define BRW_VE1_COMPONENT_STORE_1_INT 4
# define BRW_VE1_COMPONENT_STORE_VID 5
# define BRW_VE1_COMPONENT_STORE_IID 6
# define BRW_VE1_COMPONENT_STORE_PID 7
# define BRW_VE1_COMPONENT_0_SHIFT 28
# define BRW_VE1_COMPONENT_1_SHIFT 24
# define BRW_VE1_COMPONENT_2_SHIFT 20
# define BRW_VE1_COMPONENT_3_SHIFT 16
# define BRW_VE1_DST_OFFSET_SHIFT 0
 
#define CMD_INDEX_BUFFER 0x780a
#define GEN4_3DSTATE_VF_STATISTICS 0x780b
#define GM45_3DSTATE_VF_STATISTICS 0x680b
#define _3DSTATE_CC_STATE_POINTERS 0x780e /* GEN6+ */
#define _3DSTATE_BLEND_STATE_POINTERS 0x7824 /* GEN7+ */
#define _3DSTATE_DEPTH_STENCIL_STATE_POINTERS 0x7825 /* GEN7+ */
 
#define _3DSTATE_URB 0x7805 /* GEN6 */
# define GEN6_URB_VS_SIZE_SHIFT 16
# define GEN6_URB_VS_ENTRIES_SHIFT 0
# define GEN6_URB_GS_ENTRIES_SHIFT 8
# define GEN6_URB_GS_SIZE_SHIFT 0
 
#define _3DSTATE_VF 0x780c /* GEN7.5+ */
#define HSW_CUT_INDEX_ENABLE (1 << 8)
 
#define _3DSTATE_URB_VS 0x7830 /* GEN7+ */
#define _3DSTATE_URB_HS 0x7831 /* GEN7+ */
#define _3DSTATE_URB_DS 0x7832 /* GEN7+ */
#define _3DSTATE_URB_GS 0x7833 /* GEN7+ */
# define GEN7_URB_ENTRY_SIZE_SHIFT 16
# define GEN7_URB_STARTING_ADDRESS_SHIFT 25
 
#define _3DSTATE_PUSH_CONSTANT_ALLOC_VS 0x7912 /* GEN7+ */
#define _3DSTATE_PUSH_CONSTANT_ALLOC_PS 0x7916 /* GEN7+ */
# define GEN7_PUSH_CONSTANT_BUFFER_OFFSET_SHIFT 16
 
#define _3DSTATE_VIEWPORT_STATE_POINTERS 0x780d /* GEN6+ */
# define GEN6_CC_VIEWPORT_MODIFY (1 << 12)
# define GEN6_SF_VIEWPORT_MODIFY (1 << 11)
# define GEN6_CLIP_VIEWPORT_MODIFY (1 << 10)
 
#define _3DSTATE_VIEWPORT_STATE_POINTERS_CC 0x7823 /* GEN7+ */
#define _3DSTATE_VIEWPORT_STATE_POINTERS_SF_CL 0x7821 /* GEN7+ */
 
#define _3DSTATE_SCISSOR_STATE_POINTERS 0x780f /* GEN6+ */
 
#define _3DSTATE_VS 0x7810 /* GEN6+ */
/* DW2 */
# define GEN6_VS_SPF_MODE (1 << 31)
# define GEN6_VS_VECTOR_MASK_ENABLE (1 << 30)
# define GEN6_VS_SAMPLER_COUNT_SHIFT 27
# define GEN6_VS_BINDING_TABLE_ENTRY_COUNT_SHIFT 18
# define GEN6_VS_FLOATING_POINT_MODE_IEEE_754 (0 << 16)
# define GEN6_VS_FLOATING_POINT_MODE_ALT (1 << 16)
/* DW4 */
# define GEN6_VS_DISPATCH_START_GRF_SHIFT 20
# define GEN6_VS_URB_READ_LENGTH_SHIFT 11
# define GEN6_VS_URB_ENTRY_READ_OFFSET_SHIFT 4
/* DW5 */
# define GEN6_VS_MAX_THREADS_SHIFT 25
# define HSW_VS_MAX_THREADS_SHIFT 23
# define GEN6_VS_STATISTICS_ENABLE (1 << 10)
# define GEN6_VS_CACHE_DISABLE (1 << 1)
# define GEN6_VS_ENABLE (1 << 0)
 
#define _3DSTATE_GS 0x7811 /* GEN6+ */
/* DW2 */
# define GEN6_GS_SPF_MODE (1 << 31)
# define GEN6_GS_VECTOR_MASK_ENABLE (1 << 30)
# define GEN6_GS_SAMPLER_COUNT_SHIFT 27
# define GEN6_GS_BINDING_TABLE_ENTRY_COUNT_SHIFT 18
# define GEN6_GS_FLOATING_POINT_MODE_IEEE_754 (0 << 16)
# define GEN6_GS_FLOATING_POINT_MODE_ALT (1 << 16)
/* DW4 */
# define GEN6_GS_URB_READ_LENGTH_SHIFT 11
# define GEN7_GS_INCLUDE_VERTEX_HANDLES (1 << 10)
# define GEN6_GS_URB_ENTRY_READ_OFFSET_SHIFT 4
# define GEN6_GS_DISPATCH_START_GRF_SHIFT 0
/* DW5 */
# define GEN6_GS_MAX_THREADS_SHIFT 25
# define GEN6_GS_STATISTICS_ENABLE (1 << 10)
# define GEN6_GS_SO_STATISTICS_ENABLE (1 << 9)
# define GEN6_GS_RENDERING_ENABLE (1 << 8)
# define GEN7_GS_ENABLE (1 << 0)
/* DW6 */
# define GEN6_GS_REORDER (1 << 30)
# define GEN6_GS_DISCARD_ADJACENCY (1 << 29)
# define GEN6_GS_SVBI_PAYLOAD_ENABLE (1 << 28)
# define GEN6_GS_SVBI_POSTINCREMENT_ENABLE (1 << 27)
# define GEN6_GS_SVBI_POSTINCREMENT_VALUE_SHIFT 16
# define GEN6_GS_SVBI_POSTINCREMENT_VALUE_MASK INTEL_MASK(25, 16)
# define GEN6_GS_ENABLE (1 << 15)
 
# define BRW_GS_EDGE_INDICATOR_0 (1 << 8)
# define BRW_GS_EDGE_INDICATOR_1 (1 << 9)
 
#define _3DSTATE_HS 0x781B /* GEN7+ */
#define _3DSTATE_TE 0x781C /* GEN7+ */
#define _3DSTATE_DS 0x781D /* GEN7+ */
 
#define _3DSTATE_CLIP 0x7812 /* GEN6+ */
/* DW1 */
# define GEN7_CLIP_WINDING_CW (0 << 20)
# define GEN7_CLIP_WINDING_CCW (1 << 20)
# define GEN7_CLIP_VERTEX_SUBPIXEL_PRECISION_8 (0 << 19)
# define GEN7_CLIP_VERTEX_SUBPIXEL_PRECISION_4 (1 << 19)
# define GEN7_CLIP_EARLY_CULL (1 << 18)
# define GEN7_CLIP_CULLMODE_BOTH (0 << 16)
# define GEN7_CLIP_CULLMODE_NONE (1 << 16)
# define GEN7_CLIP_CULLMODE_FRONT (2 << 16)
# define GEN7_CLIP_CULLMODE_BACK (3 << 16)
# define GEN6_CLIP_STATISTICS_ENABLE (1 << 10)
/**
* Just does cheap culling based on the clip distance. Bits must be
* disjoint with USER_CLIP_CLIP_DISTANCE bits.
*/
# define GEN6_USER_CLIP_CULL_DISTANCES_SHIFT 0
/* DW2 */
# define GEN6_CLIP_ENABLE (1 << 31)
# define GEN6_CLIP_API_OGL (0 << 30)
# define GEN6_CLIP_API_D3D (1 << 30)
# define GEN6_CLIP_XY_TEST (1 << 28)
# define GEN6_CLIP_Z_TEST (1 << 27)
# define GEN6_CLIP_GB_TEST (1 << 26)
/** 8-bit field of which user clip distances to clip against. */
# define GEN6_USER_CLIP_CLIP_DISTANCES_SHIFT 16
# define GEN6_CLIP_MODE_NORMAL (0 << 13)
# define GEN6_CLIP_MODE_REJECT_ALL (3 << 13)
# define GEN6_CLIP_MODE_ACCEPT_ALL (4 << 13)
# define GEN6_CLIP_PERSPECTIVE_DIVIDE_DISABLE (1 << 9)
# define GEN6_CLIP_NON_PERSPECTIVE_BARYCENTRIC_ENABLE (1 << 8)
# define GEN6_CLIP_TRI_PROVOKE_SHIFT 4
# define GEN6_CLIP_LINE_PROVOKE_SHIFT 2
# define GEN6_CLIP_TRIFAN_PROVOKE_SHIFT 0
/* DW3 */
# define GEN6_CLIP_MIN_POINT_WIDTH_SHIFT 17
# define GEN6_CLIP_MAX_POINT_WIDTH_SHIFT 6
# define GEN6_CLIP_FORCE_ZERO_RTAINDEX (1 << 5)
 
#define _3DSTATE_SF 0x7813 /* GEN6+ */
/* DW1 (for gen6) */
# define GEN6_SF_NUM_OUTPUTS_SHIFT 22
# define GEN6_SF_SWIZZLE_ENABLE (1 << 21)
# define GEN6_SF_POINT_SPRITE_UPPERLEFT (0 << 20)
# define GEN6_SF_POINT_SPRITE_LOWERLEFT (1 << 20)
# define GEN6_SF_URB_ENTRY_READ_LENGTH_SHIFT 11
# define GEN6_SF_URB_ENTRY_READ_OFFSET_SHIFT 4
/* DW2 */
# define GEN6_SF_LEGACY_GLOBAL_DEPTH_BIAS (1 << 11)
# define GEN6_SF_STATISTICS_ENABLE (1 << 10)
# define GEN6_SF_GLOBAL_DEPTH_OFFSET_SOLID (1 << 9)
# define GEN6_SF_GLOBAL_DEPTH_OFFSET_WIREFRAME (1 << 8)
# define GEN6_SF_GLOBAL_DEPTH_OFFSET_POINT (1 << 7)
# define GEN6_SF_FRONT_SOLID (0 << 5)
# define GEN6_SF_FRONT_WIREFRAME (1 << 5)
# define GEN6_SF_FRONT_POINT (2 << 5)
# define GEN6_SF_BACK_SOLID (0 << 3)
# define GEN6_SF_BACK_WIREFRAME (1 << 3)
# define GEN6_SF_BACK_POINT (2 << 3)
# define GEN6_SF_VIEWPORT_TRANSFORM_ENABLE (1 << 1)
# define GEN6_SF_WINDING_CCW (1 << 0)
/* DW3 */
# define GEN6_SF_LINE_AA_ENABLE (1 << 31)
# define GEN6_SF_CULL_BOTH (0 << 29)
# define GEN6_SF_CULL_NONE (1 << 29)
# define GEN6_SF_CULL_FRONT (2 << 29)
# define GEN6_SF_CULL_BACK (3 << 29)
# define GEN6_SF_LINE_WIDTH_SHIFT 18 /* U3.7 */
# define GEN6_SF_LINE_END_CAP_WIDTH_0_5 (0 << 16)
# define GEN6_SF_LINE_END_CAP_WIDTH_1_0 (1 << 16)
# define GEN6_SF_LINE_END_CAP_WIDTH_2_0 (2 << 16)
# define GEN6_SF_LINE_END_CAP_WIDTH_4_0 (3 << 16)
# define GEN6_SF_SCISSOR_ENABLE (1 << 11)
# define GEN6_SF_MSRAST_OFF_PIXEL (0 << 8)
# define GEN6_SF_MSRAST_OFF_PATTERN (1 << 8)
# define GEN6_SF_MSRAST_ON_PIXEL (2 << 8)
# define GEN6_SF_MSRAST_ON_PATTERN (3 << 8)
/* DW4 */
# define GEN6_SF_TRI_PROVOKE_SHIFT 29
# define GEN6_SF_LINE_PROVOKE_SHIFT 27
# define GEN6_SF_TRIFAN_PROVOKE_SHIFT 25
# define GEN6_SF_LINE_AA_MODE_MANHATTAN (0 << 14)
# define GEN6_SF_LINE_AA_MODE_TRUE (1 << 14)
# define GEN6_SF_VERTEX_SUBPIXEL_8BITS (0 << 12)
# define GEN6_SF_VERTEX_SUBPIXEL_4BITS (1 << 12)
# define GEN6_SF_USE_STATE_POINT_WIDTH (1 << 11)
# define GEN6_SF_POINT_WIDTH_SHIFT 0 /* U8.3 */
/* DW5: depth offset constant */
/* DW6: depth offset scale */
/* DW7: depth offset clamp */
/* DW8 */
# define ATTRIBUTE_1_OVERRIDE_W (1 << 31)
# define ATTRIBUTE_1_OVERRIDE_Z (1 << 30)
# define ATTRIBUTE_1_OVERRIDE_Y (1 << 29)
# define ATTRIBUTE_1_OVERRIDE_X (1 << 28)
# define ATTRIBUTE_1_CONST_SOURCE_SHIFT 25
# define ATTRIBUTE_1_SWIZZLE_SHIFT 22
# define ATTRIBUTE_1_SOURCE_SHIFT 16
# define ATTRIBUTE_0_OVERRIDE_W (1 << 15)
# define ATTRIBUTE_0_OVERRIDE_Z (1 << 14)
# define ATTRIBUTE_0_OVERRIDE_Y (1 << 13)
# define ATTRIBUTE_0_OVERRIDE_X (1 << 12)
# define ATTRIBUTE_0_CONST_SOURCE_SHIFT 9
# define ATTRIBUTE_0_SWIZZLE_SHIFT 6
# define ATTRIBUTE_0_SOURCE_SHIFT 0
 
# define ATTRIBUTE_SWIZZLE_INPUTATTR 0
# define ATTRIBUTE_SWIZZLE_INPUTATTR_FACING 1
# define ATTRIBUTE_SWIZZLE_INPUTATTR_W 2
# define ATTRIBUTE_SWIZZLE_INPUTATTR_FACING_W 3
# define ATTRIBUTE_SWIZZLE_SHIFT 6
 
/* DW16: Point sprite texture coordinate enables */
/* DW17: Constant interpolation enables */
/* DW18: attr 0-7 wrap shortest enables */
/* DW19: attr 8-16 wrap shortest enables */
 
/* On GEN7, many fields of 3DSTATE_SF were split out into a new command:
* 3DSTATE_SBE. The remaining fields live in different DWords, but retain
* the same bit-offset. The only new field:
*/
/* GEN7/DW1: */
# define GEN7_SF_DEPTH_BUFFER_SURFACE_FORMAT_SHIFT 12
/* GEN7/DW2: */
# define HSW_SF_LINE_STIPPLE_ENABLE 14
 
#define _3DSTATE_SBE 0x781F /* GEN7+ */
/* DW1 */
# define GEN7_SBE_SWIZZLE_CONTROL_MODE (1 << 28)
# define GEN7_SBE_NUM_OUTPUTS_SHIFT 22
# define GEN7_SBE_SWIZZLE_ENABLE (1 << 21)
# define GEN7_SBE_POINT_SPRITE_LOWERLEFT (1 << 20)
# define GEN7_SBE_URB_ENTRY_READ_LENGTH_SHIFT 11
# define GEN7_SBE_URB_ENTRY_READ_OFFSET_SHIFT 4
/* DW2-9: Attribute setup (same as DW8-15 of gen6 _3DSTATE_SF) */
/* DW10: Point sprite texture coordinate enables */
/* DW11: Constant interpolation enables */
/* DW12: attr 0-7 wrap shortest enables */
/* DW13: attr 8-16 wrap shortest enables */
 
/* Barycentric interpolation modes for the WM/PS stage.  The enum values
 * double as bit indices (see the mask macro below and the
 * GEN6_WM_BARYCENTRIC_INTERPOLATION_MODE_SHIFT field), so the numbering
 * must stay dense and stable.
 */
enum brw_wm_barycentric_interp_mode {
BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC = 0,
BRW_WM_PERSPECTIVE_CENTROID_BARYCENTRIC = 1,
BRW_WM_PERSPECTIVE_SAMPLE_BARYCENTRIC = 2,
BRW_WM_NONPERSPECTIVE_PIXEL_BARYCENTRIC = 3,
BRW_WM_NONPERSPECTIVE_CENTROID_BARYCENTRIC = 4,
BRW_WM_NONPERSPECTIVE_SAMPLE_BARYCENTRIC = 5,
BRW_WM_BARYCENTRIC_INTERP_MODE_COUNT = 6
};
/* Bitmask of all non-perspective barycentric modes. */
#define BRW_WM_NONPERSPECTIVE_BARYCENTRIC_BITS \
((1 << BRW_WM_NONPERSPECTIVE_PIXEL_BARYCENTRIC) | \
(1 << BRW_WM_NONPERSPECTIVE_CENTROID_BARYCENTRIC) | \
(1 << BRW_WM_NONPERSPECTIVE_SAMPLE_BARYCENTRIC))
 
#define _3DSTATE_WM 0x7814 /* GEN6+ */
/* DW1: kernel pointer */
/* DW2 */
# define GEN6_WM_SPF_MODE (1 << 31)
# define GEN6_WM_VECTOR_MASK_ENABLE (1 << 30)
# define GEN6_WM_SAMPLER_COUNT_SHIFT 27
# define GEN6_WM_BINDING_TABLE_ENTRY_COUNT_SHIFT 18
# define GEN6_WM_FLOATING_POINT_MODE_IEEE_754 (0 << 16)
# define GEN6_WM_FLOATING_POINT_MODE_ALT (1 << 16)
/* DW3: scratch space */
/* DW4 */
# define GEN6_WM_STATISTICS_ENABLE (1 << 31)
# define GEN6_WM_DEPTH_CLEAR (1 << 30)
# define GEN6_WM_DEPTH_RESOLVE (1 << 28)
# define GEN6_WM_HIERARCHICAL_DEPTH_RESOLVE (1 << 27)
# define GEN6_WM_DISPATCH_START_GRF_SHIFT_0 16
# define GEN6_WM_DISPATCH_START_GRF_SHIFT_1 8
# define GEN6_WM_DISPATCH_START_GRF_SHIFT_2 0
/* DW5 */
# define GEN6_WM_MAX_THREADS_SHIFT 25
# define GEN6_WM_KILL_ENABLE (1 << 22)
# define GEN6_WM_COMPUTED_DEPTH (1 << 21)
# define GEN6_WM_USES_SOURCE_DEPTH (1 << 20)
# define GEN6_WM_DISPATCH_ENABLE (1 << 19)
# define GEN6_WM_LINE_END_CAP_AA_WIDTH_0_5 (0 << 16)
# define GEN6_WM_LINE_END_CAP_AA_WIDTH_1_0 (1 << 16)
# define GEN6_WM_LINE_END_CAP_AA_WIDTH_2_0 (2 << 16)
# define GEN6_WM_LINE_END_CAP_AA_WIDTH_4_0 (3 << 16)
# define GEN6_WM_LINE_AA_WIDTH_0_5 (0 << 14)
# define GEN6_WM_LINE_AA_WIDTH_1_0 (1 << 14)
# define GEN6_WM_LINE_AA_WIDTH_2_0 (2 << 14)
# define GEN6_WM_LINE_AA_WIDTH_4_0 (3 << 14)
# define GEN6_WM_POLYGON_STIPPLE_ENABLE (1 << 13)
# define GEN6_WM_LINE_STIPPLE_ENABLE (1 << 11)
# define GEN6_WM_OMASK_TO_RENDER_TARGET (1 << 9)
# define GEN6_WM_USES_SOURCE_W (1 << 8)
# define GEN6_WM_DUAL_SOURCE_BLEND_ENABLE (1 << 7)
# define GEN6_WM_32_DISPATCH_ENABLE (1 << 2)
# define GEN6_WM_16_DISPATCH_ENABLE (1 << 1)
# define GEN6_WM_8_DISPATCH_ENABLE (1 << 0)
/* DW6 */
# define GEN6_WM_NUM_SF_OUTPUTS_SHIFT 20
# define GEN6_WM_POSOFFSET_NONE (0 << 18)
# define GEN6_WM_POSOFFSET_CENTROID (2 << 18)
# define GEN6_WM_POSOFFSET_SAMPLE (3 << 18)
# define GEN6_WM_POSITION_ZW_PIXEL (0 << 16)
# define GEN6_WM_POSITION_ZW_CENTROID (2 << 16)
# define GEN6_WM_POSITION_ZW_SAMPLE (3 << 16)
# define GEN6_WM_NONPERSPECTIVE_SAMPLE_BARYCENTRIC (1 << 15)
# define GEN6_WM_NONPERSPECTIVE_CENTROID_BARYCENTRIC (1 << 14)
# define GEN6_WM_NONPERSPECTIVE_PIXEL_BARYCENTRIC (1 << 13)
# define GEN6_WM_PERSPECTIVE_SAMPLE_BARYCENTRIC (1 << 12)
# define GEN6_WM_PERSPECTIVE_CENTROID_BARYCENTRIC (1 << 11)
# define GEN6_WM_PERSPECTIVE_PIXEL_BARYCENTRIC (1 << 10)
# define GEN6_WM_BARYCENTRIC_INTERPOLATION_MODE_SHIFT 10
# define GEN6_WM_POINT_RASTRULE_UPPER_RIGHT (1 << 9)
# define GEN6_WM_MSRAST_OFF_PIXEL (0 << 1)
# define GEN6_WM_MSRAST_OFF_PATTERN (1 << 1)
# define GEN6_WM_MSRAST_ON_PIXEL (2 << 1)
# define GEN6_WM_MSRAST_ON_PATTERN (3 << 1)
# define GEN6_WM_MSDISPMODE_PERSAMPLE (0 << 0)
# define GEN6_WM_MSDISPMODE_PERPIXEL (1 << 0)
/* DW7: kernel 1 pointer */
/* DW8: kernel 2 pointer */
 
#define _3DSTATE_CONSTANT_VS 0x7815 /* GEN6+ */
#define _3DSTATE_CONSTANT_GS 0x7816 /* GEN6+ */
#define _3DSTATE_CONSTANT_PS 0x7817 /* GEN6+ */
# define GEN6_CONSTANT_BUFFER_3_ENABLE (1 << 15)
# define GEN6_CONSTANT_BUFFER_2_ENABLE (1 << 14)
# define GEN6_CONSTANT_BUFFER_1_ENABLE (1 << 13)
# define GEN6_CONSTANT_BUFFER_0_ENABLE (1 << 12)
 
#define _3DSTATE_CONSTANT_HS 0x7819 /* GEN7+ */
#define _3DSTATE_CONSTANT_DS 0x781A /* GEN7+ */
 
#define _3DSTATE_STREAMOUT 0x781e /* GEN7+ */
/* DW1 */
# define SO_FUNCTION_ENABLE (1 << 31)
# define SO_RENDERING_DISABLE (1 << 30)
/* This selects which incoming rendering stream goes down the pipeline. The
* rendering stream is 0 if not defined by special cases in the GS state.
*/
# define SO_RENDER_STREAM_SELECT_SHIFT 27
# define SO_RENDER_STREAM_SELECT_MASK INTEL_MASK(28, 27)
/* Controls reordering of TRISTRIP_* elements in stream output (not rendering).
*/
# define SO_REORDER_TRAILING (1 << 26)
/* Controls SO_NUM_PRIMS_WRITTEN_* and SO_PRIM_STORAGE_* */
# define SO_STATISTICS_ENABLE (1 << 25)
# define SO_BUFFER_ENABLE(n) (1 << (8 + (n)))
/* DW2 */
# define SO_STREAM_3_VERTEX_READ_OFFSET_SHIFT 29
# define SO_STREAM_3_VERTEX_READ_OFFSET_MASK INTEL_MASK(29, 29)
# define SO_STREAM_3_VERTEX_READ_LENGTH_SHIFT 24
# define SO_STREAM_3_VERTEX_READ_LENGTH_MASK INTEL_MASK(28, 24)
# define SO_STREAM_2_VERTEX_READ_OFFSET_SHIFT 21
# define SO_STREAM_2_VERTEX_READ_OFFSET_MASK INTEL_MASK(21, 21)
# define SO_STREAM_2_VERTEX_READ_LENGTH_SHIFT 16
# define SO_STREAM_2_VERTEX_READ_LENGTH_MASK INTEL_MASK(20, 16)
# define SO_STREAM_1_VERTEX_READ_OFFSET_SHIFT 13
# define SO_STREAM_1_VERTEX_READ_OFFSET_MASK INTEL_MASK(13, 13)
# define SO_STREAM_1_VERTEX_READ_LENGTH_SHIFT 8
# define SO_STREAM_1_VERTEX_READ_LENGTH_MASK INTEL_MASK(12, 8)
# define SO_STREAM_0_VERTEX_READ_OFFSET_SHIFT 5
# define SO_STREAM_0_VERTEX_READ_OFFSET_MASK INTEL_MASK(5, 5)
# define SO_STREAM_0_VERTEX_READ_LENGTH_SHIFT 0
# define SO_STREAM_0_VERTEX_READ_LENGTH_MASK INTEL_MASK(4, 0)
 
/* 3DSTATE_WM for Gen7 */
/* DW1 */
# define GEN7_WM_STATISTICS_ENABLE (1 << 31)
# define GEN7_WM_DEPTH_CLEAR (1 << 30)
# define GEN7_WM_DISPATCH_ENABLE (1 << 29)
# define GEN7_WM_DEPTH_RESOLVE (1 << 28)
# define GEN7_WM_HIERARCHICAL_DEPTH_RESOLVE (1 << 27)
# define GEN7_WM_KILL_ENABLE (1 << 25)
# define GEN7_WM_PSCDEPTH_OFF (0 << 23)
# define GEN7_WM_PSCDEPTH_ON (1 << 23)
# define GEN7_WM_PSCDEPTH_ON_GE (2 << 23)
# define GEN7_WM_PSCDEPTH_ON_LE (3 << 23)
# define GEN7_WM_USES_SOURCE_DEPTH (1 << 20)
# define GEN7_WM_USES_SOURCE_W (1 << 19)
# define GEN7_WM_POSITION_ZW_PIXEL (0 << 17)
# define GEN7_WM_POSITION_ZW_CENTROID (2 << 17)
# define GEN7_WM_POSITION_ZW_SAMPLE (3 << 17)
# define GEN7_WM_BARYCENTRIC_INTERPOLATION_MODE_SHIFT 11
# define GEN7_WM_USES_INPUT_COVERAGE_MASK (1 << 10)
# define GEN7_WM_LINE_END_CAP_AA_WIDTH_0_5 (0 << 8)
# define GEN7_WM_LINE_END_CAP_AA_WIDTH_1_0 (1 << 8)
# define GEN7_WM_LINE_END_CAP_AA_WIDTH_2_0 (2 << 8)
# define GEN7_WM_LINE_END_CAP_AA_WIDTH_4_0 (3 << 8)
# define GEN7_WM_LINE_AA_WIDTH_0_5 (0 << 6)
# define GEN7_WM_LINE_AA_WIDTH_1_0 (1 << 6)
# define GEN7_WM_LINE_AA_WIDTH_2_0 (2 << 6)
# define GEN7_WM_LINE_AA_WIDTH_4_0 (3 << 6)
# define GEN7_WM_POLYGON_STIPPLE_ENABLE (1 << 4)
# define GEN7_WM_LINE_STIPPLE_ENABLE (1 << 3)
# define GEN7_WM_POINT_RASTRULE_UPPER_RIGHT (1 << 2)
# define GEN7_WM_MSRAST_OFF_PIXEL (0 << 0)
# define GEN7_WM_MSRAST_OFF_PATTERN (1 << 0)
# define GEN7_WM_MSRAST_ON_PIXEL (2 << 0)
# define GEN7_WM_MSRAST_ON_PATTERN (3 << 0)
/* DW2 */
# define GEN7_WM_MSDISPMODE_PERSAMPLE (0 << 31)
# define GEN7_WM_MSDISPMODE_PERPIXEL (1 << 31)
 
#define _3DSTATE_PS 0x7820 /* GEN7+ */
/* DW1: kernel pointer */
/* DW2 */
# define GEN7_PS_SPF_MODE (1 << 31)
# define GEN7_PS_VECTOR_MASK_ENABLE (1 << 30)
# define GEN7_PS_SAMPLER_COUNT_SHIFT 27
# define GEN7_PS_BINDING_TABLE_ENTRY_COUNT_SHIFT 18
# define GEN7_PS_FLOATING_POINT_MODE_IEEE_754 (0 << 16)
# define GEN7_PS_FLOATING_POINT_MODE_ALT (1 << 16)
/* DW3: scratch space */
/* DW4 */
# define IVB_PS_MAX_THREADS_SHIFT 24
# define HSW_PS_MAX_THREADS_SHIFT 23
# define HSW_PS_SAMPLE_MASK_SHIFT 12
# define HSW_PS_SAMPLE_MASK_MASK INTEL_MASK(19, 12)
# define GEN7_PS_PUSH_CONSTANT_ENABLE (1 << 11)
# define GEN7_PS_ATTRIBUTE_ENABLE (1 << 10)
# define GEN7_PS_OMASK_TO_RENDER_TARGET (1 << 9)
# define GEN7_PS_DUAL_SOURCE_BLEND_ENABLE (1 << 7)
# define GEN7_PS_POSOFFSET_NONE (0 << 3)
# define GEN7_PS_POSOFFSET_CENTROID (2 << 3)
# define GEN7_PS_POSOFFSET_SAMPLE (3 << 3)
# define GEN7_PS_32_DISPATCH_ENABLE (1 << 2)
# define GEN7_PS_16_DISPATCH_ENABLE (1 << 1)
# define GEN7_PS_8_DISPATCH_ENABLE (1 << 0)
/* DW5 */
# define GEN7_PS_DISPATCH_START_GRF_SHIFT_0 16
# define GEN7_PS_DISPATCH_START_GRF_SHIFT_1 8
# define GEN7_PS_DISPATCH_START_GRF_SHIFT_2 0
/* DW6: kernel 1 pointer */
/* DW7: kernel 2 pointer */
 
#define _3DSTATE_SAMPLE_MASK 0x7818 /* GEN6+ */
 
#define _3DSTATE_DRAWING_RECTANGLE 0x7900
#define _3DSTATE_BLEND_CONSTANT_COLOR 0x7901
#define _3DSTATE_CHROMA_KEY 0x7904
#define _3DSTATE_DEPTH_BUFFER 0x7905 /* GEN4-6 */
#define _3DSTATE_POLY_STIPPLE_OFFSET 0x7906
#define _3DSTATE_POLY_STIPPLE_PATTERN 0x7907
#define _3DSTATE_LINE_STIPPLE_PATTERN 0x7908
#define _3DSTATE_GLOBAL_DEPTH_OFFSET_CLAMP 0x7909
#define _3DSTATE_AA_LINE_PARAMETERS 0x790a /* G45+ */
 
#define _3DSTATE_GS_SVB_INDEX 0x790b /* CTG+ */
/* DW1 */
# define SVB_INDEX_SHIFT 29
# define SVB_LOAD_INTERNAL_VERTEX_COUNT (1 << 0) /* SNB+ */
/* DW2: SVB index */
/* DW3: SVB maximum index */
 
#define _3DSTATE_MULTISAMPLE 0x790d /* GEN6+ */
/* DW1 */
# define MS_PIXEL_LOCATION_CENTER (0 << 4)
# define MS_PIXEL_LOCATION_UPPER_LEFT (1 << 4)
# define MS_NUMSAMPLES_1 (0 << 1)
# define MS_NUMSAMPLES_4 (2 << 1)
# define MS_NUMSAMPLES_8 (3 << 1)
 
#define _3DSTATE_STENCIL_BUFFER 0x790e /* ILK, SNB */
#define _3DSTATE_HIER_DEPTH_BUFFER 0x790f /* ILK, SNB */
 
#define GEN7_3DSTATE_CLEAR_PARAMS 0x7804
#define GEN7_3DSTATE_DEPTH_BUFFER 0x7805
#define GEN7_3DSTATE_STENCIL_BUFFER 0x7806
# define HSW_STENCIL_ENABLED (1 << 31)
#define GEN7_3DSTATE_HIER_DEPTH_BUFFER 0x7807
 
#define _3DSTATE_CLEAR_PARAMS 0x7910 /* ILK, SNB */
# define GEN5_DEPTH_CLEAR_VALID (1 << 15)
/* DW1: depth clear value */
/* DW2 */
# define GEN7_DEPTH_CLEAR_VALID (1 << 0)
 
#define _3DSTATE_SO_DECL_LIST 0x7917 /* GEN7+ */
/* DW1 */
# define SO_STREAM_TO_BUFFER_SELECTS_3_SHIFT 12
# define SO_STREAM_TO_BUFFER_SELECTS_3_MASK INTEL_MASK(15, 12)
# define SO_STREAM_TO_BUFFER_SELECTS_2_SHIFT 8
# define SO_STREAM_TO_BUFFER_SELECTS_2_MASK INTEL_MASK(11, 8)
# define SO_STREAM_TO_BUFFER_SELECTS_1_SHIFT 4
# define SO_STREAM_TO_BUFFER_SELECTS_1_MASK INTEL_MASK(7, 4)
# define SO_STREAM_TO_BUFFER_SELECTS_0_SHIFT 0
# define SO_STREAM_TO_BUFFER_SELECTS_0_MASK INTEL_MASK(3, 0)
/* DW2 */
# define SO_NUM_ENTRIES_3_SHIFT 24
# define SO_NUM_ENTRIES_3_MASK INTEL_MASK(31, 24)
# define SO_NUM_ENTRIES_2_SHIFT 16
# define SO_NUM_ENTRIES_2_MASK INTEL_MASK(23, 16)
# define SO_NUM_ENTRIES_1_SHIFT 8
# define SO_NUM_ENTRIES_1_MASK INTEL_MASK(15, 8)
# define SO_NUM_ENTRIES_0_SHIFT 0
# define SO_NUM_ENTRIES_0_MASK INTEL_MASK(7, 0)
 
/* SO_DECL DW0 */
# define SO_DECL_OUTPUT_BUFFER_SLOT_SHIFT 12
# define SO_DECL_OUTPUT_BUFFER_SLOT_MASK INTEL_MASK(13, 12)
# define SO_DECL_HOLE_FLAG (1 << 11)
# define SO_DECL_REGISTER_INDEX_SHIFT 4
# define SO_DECL_REGISTER_INDEX_MASK INTEL_MASK(9, 4)
# define SO_DECL_COMPONENT_MASK_SHIFT 0
# define SO_DECL_COMPONENT_MASK_MASK INTEL_MASK(3, 0)
 
#define _3DSTATE_SO_BUFFER 0x7918 /* GEN7+ */
/* DW1 */
# define SO_BUFFER_INDEX_SHIFT 29
# define SO_BUFFER_INDEX_MASK INTEL_MASK(30, 29)
# define SO_BUFFER_PITCH_SHIFT 0
# define SO_BUFFER_PITCH_MASK INTEL_MASK(11, 0)
/* DW2: start address */
/* DW3: end address. */
 
#define CMD_PIPE_CONTROL 0x7a00
 
#define CMD_MI_FLUSH 0x0200
 
 
/* Bitfields for the URB_WRITE message, DW2 of message header: */
#define URB_WRITE_PRIM_END 0x1
#define URB_WRITE_PRIM_START 0x2
#define URB_WRITE_PRIM_TYPE_SHIFT 2
 
 
/* Maximum number of entries that can be addressed using a binding table
* pointer of type SURFTYPE_BUFFER
*/
#define BRW_MAX_NUM_BUFFER_ENTRIES (1 << 27)
 
#include "intel_chipset.h"
 
#endif
/contrib/sdk/sources/Mesa/src/gallium/drivers/ilo/include/brw_structs.h
0,0 → 1,1453
/*
Copyright (C) Intel Corp. 2006. All Rights Reserved.
Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
develop this 3D driver.
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice (including the
next paragraph) shall be included in all copies or substantial
portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
**********************************************************************/
/*
* Authors:
* Keith Whitwell <keith@tungstengraphics.com>
*/
 
#ifndef BRW_STRUCTS_H
#define BRW_STRUCTS_H
 
/* These seem to be passed around as function args, so it works out
* better to keep them as #defines:
*/
#define BRW_FLUSH_READ_CACHE 0x1
#define BRW_FLUSH_STATE_CACHE 0x2
#define BRW_INHIBIT_FLUSH_RENDER_CACHE 0x4
#define BRW_FLUSH_SNAPSHOT_COUNTERS 0x8
 
/* URB_FENCE command (Gen4/5): sets the fence addresses that partition the
 * Unified Return Buffer among the VS, GS, CLIP, SF, VFE and CS units.
 * Bitfield order mirrors the command's DWord layout -- do not reorder.
 */
struct brw_urb_fence
{
struct
{
GLuint length:8;
GLuint vs_realloc:1;
GLuint gs_realloc:1;
GLuint clp_realloc:1;
GLuint sf_realloc:1;
GLuint vfe_realloc:1;
GLuint cs_realloc:1;
GLuint pad:2;
GLuint opcode:16;
} header;
 
/* DW1: fences for the first three units */
struct
{
GLuint vs_fence:10;
GLuint gs_fence:10;
GLuint clp_fence:10;
GLuint pad:2;
} bits0;
 
/* DW2: fences for the remaining units */
struct
{
GLuint sf_fence:10;
GLuint vf_fence:10;
GLuint cs_fence:11;
GLuint pad:1;
} bits1;
};
 
/* State structs for the various fixed function units:
*/
 
 
/* DW0 common to the Gen4/5 fixed-function unit state blocks (VS/GS/SF/WM...):
 * kernel start pointer plus GRF register count for the thread's kernel.
 */
struct thread0
{
GLuint pad0:1;
GLuint grf_reg_count:3;
GLuint pad1:2;
GLuint kernel_start_pointer:26; /* Offset from GENERAL_STATE_BASE */
};
 
/* DW1 common to the Gen4/5 fixed-function unit state blocks: exception
 * enables, floating-point mode and binding table entry count.
 */
struct thread1
{
GLuint ext_halt_exception_enable:1;
GLuint sw_exception_enable:1;
GLuint mask_stack_exception_enable:1;
GLuint timeout_exception_enable:1;
GLuint illegal_op_exception_enable:1;
GLuint pad0:3;
GLuint depth_coef_urb_read_offset:6; /* WM only */
GLuint pad1:2;
GLuint floating_point_mode:1;
GLuint thread_priority:1;
GLuint binding_table_entry_count:8;
GLuint pad3:5;
GLuint single_program_flow:1;
};
 
/* DW2 common to the Gen4/5 fixed-function unit state blocks: per-thread
 * scratch space allocation and its base pointer.
 */
struct thread2
{
GLuint per_thread_scratch_space:4;
GLuint pad0:6;
GLuint scratch_space_base_pointer:22;
};
 
/* DW3 common to the Gen4/5 fixed-function unit state blocks: URB and
 * constant-URB read offsets/lengths, and the dispatch GRF start register.
 */
struct thread3
{
GLuint dispatch_grf_start_reg:4;
GLuint urb_entry_read_offset:6;
GLuint pad0:1;
GLuint urb_entry_read_length:6;
GLuint pad1:1;
GLuint const_urb_entry_read_offset:6;
GLuint pad2:1;
GLuint const_urb_entry_read_length:6;
GLuint pad3:1;
};
 
 
 
/* Gen4/5 CLIP unit state (CLIP_STATE).  Note that DW1 here is an inline
 * variant of struct thread1: the CLIP unit lacks the WM-only
 * depth_coef_urb_read_offset field, so the pad layout differs.
 */
struct brw_clip_unit_state
{
struct thread0 thread0;
struct
{
GLuint pad0:7;
GLuint sw_exception_enable:1;
GLuint pad1:3;
GLuint mask_stack_exception_enable:1;
GLuint pad2:1;
GLuint illegal_op_exception_enable:1;
GLuint pad3:2;
GLuint floating_point_mode:1;
GLuint thread_priority:1;
GLuint binding_table_entry_count:8;
GLuint pad4:5;
GLuint single_program_flow:1;
} thread1;
 
struct thread2 thread2;
struct thread3 thread3;
 
/* DW4: URB allocation and thread limits for the CLIP unit */
struct
{
GLuint pad0:9;
GLuint gs_output_stats:1; /* not always */
GLuint stats_enable:1;
GLuint nr_urb_entries:7;
GLuint pad1:1;
GLuint urb_entry_allocation_size:5;
GLuint pad2:1;
GLuint max_threads:5; /* may be less */
GLuint pad3:2;
} thread4;
/* DW5: clip mode, user clip planes and viewport clip tests */
struct
{
GLuint pad0:13;
GLuint clip_mode:3;
GLuint userclip_enable_flags:8;
GLuint userclip_must_clip:1;
GLuint negative_w_clip_test:1;
GLuint guard_band_enable:1;
GLuint viewport_z_clip_enable:1;
GLuint viewport_xy_clip_enable:1;
GLuint vertex_position_space:1;
GLuint api_mode:1;
GLuint pad2:1;
} clip5;
/* DW6: pointer to the clipper viewport state */
struct
{
GLuint pad0:5;
GLuint clipper_viewport_state_ptr:27;
} clip6;
 
/* DW7-10: guard band extents */
GLfloat viewport_xmin;
GLfloat viewport_xmax;
GLfloat viewport_ymin;
GLfloat viewport_ymax;
};
 
/* Gen6+ BLEND_STATE element (two DWords).  blend0 selects the color and
 * independent-alpha blend factors/functions; blend1 carries clamping,
 * dithering, alpha test, logic op, channel write disables and
 * alpha-to-coverage controls.
 */
struct gen6_blend_state
{
struct {
GLuint dest_blend_factor:5;
GLuint source_blend_factor:5;
GLuint pad3:1;
GLuint blend_func:3;
GLuint pad2:1;
GLuint ia_dest_blend_factor:5;
GLuint ia_source_blend_factor:5;
GLuint pad1:1;
GLuint ia_blend_func:3;
GLuint pad0:1;
GLuint ia_blend_enable:1;
GLuint blend_enable:1;
} blend0;
 
struct {
GLuint post_blend_clamp_enable:1;
GLuint pre_blend_clamp_enable:1;
GLuint clamp_range:2;
GLuint pad0:4;
GLuint x_dither_offset:2;
GLuint y_dither_offset:2;
GLuint dither_enable:1;
GLuint alpha_test_func:3;
GLuint alpha_test_enable:1;
GLuint pad1:1;
GLuint logic_op_func:4;
GLuint logic_op_enable:1;
GLuint pad2:1;
GLuint write_disable_b:1;
GLuint write_disable_g:1;
GLuint write_disable_r:1;
GLuint write_disable_a:1;
GLuint pad3:1;
GLuint alpha_to_coverage_dither:1;
GLuint alpha_to_one:1;
GLuint alpha_to_coverage:1;
} blend1;
};
 
/* Gen6+ COLOR_CALC_STATE: stencil reference values, the alpha reference
 * (either float or unorm8, selected by alpha_test_format), and the blend
 * constant color.
 */
struct gen6_color_calc_state
{
struct {
GLuint alpha_test_format:1;
GLuint pad0:14;
GLuint round_disable:1;
GLuint bf_stencil_ref:8;
GLuint stencil_ref:8;
} cc0;
 
/* DW1: alpha reference, interpreted per cc0.alpha_test_format */
union {
GLfloat alpha_ref_f;
struct {
GLuint ui:8;
GLuint pad0:24;
} alpha_ref_fi;
} cc1;
 
/* DW2-5: blend constant color */
GLfloat constant_r;
GLfloat constant_g;
GLfloat constant_b;
GLfloat constant_a;
};
 
/* Gen6+ DEPTH_STENCIL_STATE: front/back-face stencil ops (ds0), stencil
 * masks (ds1) and depth test controls (ds2).  "bf_" fields apply to
 * back-facing primitives.
 */
struct gen6_depth_stencil_state
{
struct {
GLuint pad0:3;
GLuint bf_stencil_pass_depth_pass_op:3;
GLuint bf_stencil_pass_depth_fail_op:3;
GLuint bf_stencil_fail_op:3;
GLuint bf_stencil_func:3;
GLuint bf_stencil_enable:1;
GLuint pad1:2;
GLuint stencil_write_enable:1;
GLuint stencil_pass_depth_pass_op:3;
GLuint stencil_pass_depth_fail_op:3;
GLuint stencil_fail_op:3;
GLuint stencil_func:3;
GLuint stencil_enable:1;
} ds0;
 
struct {
GLuint bf_stencil_write_mask:8;
GLuint bf_stencil_test_mask:8;
GLuint stencil_write_mask:8;
GLuint stencil_test_mask:8;
} ds1;
 
struct {
GLuint pad0:26;
GLuint depth_write_enable:1;
GLuint depth_test_func:3;
GLuint pad1:1;
GLuint depth_test_enable:1;
} ds2;
};
 
/* Gen4/5 color calculator (CC) unit state: stencil (cc0-cc2), alpha test
 * and blend enables (cc3), CC viewport pointer (cc4), independent-alpha
 * blend (cc5), color blend and dither (cc6), and the alpha reference
 * value (cc7).
 */
struct brw_cc_unit_state
{
struct
{
GLuint pad0:3;
GLuint bf_stencil_pass_depth_pass_op:3;
GLuint bf_stencil_pass_depth_fail_op:3;
GLuint bf_stencil_fail_op:3;
GLuint bf_stencil_func:3;
GLuint bf_stencil_enable:1;
GLuint pad1:2;
GLuint stencil_write_enable:1;
GLuint stencil_pass_depth_pass_op:3;
GLuint stencil_pass_depth_fail_op:3;
GLuint stencil_fail_op:3;
GLuint stencil_func:3;
GLuint stencil_enable:1;
} cc0;
 
struct
{
GLuint bf_stencil_ref:8;
GLuint stencil_write_mask:8;
GLuint stencil_test_mask:8;
GLuint stencil_ref:8;
} cc1;
 
struct
{
GLuint logicop_enable:1;
GLuint pad0:10;
GLuint depth_write_enable:1;
GLuint depth_test_function:3;
GLuint depth_test:1;
GLuint bf_stencil_write_mask:8;
GLuint bf_stencil_test_mask:8;
} cc2;
 
struct
{
GLuint pad0:8;
GLuint alpha_test_func:3;
GLuint alpha_test:1;
GLuint blend_enable:1;
GLuint ia_blend_enable:1;
GLuint pad1:1;
GLuint alpha_test_format:1;
GLuint pad2:16;
} cc3;
struct
{
GLuint pad0:5;
GLuint cc_viewport_state_offset:27; /* Offset from GENERAL_STATE_BASE */
} cc4;
struct
{
GLuint pad0:2;
GLuint ia_dest_blend_factor:5;
GLuint ia_src_blend_factor:5;
GLuint ia_blend_function:3;
GLuint statistics_enable:1;
GLuint logicop_func:4;
GLuint pad1:11;
GLuint dither_enable:1;
} cc5;
 
struct
{
GLuint clamp_post_alpha_blend:1;
GLuint clamp_pre_alpha_blend:1;
GLuint clamp_range:2;
GLuint pad0:11;
GLuint y_dither_offset:2;
GLuint x_dither_offset:2;
GLuint dest_blend_factor:5;
GLuint src_blend_factor:5;
GLuint blend_function:3;
} cc6;
 
/* DW7: alpha reference, as float or packed bytes per cc3.alpha_test_format */
struct {
union {
GLfloat f;
GLubyte ub[4];
} alpha_ref;
} cc7;
};
 
/* Gen4/5 strips-and-fans (SF) unit state: thread setup (DW0-4), viewport
 * pointer and winding (sf5), raster controls (sf6), and point/provoking
 * vertex controls (sf7).
 */
struct brw_sf_unit_state
{
struct thread0 thread0;
struct thread1 thread1;
struct thread2 thread2;
struct thread3 thread3;
 
/* DW4: URB allocation and thread limits for the SF unit */
struct
{
GLuint pad0:10;
GLuint stats_enable:1;
GLuint nr_urb_entries:7;
GLuint pad1:1;
GLuint urb_entry_allocation_size:5;
GLuint pad2:1;
GLuint max_threads:6;
GLuint pad3:1;
} thread4;
 
struct
{
GLuint front_winding:1;
GLuint viewport_transform:1;
GLuint pad0:3;
GLuint sf_viewport_state_offset:27; /* Offset from GENERAL_STATE_BASE */
} sf5;
/* DW6: scissor, culling, line width and AA controls */
struct
{
GLuint pad0:9;
GLuint dest_org_vbias:4;
GLuint dest_org_hbias:4;
GLuint scissor:1;
GLuint disable_2x2_trifilter:1;
GLuint disable_zero_pix_trifilter:1;
GLuint point_rast_rule:2;
GLuint line_endcap_aa_region_width:2;
GLuint line_width:4;
GLuint fast_scissor_disable:1;
GLuint cull_mode:2;
GLuint aa_enable:1;
} sf6;
 
/* DW7: point size and provoking-vertex selection per primitive type */
struct
{
GLuint point_size:11;
GLuint use_point_size_state:1;
GLuint subpixel_precision:1;
GLuint sprite_point:1;
GLuint pad0:10;
GLuint aa_line_distance_mode:1;
GLuint trifan_pv:2;
GLuint linestrip_pv:2;
GLuint tristrip_pv:2;
GLuint line_last_pixel_enable:1;
} sf7;
 
};
 
/* Gen6+ SCISSOR_RECT element: one screen-space scissor rectangle packed
 * into two DWords.
 */
struct gen6_scissor_rect
{
GLuint xmin:16;
GLuint ymin:16;
GLuint xmax:16;
GLuint ymax:16;
};
 
/* Gen4/5 geometry shader (GS) unit state: thread setup (DW0-4), sampler
 * pointer (gs5) and streamed-vertex-buffer-index / reorder controls (gs6).
 */
struct brw_gs_unit_state
{
struct thread0 thread0;
struct thread1 thread1;
struct thread2 thread2;
struct thread3 thread3;
 
/* DW4: URB allocation and thread limits for the GS unit */
struct
{
GLuint pad0:8;
GLuint rendering_enable:1; /* for Ironlake */
GLuint pad4:1;
GLuint stats_enable:1;
GLuint nr_urb_entries:7;
GLuint pad1:1;
GLuint urb_entry_allocation_size:5;
GLuint pad2:1;
GLuint max_threads:5;
GLuint pad3:2;
} thread4;
struct
{
GLuint sampler_count:3;
GLuint pad0:2;
GLuint sampler_state_pointer:27;
} gs5;
 
struct
{
GLuint max_vp_index:4;
GLuint pad0:12;
GLuint svbi_post_inc_value:10;
GLuint pad1:1;
GLuint svbi_post_inc_enable:1;
GLuint svbi_payload:1;
GLuint discard_adjaceny:1; /* sic: "adjacency"; name kept as-is for source compat */
GLuint reorder_enable:1;
GLuint pad2:1;
} gs6;
};
 
 
/* Gen4/5 vertex shader (VS) unit state: thread setup (DW0-4), sampler
 * pointer (vs5), and VS enable / vertex cache controls (vs6).
 */
struct brw_vs_unit_state
{
struct thread0 thread0;
struct thread1 thread1;
struct thread2 thread2;
struct thread3 thread3;
/* DW4: URB allocation and thread limits for the VS unit */
struct
{
GLuint pad0:10;
GLuint stats_enable:1;
GLuint nr_urb_entries:7;
GLuint pad1:1;
GLuint urb_entry_allocation_size:5;
GLuint pad2:1;
GLuint max_threads:6;
GLuint pad3:1;
} thread4;
 
struct
{
GLuint sampler_count:3;
GLuint pad0:2;
GLuint sampler_state_pointer:27;
} vs5;
 
struct
{
GLuint vs_enable:1;
GLuint vert_cache_disable:1;
GLuint pad0:30;
} vs6;
};
 
 
/* Gen4/5 windower/masker (WM, i.e. pixel shader) unit state.  DW0-3 are
 * the common thread setup; wm4/wm5 hold dispatch and raster controls;
 * wm8-wm10 are extra kernel pointers used on Ironlake only.
 */
struct brw_wm_unit_state
{
struct thread0 thread0;
struct thread1 thread1;
struct thread2 thread2;
struct thread3 thread3;
struct {
GLuint stats_enable:1;
GLuint depth_buffer_clear:1;
GLuint sampler_count:3;
GLuint sampler_state_pointer:27;
} wm4;
/* DW5: dispatch-width enables and fixed-function raster controls */
struct
{
GLuint enable_8_pix:1;
GLuint enable_16_pix:1;
GLuint enable_32_pix:1;
GLuint enable_con_32_pix:1;
GLuint enable_con_64_pix:1;
GLuint pad0:1;
 
/* These next four bits are for Ironlake+ */
GLuint fast_span_coverage_enable:1;
GLuint depth_buffer_clear:1;
GLuint depth_buffer_resolve_enable:1;
GLuint hierarchical_depth_buffer_resolve_enable:1;
 
GLuint legacy_global_depth_bias:1;
GLuint line_stipple:1;
GLuint depth_offset:1;
GLuint polygon_stipple:1;
GLuint line_aa_region_width:2;
GLuint line_endcap_aa_region_width:2;
GLuint early_depth_test:1;
GLuint thread_dispatch_enable:1;
GLuint program_uses_depth:1;
GLuint program_computes_depth:1;
GLuint program_uses_killpixel:1;
GLuint legacy_line_rast: 1;
GLuint transposed_urb_read_enable:1;
GLuint max_threads:7;
} wm5;
GLfloat global_depth_offset_constant;
GLfloat global_depth_offset_scale;
/* for Ironlake only */
struct {
GLuint pad0:1;
GLuint grf_reg_count_1:3;
GLuint pad1:2;
GLuint kernel_start_pointer_1:26;
} wm8;
 
struct {
GLuint pad0:1;
GLuint grf_reg_count_2:3;
GLuint pad1:2;
GLuint kernel_start_pointer_2:26;
} wm9;
 
struct {
GLuint pad0:1;
GLuint grf_reg_count_3:3;
GLuint pad1:2;
GLuint kernel_start_pointer_3:26;
} wm10;
};
 
/* Gen4 sampler default (border) color: RGBA as four floats. */
struct brw_sampler_default_color {
GLfloat color[4];
};
 
/* Gen5 sampler default (border) color: the same color replicated in
 * several storage formats (unorm8, float, half-float, unorm16, snorm16,
 * and raw bytes) -- presumably one per surface-format family; confirm
 * against the Ironlake PRM before relying on field meanings.
 */
struct gen5_sampler_default_color {
uint8_t ub[4];
float f[4];
uint16_t hf[4];
uint16_t us[4];
int16_t s[4];
uint8_t b[4];
};
 
/* Gen4-6 SAMPLER_STATE (four DWords): filtering and LOD bias (ss0), wrap
 * modes and LOD clamps (ss1), default color pointer (ss2), and
 * anisotropy / chroma-key controls (ss3).
 */
struct brw_sampler_state
{
struct
{
GLuint shadow_function:3;
GLuint lod_bias:11;
GLuint min_filter:3;
GLuint mag_filter:3;
GLuint mip_filter:2;
GLuint base_level:5;
GLuint min_mag_neq:1;
GLuint lod_preclamp:1;
GLuint default_color_mode:1;
GLuint pad0:1;
GLuint disable:1;
} ss0;
 
struct
{
GLuint r_wrap_mode:3;
GLuint t_wrap_mode:3;
GLuint s_wrap_mode:3;
GLuint cube_control_mode:1;
GLuint pad:2;
GLuint max_lod:10;
GLuint min_lod:10;
} ss1;
 
struct
{
GLuint pad:5;
GLuint default_color_pointer:27;
} ss2;
struct
{
GLuint non_normalized_coord:1;
GLuint pad:12;
GLuint address_round:6;
GLuint max_aniso:3;
GLuint chroma_key_mode:1;
GLuint chroma_key_index:2;
GLuint chroma_key_enable:1;
GLuint monochrome_filter_width:3;
GLuint monochrome_filter_height:3;
} ss3;
};
 
/* Gen7 SAMPLER_STATE.  Differs from the Gen4-6 layout: lod_bias widens
 * to 13 bits, the LOD clamps widen to 12 bits, and the wrap modes move
 * from ss1 into ss3.
 */
struct gen7_sampler_state
{
struct
{
GLuint aniso_algorithm:1;
GLuint lod_bias:13;
GLuint min_filter:3;
GLuint mag_filter:3;
GLuint mip_filter:2;
GLuint base_level:5;
GLuint pad1:1;
GLuint lod_preclamp:1;
GLuint default_color_mode:1;
GLuint pad0:1;
GLuint disable:1;
} ss0;
 
struct
{
GLuint cube_control_mode:1;
GLuint shadow_function:3;
GLuint pad:4;
GLuint max_lod:12;
GLuint min_lod:12;
} ss1;
 
struct
{
GLuint pad:5;
GLuint default_color_pointer:27;
} ss2;
 
struct
{
GLuint r_wrap_mode:3;
GLuint t_wrap_mode:3;
GLuint s_wrap_mode:3;
GLuint pad:1;
GLuint non_normalized_coord:1;
GLuint trilinear_quality:2;
GLuint address_round:6;
GLuint max_aniso:3;
GLuint chroma_key_mode:1;
GLuint chroma_key_index:2;
GLuint chroma_key_enable:1;
GLuint pad0:6;
} ss3;
};
 
/* Gen4/5 clipper viewport: guard-band extents referenced from
 * brw_clip_unit_state.clip6.
 */
struct brw_clipper_viewport
{
GLfloat xmin;
GLfloat xmax;
GLfloat ymin;
GLfloat ymax;
};
 
/* CC_VIEWPORT: the depth range the color calculator clamps to. */
struct brw_cc_viewport
{
GLfloat min_depth;
GLfloat max_depth;
};
 
/* Gen4/5 SF_VIEWPORT: the viewport transform (scale m00/m11/m22 and
 * translation m30/m31/m32) followed by a per-viewport scissor rectangle.
 */
struct brw_sf_viewport
{
struct {
GLfloat m00;
GLfloat m11;
GLfloat m22;
GLfloat m30;
GLfloat m31;
GLfloat m32;
} viewport;
 
/* scissor coordinates are inclusive */
struct {
GLshort xmin;
GLshort ymin;
GLshort xmax;
GLshort ymax;
} scissor;
};
 
/* Gen6 SF_VIEWPORT: viewport transform only -- the scissor moved to the
 * separate SCISSOR_RECT state on Gen6.
 */
struct gen6_sf_viewport {
GLfloat m00;
GLfloat m11;
GLfloat m22;
GLfloat m30;
GLfloat m31;
GLfloat m32;
};
 
/* Gen7 combined SF_CLIP_VIEWPORT: viewport transform plus guard-band
 * extents, padded out to a fixed-size element.
 */
struct gen7_sf_clip_viewport {
struct {
GLfloat m00;
GLfloat m11;
GLfloat m22;
GLfloat m30;
GLfloat m31;
GLfloat m32;
} viewport;
 
GLuint pad0[2];
 
struct {
GLfloat xmin;
GLfloat xmax;
GLfloat ymin;
GLfloat ymax;
} guardband;
 
GLfloat pad1[4];
};
 
/* One VERTEX_ELEMENT_STATE entry (two DWords): source buffer/offset/format
 * in ve0, destination offset and per-component selects in ve1.
 */
struct brw_vertex_element_state
{
struct
{
GLuint src_offset:11;
GLuint pad:5;
GLuint src_format:9;
GLuint pad0:1;
GLuint valid:1;
GLuint vertex_buffer_index:5;
} ve0;
struct
{
GLuint dst_offset:8;
GLuint pad:8;
GLuint vfcomponent3:4;
GLuint vfcomponent2:4;
GLuint vfcomponent1:4;
GLuint vfcomponent0:4;
} ve1;
};
 
/* Message descriptor for the Gen4 URB write message (also embedded in
 * brw_instruction.bits3 as the "urb" member).
 */
struct brw_urb_immediate {
GLuint opcode:4;
GLuint offset:6;
GLuint swizzle_control:2;
GLuint pad:1;
GLuint allocate:1;
GLuint used:1;
GLuint complete:1;
GLuint response_length:4;
GLuint msg_length:4;
GLuint msg_target:4;
GLuint pad1:3;
GLuint end_of_thread:1;
};
 
/* Instruction format for the execution units:
*/
/* Native 128-bit EU instruction encoding (Gen4-7).  Four 32-bit words:
 * this header, then bits1 (destination and source register types/files),
 * bits2 (source 0) and bits3 (source 1, or the SEND message descriptor),
 * each a union of layouts selected by access mode and opcode.
 */
struct brw_instruction
{
struct
{
GLuint opcode:7;
GLuint pad:1;
GLuint access_mode:1;
GLuint mask_control:1;
GLuint dependency_control:2;
GLuint compression_control:2; /* gen6: quarter control */
GLuint thread_control:2;
GLuint predicate_control:4;
GLuint predicate_inverse:1;
GLuint execution_size:3;
/**
 * Conditional Modifier for most instructions. On Gen6+, this is also
 * used for the SEND instruction's Message Target/SFID.
 */
GLuint destreg__conditionalmod:4;
GLuint acc_wr_control:1;
GLuint cmpt_control:1;
GLuint debug_control:1;
GLuint saturate:1;
} header;
 
union {
struct
{
GLuint dest_reg_file:2;
GLuint dest_reg_type:3;
GLuint src0_reg_file:2;
GLuint src0_reg_type:3;
GLuint src1_reg_file:2;
GLuint src1_reg_type:3;
GLuint nibctrl:1; /* gen7+ */
GLuint dest_subreg_nr:5;
GLuint dest_reg_nr:8;
GLuint dest_horiz_stride:2;
GLuint dest_address_mode:1;
} da1;
 
struct
{
GLuint dest_reg_file:2;
GLuint dest_reg_type:3;
GLuint src0_reg_file:2;
GLuint src0_reg_type:3;
GLuint src1_reg_file:2; /* 0x00000c00 */
GLuint src1_reg_type:3; /* 0x00007000 */
GLuint nibctrl:1; /* gen7+ */
GLint dest_indirect_offset:10; /* offset against the deref'd address reg */
GLuint dest_subreg_nr:3; /* subnr for the address reg a0.x */
GLuint dest_horiz_stride:2;
GLuint dest_address_mode:1;
} ia1;
 
struct
{
GLuint dest_reg_file:2;
GLuint dest_reg_type:3;
GLuint src0_reg_file:2;
GLuint src0_reg_type:3;
GLuint src1_reg_file:2;
GLuint src1_reg_type:3;
GLuint nibctrl:1; /* gen7+ */
GLuint dest_writemask:4;
GLuint dest_subreg_nr:1;
GLuint dest_reg_nr:8;
GLuint dest_horiz_stride:2;
GLuint dest_address_mode:1;
} da16;
 
struct
{
GLuint dest_reg_file:2;
GLuint dest_reg_type:3;
GLuint src0_reg_file:2;
GLuint src0_reg_type:3;
GLuint src1_reg_file:2;
GLuint src1_reg_type:3;
GLuint nibctrl:1; /* gen7+ */
GLuint dest_writemask:4;
GLint dest_indirect_offset:6;
GLuint dest_subreg_nr:3;
GLuint dest_horiz_stride:2;
GLuint dest_address_mode:1;
} ia16;
 
struct {
GLuint dest_reg_file:2;
GLuint dest_reg_type:3;
GLuint src0_reg_file:2;
GLuint src0_reg_type:3;
GLuint src1_reg_file:2;
GLuint src1_reg_type:3;
GLuint pad:1;
 
GLint jump_count:16;
} branch_gen6;
 
struct {
GLuint dest_reg_file:1; /* gen6, not gen7+ */
GLuint flag_subreg_num:1;
GLuint flag_reg_nr:1; /* gen7+ */
GLuint pad0:1;
GLuint src0_abs:1;
GLuint src0_negate:1;
GLuint src1_abs:1;
GLuint src1_negate:1;
GLuint src2_abs:1;
GLuint src2_negate:1;
GLuint src_type:2; /* gen7+ */
GLuint dst_type:2; /* gen7+ */
GLuint pad1:1;
GLuint nibctrl:1; /* gen7+ */
GLuint pad2:1;
GLuint dest_writemask:4;
GLuint dest_subreg_nr:3;
GLuint dest_reg_nr:8;
} da3src;
 
uint32_t ud;
} bits1;
 
 
union {
struct
{
GLuint src0_subreg_nr:5;
GLuint src0_reg_nr:8;
GLuint src0_abs:1;
GLuint src0_negate:1;
GLuint src0_address_mode:1;
GLuint src0_horiz_stride:2;
GLuint src0_width:3;
GLuint src0_vert_stride:4;
GLuint flag_subreg_nr:1;
GLuint flag_reg_nr:1; /* gen7+ */
GLuint pad:5;
} da1;
 
struct
{
GLint src0_indirect_offset:10;
GLuint src0_subreg_nr:3;
GLuint src0_abs:1;
GLuint src0_negate:1;
GLuint src0_address_mode:1;
GLuint src0_horiz_stride:2;
GLuint src0_width:3;
GLuint src0_vert_stride:4;
GLuint flag_subreg_nr:1;
GLuint flag_reg_nr:1; /* gen7+ */
GLuint pad:5;
} ia1;
 
struct
{
GLuint src0_swz_x:2;
GLuint src0_swz_y:2;
GLuint src0_subreg_nr:1;
GLuint src0_reg_nr:8;
GLuint src0_abs:1;
GLuint src0_negate:1;
GLuint src0_address_mode:1;
GLuint src0_swz_z:2;
GLuint src0_swz_w:2;
GLuint pad0:1;
GLuint src0_vert_stride:4;
GLuint flag_subreg_nr:1;
GLuint flag_reg_nr:1; /* gen7+ */
GLuint pad1:5;
} da16;
 
struct
{
GLuint src0_swz_x:2;
GLuint src0_swz_y:2;
GLint src0_indirect_offset:6;
GLuint src0_subreg_nr:3;
GLuint src0_abs:1;
GLuint src0_negate:1;
GLuint src0_address_mode:1;
GLuint src0_swz_z:2;
GLuint src0_swz_w:2;
GLuint pad0:1;
GLuint src0_vert_stride:4;
GLuint flag_subreg_nr:1;
GLuint flag_reg_nr:1; /* gen7+ */
GLuint pad1:5;
} ia16;
 
/* Extended Message Descriptor for Ironlake (Gen5) SEND instruction.
*
* Does not apply to Gen6+. The SFID/message target moved to bits
* 27:24 of the header (destreg__conditionalmod); EOT is in bits3.
*/
struct
{
GLuint pad:26;
GLuint end_of_thread:1;
GLuint pad1:1;
GLuint sfid:4;
} send_gen5; /* for Ironlake only */
 
struct {
GLuint src0_rep_ctrl:1;
GLuint src0_swizzle:8;
GLuint src0_subreg_nr:3;
GLuint src0_reg_nr:8;
GLuint pad0:1;
GLuint src1_rep_ctrl:1;
GLuint src1_swizzle:8;
GLuint src1_subreg_nr_low:2;
} da3src;
 
uint32_t ud;
} bits2;
 
union
{
struct
{
GLuint src1_subreg_nr:5;
GLuint src1_reg_nr:8;
GLuint src1_abs:1;
GLuint src1_negate:1;
GLuint src1_address_mode:1;
GLuint src1_horiz_stride:2;
GLuint src1_width:3;
GLuint src1_vert_stride:4;
GLuint pad0:7;
} da1;
 
struct
{
GLuint src1_swz_x:2;
GLuint src1_swz_y:2;
GLuint src1_subreg_nr:1;
GLuint src1_reg_nr:8;
GLuint src1_abs:1;
GLuint src1_negate:1;
GLuint src1_address_mode:1;
GLuint src1_swz_z:2;
GLuint src1_swz_w:2;
GLuint pad1:1;
GLuint src1_vert_stride:4;
GLuint pad2:7;
} da16;
 
struct
{
GLint src1_indirect_offset:10;
GLuint src1_subreg_nr:3;
GLuint src1_abs:1;
GLuint src1_negate:1;
GLuint src1_address_mode:1;
GLuint src1_horiz_stride:2;
GLuint src1_width:3;
GLuint src1_vert_stride:4;
GLuint pad1:7;
} ia1;
 
struct
{
GLuint src1_swz_x:2;
GLuint src1_swz_y:2;
GLint src1_indirect_offset:6;
GLuint src1_subreg_nr:3;
GLuint src1_abs:1;
GLuint src1_negate:1;
GLuint pad0:1;
GLuint src1_swz_z:2;
GLuint src1_swz_w:2;
GLuint pad1:1;
GLuint src1_vert_stride:4;
GLuint pad2:7;
} ia16;
 
 
struct
{
GLint jump_count:16; /* note: signed */
GLuint pop_count:4;
GLuint pad0:12;
} if_else;
 
/* This is also used for gen7 IF/ELSE instructions */
struct
{
/* Signed jump distance to the ip to jump to if all channels
* are disabled after the break or continue. It should point
* to the end of the innermost control flow block, as that's
* where some channel could get re-enabled.
*/
int jip:16;
 
/* Signed jump distance to the location to resume execution
* of this channel if it's enabled for the break or continue.
*/
int uip:16;
} break_cont;
 
/**
* \defgroup SEND instructions / Message Descriptors
*
* @{
*/
 
/**
* Generic Message Descriptor for Gen4 SEND instructions. The structs
* below expand function_control to something specific for their
* message. Due to struct packing issues, they duplicate these bits.
*
* See the G45 PRM, Volume 4, Table 14-15.
*/
struct {
GLuint function_control:16;
GLuint response_length:4;
GLuint msg_length:4;
GLuint msg_target:4;
GLuint pad1:3;
GLuint end_of_thread:1;
} generic;
 
/**
* Generic Message Descriptor for Gen5-7 SEND instructions.
*
* See the Sandybridge PRM, Volume 2 Part 2, Table 8-15. (Sadly, most
* of the information on the SEND instruction is missing from the public
* Ironlake PRM.)
*
* The table claims that bit 31 is reserved/MBZ on Gen6+, but it lies.
* According to the SEND instruction description:
* "The MSb of the message description, the EOT field, always comes from
* bit 127 of the instruction word"...which is bit 31 of this field.
*/
struct {
GLuint function_control:19;
GLuint header_present:1;
GLuint response_length:5;
GLuint msg_length:4;
GLuint pad1:2;
GLuint end_of_thread:1;
} generic_gen5;
 
/** G45 PRM, Volume 4, Section 6.1.1.1 */
struct {
GLuint function:4;
GLuint int_type:1;
GLuint precision:1;
GLuint saturate:1;
GLuint data_type:1;
GLuint pad0:8;
GLuint response_length:4;
GLuint msg_length:4;
GLuint msg_target:4;
GLuint pad1:3;
GLuint end_of_thread:1;
} math;
 
/** Ironlake PRM, Volume 4 Part 1, Section 6.1.1.1 */
struct {
GLuint function:4;
GLuint int_type:1;
GLuint precision:1;
GLuint saturate:1;
GLuint data_type:1;
GLuint snapshot:1;
GLuint pad0:10;
GLuint header_present:1;
GLuint response_length:5;
GLuint msg_length:4;
GLuint pad1:2;
GLuint end_of_thread:1;
} math_gen5;
 
/** G45 PRM, Volume 4, Section 4.8.1.1.1 [DevBW] and [DevCL] */
struct {
GLuint binding_table_index:8;
GLuint sampler:4;
GLuint return_format:2;
GLuint msg_type:2;
GLuint response_length:4;
GLuint msg_length:4;
GLuint msg_target:4;
GLuint pad1:3;
GLuint end_of_thread:1;
} sampler;
 
/** G45 PRM, Volume 4, Section 4.8.1.1.2 [DevCTG] */
struct {
GLuint binding_table_index:8;
GLuint sampler:4;
GLuint msg_type:4;
GLuint response_length:4;
GLuint msg_length:4;
GLuint msg_target:4;
GLuint pad1:3;
GLuint end_of_thread:1;
} sampler_g4x;
 
/** Ironlake PRM, Volume 4 Part 1, Section 4.11.1.1.3 */
struct {
GLuint binding_table_index:8;
GLuint sampler:4;
GLuint msg_type:4;
GLuint simd_mode:2;
GLuint pad0:1;
GLuint header_present:1;
GLuint response_length:5;
GLuint msg_length:4;
GLuint pad1:2;
GLuint end_of_thread:1;
} sampler_gen5;
 
struct {
GLuint binding_table_index:8;
GLuint sampler:4;
GLuint msg_type:5;
GLuint simd_mode:2;
GLuint header_present:1;
GLuint response_length:5;
GLuint msg_length:4;
GLuint pad1:2;
GLuint end_of_thread:1;
} sampler_gen7;
 
struct brw_urb_immediate urb;
 
struct {
GLuint opcode:4;
GLuint offset:6;
GLuint swizzle_control:2;
GLuint pad:1;
GLuint allocate:1;
GLuint used:1;
GLuint complete:1;
GLuint pad0:3;
GLuint header_present:1;
GLuint response_length:5;
GLuint msg_length:4;
GLuint pad1:2;
GLuint end_of_thread:1;
} urb_gen5;
 
struct {
GLuint opcode:3;
GLuint offset:11;
GLuint swizzle_control:1;
GLuint complete:1;
GLuint per_slot_offset:1;
GLuint pad0:2;
GLuint header_present:1;
GLuint response_length:5;
GLuint msg_length:4;
GLuint pad1:2;
GLuint end_of_thread:1;
} urb_gen7;
 
/** 965 PRM, Volume 4, Section 5.10.1.1: Message Descriptor */
struct {
GLuint binding_table_index:8;
GLuint msg_control:4;
GLuint msg_type:2;
GLuint target_cache:2;
GLuint response_length:4;
GLuint msg_length:4;
GLuint msg_target:4;
GLuint pad1:3;
GLuint end_of_thread:1;
} dp_read;
 
/** G45 PRM, Volume 4, Section 5.10.1.1.2 */
struct {
GLuint binding_table_index:8;
GLuint msg_control:3;
GLuint msg_type:3;
GLuint target_cache:2;
GLuint response_length:4;
GLuint msg_length:4;
GLuint msg_target:4;
GLuint pad1:3;
GLuint end_of_thread:1;
} dp_read_g4x;
 
/** Ironlake PRM, Volume 4 Part 1, Section 5.10.2.1.2. */
struct {
GLuint binding_table_index:8;
GLuint msg_control:3;
GLuint msg_type:3;
GLuint target_cache:2;
GLuint pad0:3;
GLuint header_present:1;
GLuint response_length:5;
GLuint msg_length:4;
GLuint pad1:2;
GLuint end_of_thread:1;
} dp_read_gen5;
 
/** G45 PRM, Volume 4, Section 5.10.1.1.2. For both Gen4 and G45. */
struct {
GLuint binding_table_index:8;
GLuint msg_control:3;
GLuint last_render_target:1;
GLuint msg_type:3;
GLuint send_commit_msg:1;
GLuint response_length:4;
GLuint msg_length:4;
GLuint msg_target:4;
GLuint pad1:3;
GLuint end_of_thread:1;
} dp_write;
 
/** Ironlake PRM, Volume 4 Part 1, Section 5.10.2.1.2. */
struct {
GLuint binding_table_index:8;
GLuint msg_control:3;
GLuint last_render_target:1;
GLuint msg_type:3;
GLuint send_commit_msg:1;
GLuint pad0:3;
GLuint header_present:1;
GLuint response_length:5;
GLuint msg_length:4;
GLuint pad1:2;
GLuint end_of_thread:1;
} dp_write_gen5;
 
/**
* Message for the Sandybridge Sampler Cache or Constant Cache Data Port.
*
* See the Sandybridge PRM, Volume 4 Part 1, Section 3.9.2.1.1.
**/
struct {
GLuint binding_table_index:8;
GLuint msg_control:5;
GLuint msg_type:3;
GLuint pad0:3;
GLuint header_present:1;
GLuint response_length:5;
GLuint msg_length:4;
GLuint pad1:2;
GLuint end_of_thread:1;
} gen6_dp_sampler_const_cache;
 
/**
* Message for the Sandybridge Render Cache Data Port.
*
* Most fields are defined in the Sandybridge PRM, Volume 4 Part 1,
* Section 3.9.2.1.1: Message Descriptor.
*
* "Slot Group Select" and "Last Render Target" are part of the
* 5-bit message control for Render Target Write messages. See
* Section 3.9.9.2.1 of the same volume.
*/
struct {
GLuint binding_table_index:8;
GLuint msg_control:3;
GLuint slot_group_select:1;
GLuint last_render_target:1;
GLuint msg_type:4;
GLuint send_commit_msg:1;
GLuint pad0:1;
GLuint header_present:1;
GLuint response_length:5;
GLuint msg_length:4;
GLuint pad1:2;
GLuint end_of_thread:1;
} gen6_dp;
 
/**
* Message for any of the Gen7 Data Port caches.
*
* Most fields are defined in BSpec volume 5c.2 Data Port / Messages /
* Data Port Messages / Message Descriptor. Once again, "Slot Group
* Select" and "Last Render Target" are part of the 6-bit message
* control for Render Target Writes.
*/
struct {
GLuint binding_table_index:8;
GLuint msg_control:3;
GLuint slot_group_select:1;
GLuint last_render_target:1;
GLuint msg_control_pad:1;
GLuint msg_type:4;
GLuint pad1:1;
GLuint header_present:1;
GLuint response_length:5;
GLuint msg_length:4;
GLuint pad2:2;
GLuint end_of_thread:1;
} gen7_dp;
/** @} */
 
struct {
GLuint src1_subreg_nr_high:1;
GLuint src1_reg_nr:8;
GLuint pad0:1;
GLuint src2_rep_ctrl:1;
GLuint src2_swizzle:8;
GLuint src2_subreg_nr:3;
GLuint src2_reg_nr:8;
GLuint pad1:2;
} da3src;
 
GLint d;
GLuint ud;
float f;
} bits3;
};
 
/* Compacted 64-bit EU instruction encoding.  The index fields select
 * entries in the compaction lookup tables that expand back to the native
 * 128-bit format.  Bit positions are given in the trailing comments.
 */
struct brw_compact_instruction {
struct {
unsigned opcode:7; /* 0- 6 */
unsigned debug_control:1; /* 7- 7 */
unsigned control_index:5; /* 8-12 */
unsigned data_type_index:5; /* 13-17 */
unsigned sub_reg_index:5; /* 18-22 */
unsigned acc_wr_control:1; /* 23-23 */
unsigned conditionalmod:4; /* 24-27 */
unsigned flag_subreg_nr:1; /* 28-28 */
unsigned cmpt_ctrl:1; /* 29-29 */
unsigned src0_index:2; /* 30-31 */
} dw0;
 
struct {
unsigned src0_index:3; /* 32-34 */
unsigned src1_index:5; /* 35-39 */
unsigned dst_reg_nr:8; /* 40-47 */
unsigned src0_reg_nr:8; /* 48-55 */
unsigned src1_reg_nr:8; /* 56-63 */
} dw1;
};
 
#endif
/contrib/sdk/sources/Mesa/src/gallium/drivers/ilo/include/intel_chipset.h
0,0 → 1,266
/*
* Copyright © 2007 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*
* Authors:
* Eric Anholt <eric@anholt.net>
*
*/
 
#define PCI_CHIP_I810 0x7121
#define PCI_CHIP_I810_DC100 0x7123
#define PCI_CHIP_I810_E 0x7125
#define PCI_CHIP_I815 0x1132
 
#define PCI_CHIP_I830_M 0x3577
#define PCI_CHIP_845_G 0x2562
#define PCI_CHIP_I855_GM 0x3582
#define PCI_CHIP_I865_G 0x2572
 
#define PCI_CHIP_I915_G 0x2582
#define PCI_CHIP_E7221_G 0x258A
#define PCI_CHIP_I915_GM 0x2592
#define PCI_CHIP_I945_G 0x2772
#define PCI_CHIP_I945_GM 0x27A2
#define PCI_CHIP_I945_GME 0x27AE
 
#define PCI_CHIP_Q35_G 0x29B2
#define PCI_CHIP_G33_G 0x29C2
#define PCI_CHIP_Q33_G 0x29D2
 
#define PCI_CHIP_IGD_GM 0xA011
#define PCI_CHIP_IGD_G 0xA001
 
#define IS_IGDGM(devid) (devid == PCI_CHIP_IGD_GM)
#define IS_IGDG(devid) (devid == PCI_CHIP_IGD_G)
#define IS_IGD(devid) (IS_IGDG(devid) || IS_IGDGM(devid))
 
#define PCI_CHIP_I965_G 0x29A2
#define PCI_CHIP_I965_Q 0x2992
#define PCI_CHIP_I965_G_1 0x2982
#define PCI_CHIP_I946_GZ 0x2972
#define PCI_CHIP_I965_GM 0x2A02
#define PCI_CHIP_I965_GME 0x2A12
 
#define PCI_CHIP_GM45_GM 0x2A42
 
#define PCI_CHIP_IGD_E_G 0x2E02
#define PCI_CHIP_Q45_G 0x2E12
#define PCI_CHIP_G45_G 0x2E22
#define PCI_CHIP_G41_G 0x2E32
#define PCI_CHIP_B43_G 0x2E42
#define PCI_CHIP_B43_G1 0x2E92
 
#define PCI_CHIP_ILD_G 0x0042
#define PCI_CHIP_ILM_G 0x0046
 
#define PCI_CHIP_SANDYBRIDGE_GT1 0x0102 /* Desktop */
#define PCI_CHIP_SANDYBRIDGE_GT2 0x0112
#define PCI_CHIP_SANDYBRIDGE_GT2_PLUS 0x0122
#define PCI_CHIP_SANDYBRIDGE_M_GT1 0x0106 /* Mobile */
#define PCI_CHIP_SANDYBRIDGE_M_GT2 0x0116
#define PCI_CHIP_SANDYBRIDGE_M_GT2_PLUS 0x0126
#define PCI_CHIP_SANDYBRIDGE_S 0x010A /* Server */
 
#define PCI_CHIP_IVYBRIDGE_GT1 0x0152 /* Desktop */
#define PCI_CHIP_IVYBRIDGE_GT2 0x0162
#define PCI_CHIP_IVYBRIDGE_M_GT1 0x0156 /* Mobile */
#define PCI_CHIP_IVYBRIDGE_M_GT2 0x0166
#define PCI_CHIP_IVYBRIDGE_S_GT1 0x015a /* Server */
#define PCI_CHIP_IVYBRIDGE_S_GT2 0x016a
 
#define PCI_CHIP_BAYTRAIL_M_1 0x0F31
#define PCI_CHIP_BAYTRAIL_M_2 0x0F32
#define PCI_CHIP_BAYTRAIL_M_3 0x0F33
#define PCI_CHIP_BAYTRAIL_M_4 0x0157
#define PCI_CHIP_BAYTRAIL_D 0x0155
 
#define PCI_CHIP_HASWELL_GT1 0x0402 /* Desktop */
#define PCI_CHIP_HASWELL_GT2 0x0412
#define PCI_CHIP_HASWELL_GT3 0x0422
#define PCI_CHIP_HASWELL_M_GT1 0x0406 /* Mobile */
#define PCI_CHIP_HASWELL_M_GT2 0x0416
#define PCI_CHIP_HASWELL_M_GT3 0x0426
#define PCI_CHIP_HASWELL_S_GT1 0x040A /* Server */
#define PCI_CHIP_HASWELL_S_GT2 0x041A
#define PCI_CHIP_HASWELL_S_GT3 0x042A
#define PCI_CHIP_HASWELL_SDV_GT1 0x0C02 /* Desktop */
#define PCI_CHIP_HASWELL_SDV_GT2 0x0C12
#define PCI_CHIP_HASWELL_SDV_GT3 0x0C22
#define PCI_CHIP_HASWELL_SDV_M_GT1 0x0C06 /* Mobile */
#define PCI_CHIP_HASWELL_SDV_M_GT2 0x0C16
#define PCI_CHIP_HASWELL_SDV_M_GT3 0x0C26
#define PCI_CHIP_HASWELL_SDV_S_GT1 0x0C0A /* Server */
#define PCI_CHIP_HASWELL_SDV_S_GT2 0x0C1A
#define PCI_CHIP_HASWELL_SDV_S_GT3 0x0C2A
#define PCI_CHIP_HASWELL_ULT_GT1 0x0A02 /* Desktop */
#define PCI_CHIP_HASWELL_ULT_GT2 0x0A12
#define PCI_CHIP_HASWELL_ULT_GT3 0x0A22
#define PCI_CHIP_HASWELL_ULT_M_GT1 0x0A06 /* Mobile */
#define PCI_CHIP_HASWELL_ULT_M_GT2 0x0A16
#define PCI_CHIP_HASWELL_ULT_M_GT3 0x0A26
#define PCI_CHIP_HASWELL_ULT_S_GT1 0x0A0A /* Server */
#define PCI_CHIP_HASWELL_ULT_S_GT2 0x0A1A
#define PCI_CHIP_HASWELL_ULT_S_GT3 0x0A2A
#define PCI_CHIP_HASWELL_CRW_GT1 0x0D02 /* Desktop */
#define PCI_CHIP_HASWELL_CRW_GT2 0x0D12
#define PCI_CHIP_HASWELL_CRW_GT3 0x0D22
#define PCI_CHIP_HASWELL_CRW_M_GT1 0x0D06 /* Mobile */
#define PCI_CHIP_HASWELL_CRW_M_GT2 0x0D16
#define PCI_CHIP_HASWELL_CRW_M_GT3 0x0D26
#define PCI_CHIP_HASWELL_CRW_S_GT1 0x0D0A /* Server */
#define PCI_CHIP_HASWELL_CRW_S_GT2 0x0D1A
#define PCI_CHIP_HASWELL_CRW_S_GT3 0x0D2A
 
#define IS_MOBILE(devid) (devid == PCI_CHIP_I855_GM || \
devid == PCI_CHIP_I915_GM || \
devid == PCI_CHIP_I945_GM || \
devid == PCI_CHIP_I945_GME || \
devid == PCI_CHIP_I965_GM || \
devid == PCI_CHIP_I965_GME || \
devid == PCI_CHIP_GM45_GM || \
IS_IGD(devid) || \
devid == PCI_CHIP_ILM_G)
 
#define IS_G45(devid) (devid == PCI_CHIP_IGD_E_G || \
devid == PCI_CHIP_Q45_G || \
devid == PCI_CHIP_G45_G || \
devid == PCI_CHIP_G41_G || \
devid == PCI_CHIP_B43_G || \
devid == PCI_CHIP_B43_G1)
#define IS_GM45(devid) (devid == PCI_CHIP_GM45_GM)
#define IS_G4X(devid) (IS_G45(devid) || IS_GM45(devid))
 
#define IS_ILD(devid) (devid == PCI_CHIP_ILD_G)
#define IS_ILM(devid) (devid == PCI_CHIP_ILM_G)
#define IS_GEN5(devid) (IS_ILD(devid) || IS_ILM(devid))
 
#define IS_915(devid) (devid == PCI_CHIP_I915_G || \
devid == PCI_CHIP_E7221_G || \
devid == PCI_CHIP_I915_GM)
 
#define IS_945(devid) (devid == PCI_CHIP_I945_G || \
devid == PCI_CHIP_I945_GM || \
devid == PCI_CHIP_I945_GME || \
devid == PCI_CHIP_G33_G || \
devid == PCI_CHIP_Q33_G || \
devid == PCI_CHIP_Q35_G || IS_IGD(devid))
 
#define IS_GEN4(devid) (devid == PCI_CHIP_I965_G || \
devid == PCI_CHIP_I965_Q || \
devid == PCI_CHIP_I965_G_1 || \
devid == PCI_CHIP_I965_GM || \
devid == PCI_CHIP_I965_GME || \
devid == PCI_CHIP_I946_GZ || \
IS_G4X(devid))
 
/* Compat macro for intel_decode.c */
#define IS_IRONLAKE(devid) IS_GEN5(devid)
 
#define IS_SNB_GT1(devid) (devid == PCI_CHIP_SANDYBRIDGE_GT1 || \
devid == PCI_CHIP_SANDYBRIDGE_M_GT1 || \
devid == PCI_CHIP_SANDYBRIDGE_S)
 
#define IS_SNB_GT2(devid) (devid == PCI_CHIP_SANDYBRIDGE_GT2 || \
devid == PCI_CHIP_SANDYBRIDGE_GT2_PLUS || \
devid == PCI_CHIP_SANDYBRIDGE_M_GT2 || \
devid == PCI_CHIP_SANDYBRIDGE_M_GT2_PLUS)
 
#define IS_GEN6(devid) (IS_SNB_GT1(devid) || IS_SNB_GT2(devid))
 
#define IS_IVB_GT1(devid) (devid == PCI_CHIP_IVYBRIDGE_GT1 || \
devid == PCI_CHIP_IVYBRIDGE_M_GT1 || \
devid == PCI_CHIP_IVYBRIDGE_S_GT1)
 
#define IS_IVB_GT2(devid) (devid == PCI_CHIP_IVYBRIDGE_GT2 || \
devid == PCI_CHIP_IVYBRIDGE_M_GT2 || \
devid == PCI_CHIP_IVYBRIDGE_S_GT2)
 
#define IS_IVYBRIDGE(devid) (IS_IVB_GT1(devid) || IS_IVB_GT2(devid))
 
#define IS_BAYTRAIL(devid) (devid == PCI_CHIP_BAYTRAIL_M_1 || \
devid == PCI_CHIP_BAYTRAIL_M_2 || \
devid == PCI_CHIP_BAYTRAIL_M_3 || \
devid == PCI_CHIP_BAYTRAIL_M_4 || \
devid == PCI_CHIP_BAYTRAIL_D)
 
#define IS_GEN7(devid) (IS_IVYBRIDGE(devid) || \
IS_BAYTRAIL(devid) || \
IS_HASWELL(devid))
 
#define IS_HSW_GT1(devid) (devid == PCI_CHIP_HASWELL_GT1 || \
devid == PCI_CHIP_HASWELL_M_GT1 || \
devid == PCI_CHIP_HASWELL_S_GT1 || \
devid == PCI_CHIP_HASWELL_SDV_GT1 || \
devid == PCI_CHIP_HASWELL_SDV_M_GT1 || \
devid == PCI_CHIP_HASWELL_SDV_S_GT1 || \
devid == PCI_CHIP_HASWELL_ULT_GT1 || \
devid == PCI_CHIP_HASWELL_ULT_M_GT1 || \
devid == PCI_CHIP_HASWELL_ULT_S_GT1 || \
devid == PCI_CHIP_HASWELL_CRW_GT1 || \
devid == PCI_CHIP_HASWELL_CRW_M_GT1 || \
devid == PCI_CHIP_HASWELL_CRW_S_GT1)
#define IS_HSW_GT2(devid) (devid == PCI_CHIP_HASWELL_GT2 || \
devid == PCI_CHIP_HASWELL_M_GT2 || \
devid == PCI_CHIP_HASWELL_S_GT2 || \
devid == PCI_CHIP_HASWELL_SDV_GT2 || \
devid == PCI_CHIP_HASWELL_SDV_M_GT2 || \
devid == PCI_CHIP_HASWELL_SDV_S_GT2 || \
devid == PCI_CHIP_HASWELL_ULT_GT2 || \
devid == PCI_CHIP_HASWELL_ULT_M_GT2 || \
devid == PCI_CHIP_HASWELL_ULT_S_GT2 || \
devid == PCI_CHIP_HASWELL_CRW_GT2 || \
devid == PCI_CHIP_HASWELL_CRW_M_GT2 || \
devid == PCI_CHIP_HASWELL_CRW_S_GT2)
#define IS_HSW_GT3(devid) (devid == PCI_CHIP_HASWELL_GT3 || \
devid == PCI_CHIP_HASWELL_M_GT3 || \
devid == PCI_CHIP_HASWELL_S_GT3 || \
devid == PCI_CHIP_HASWELL_SDV_GT3 || \
devid == PCI_CHIP_HASWELL_SDV_M_GT3 || \
devid == PCI_CHIP_HASWELL_SDV_S_GT3 || \
devid == PCI_CHIP_HASWELL_ULT_GT3 || \
devid == PCI_CHIP_HASWELL_ULT_M_GT3 || \
devid == PCI_CHIP_HASWELL_ULT_S_GT3 || \
devid == PCI_CHIP_HASWELL_CRW_GT3 || \
devid == PCI_CHIP_HASWELL_CRW_M_GT3 || \
devid == PCI_CHIP_HASWELL_CRW_S_GT3)
 
#define IS_HASWELL(devid) (IS_HSW_GT1(devid) || \
IS_HSW_GT2(devid) || \
IS_HSW_GT3(devid))
 
#define IS_965(devid) (IS_GEN4(devid) || \
IS_G4X(devid) || \
IS_GEN5(devid) || \
IS_GEN6(devid) || \
IS_GEN7(devid))
 
#define IS_9XX(devid) (IS_915(devid) || \
IS_945(devid) || \
IS_965(devid))
 
#define IS_GEN3(devid) (IS_915(devid) || \
IS_945(devid))
 
#define IS_GEN2(devid) (devid == PCI_CHIP_I830_M || \
devid == PCI_CHIP_845_G || \
devid == PCI_CHIP_I855_GM || \
devid == PCI_CHIP_I865_G)
/contrib/sdk/sources/Mesa/src/gallium/drivers/ilo/include/intel_reg.h
0,0 → 1,298
/**************************************************************************
*
* Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
**************************************************************************/
 
#define CMD_MI (0x0 << 29)
#define CMD_2D (0x2 << 29)
#define CMD_3D (0x3 << 29)
 
#define MI_NOOP (CMD_MI | 0)
 
#define MI_BATCH_BUFFER_END (CMD_MI | 0xA << 23)
 
#define MI_FLUSH (CMD_MI | (4 << 23))
#define FLUSH_MAP_CACHE (1 << 0)
#define INHIBIT_FLUSH_RENDER_CACHE (1 << 2)
 
#define MI_LOAD_REGISTER_IMM (CMD_MI | (0x22 << 23))
 
#define MI_FLUSH_DW (CMD_MI | (0x26 << 23) | 2)
 
/* Stalls command execution waiting for the given events to have occurred. */
#define MI_WAIT_FOR_EVENT (CMD_MI | (0x3 << 23))
#define MI_WAIT_FOR_PLANE_B_FLIP (1<<6)
#define MI_WAIT_FOR_PLANE_A_FLIP (1<<2)
 
#define MI_STORE_REGISTER_MEM (CMD_MI | (0x24 << 23))
# define MI_STORE_REGISTER_MEM_USE_GGTT (1 << 22)
 
/* p189 */
#define _3DSTATE_LOAD_STATE_IMMEDIATE_1 (CMD_3D | (0x1d<<24) | (0x04<<16))
#define I1_LOAD_S(n) (1<<(4+n))
 
#define _3DSTATE_DRAWRECT_INFO (CMD_3D | (0x1d<<24) | (0x80<<16) | 0x3)
 
/** @{
*
* PIPE_CONTROL operation, a combination MI_FLUSH and register write with
* additional flushing control.
*/
#define _3DSTATE_PIPE_CONTROL (CMD_3D | (3 << 27) | (2 << 24))
#define PIPE_CONTROL_CS_STALL (1 << 20)
#define PIPE_CONTROL_GLOBAL_SNAPSHOT_COUNT_RESET (1 << 19)
#define PIPE_CONTROL_TLB_INVALIDATE (1 << 18)
#define PIPE_CONTROL_SYNC_GFDT (1 << 17)
#define PIPE_CONTROL_MEDIA_STATE_CLEAR (1 << 16)
#define PIPE_CONTROL_NO_WRITE (0 << 14)
#define PIPE_CONTROL_WRITE_IMMEDIATE (1 << 14)
#define PIPE_CONTROL_WRITE_DEPTH_COUNT (2 << 14)
#define PIPE_CONTROL_WRITE_TIMESTAMP (3 << 14)
#define PIPE_CONTROL_DEPTH_STALL (1 << 13)
#define PIPE_CONTROL_WRITE_FLUSH (1 << 12)
#define PIPE_CONTROL_INSTRUCTION_FLUSH (1 << 11)
#define PIPE_CONTROL_TC_FLUSH (1 << 10) /* GM45+ only */
#define PIPE_CONTROL_ISP_DIS (1 << 9)
#define PIPE_CONTROL_INTERRUPT_ENABLE (1 << 8)
/* GT */
#define PIPE_CONTROL_VF_CACHE_INVALIDATE (1 << 4)
#define PIPE_CONTROL_CONST_CACHE_INVALIDATE (1 << 3)
#define PIPE_CONTROL_STATE_CACHE_INVALIDATE (1 << 2)
#define PIPE_CONTROL_STALL_AT_SCOREBOARD (1 << 1)
#define PIPE_CONTROL_DEPTH_CACHE_FLUSH (1 << 0)
#define PIPE_CONTROL_PPGTT_WRITE (0 << 2)
#define PIPE_CONTROL_GLOBAL_GTT_WRITE (1 << 2)
 
/** @} */
 
/** @{
* 915 definitions
*
* 915 documents say that bits 31:28 and 1 are "undefined, must be zero."
*/
#define S0_VB_OFFSET_MASK 0x0ffffffc
#define S0_AUTO_CACHE_INV_DISABLE (1<<0)
/** @} */
 
/** @{
* 830 definitions
*/
#define S0_VB_OFFSET_MASK_830 0xffffff80
#define S0_VB_PITCH_SHIFT_830 1
#define S0_VB_ENABLE_830 (1<<0)
/** @} */
 
#define S1_VERTEX_WIDTH_SHIFT 24
#define S1_VERTEX_WIDTH_MASK (0x3f<<24)
#define S1_VERTEX_PITCH_SHIFT 16
#define S1_VERTEX_PITCH_MASK (0x3f<<16)
 
#define TEXCOORDFMT_2D 0x0
#define TEXCOORDFMT_3D 0x1
#define TEXCOORDFMT_4D 0x2
#define TEXCOORDFMT_1D 0x3
#define TEXCOORDFMT_2D_16 0x4
#define TEXCOORDFMT_4D_16 0x5
#define TEXCOORDFMT_NOT_PRESENT 0xf
#define S2_TEXCOORD_FMT0_MASK 0xf
#define S2_TEXCOORD_FMT1_SHIFT 4
#define S2_TEXCOORD_FMT(unit, type) ((type)<<(unit*4))
#define S2_TEXCOORD_NONE (~0)
#define S2_TEX_COUNT_SHIFT_830 12
#define S2_VERTEX_1_WIDTH_SHIFT_830 0
#define S2_VERTEX_0_WIDTH_SHIFT_830 6
/* S3 not interesting */
 
#define S4_POINT_WIDTH_SHIFT 23
#define S4_POINT_WIDTH_MASK (0x1ff<<23)
#define S4_LINE_WIDTH_SHIFT 19
#define S4_LINE_WIDTH_ONE (0x2<<19)
#define S4_LINE_WIDTH_MASK (0xf<<19)
#define S4_FLATSHADE_ALPHA (1<<18)
#define S4_FLATSHADE_FOG (1<<17)
#define S4_FLATSHADE_SPECULAR (1<<16)
#define S4_FLATSHADE_COLOR (1<<15)
#define S4_CULLMODE_BOTH (0<<13)
#define S4_CULLMODE_NONE (1<<13)
#define S4_CULLMODE_CW (2<<13)
#define S4_CULLMODE_CCW (3<<13)
#define S4_CULLMODE_MASK (3<<13)
#define S4_VFMT_POINT_WIDTH (1<<12)
#define S4_VFMT_SPEC_FOG (1<<11)
#define S4_VFMT_COLOR (1<<10)
#define S4_VFMT_DEPTH_OFFSET (1<<9)
#define S4_VFMT_XYZ (1<<6)
#define S4_VFMT_XYZW (2<<6)
#define S4_VFMT_XY (3<<6)
#define S4_VFMT_XYW (4<<6)
#define S4_VFMT_XYZW_MASK (7<<6)
#define S4_FORCE_DEFAULT_DIFFUSE (1<<5)
#define S4_FORCE_DEFAULT_SPECULAR (1<<4)
#define S4_LOCAL_DEPTH_OFFSET_ENABLE (1<<3)
#define S4_VFMT_FOG_PARAM (1<<2)
#define S4_SPRITE_POINT_ENABLE (1<<1)
#define S4_LINE_ANTIALIAS_ENABLE (1<<0)
 
#define S4_VFMT_MASK (S4_VFMT_POINT_WIDTH | \
S4_VFMT_SPEC_FOG | \
S4_VFMT_COLOR | \
S4_VFMT_DEPTH_OFFSET | \
S4_VFMT_XYZW_MASK | \
S4_VFMT_FOG_PARAM)
 
 
#define S5_WRITEDISABLE_ALPHA (1<<31)
#define S5_WRITEDISABLE_RED (1<<30)
#define S5_WRITEDISABLE_GREEN (1<<29)
#define S5_WRITEDISABLE_BLUE (1<<28)
#define S5_WRITEDISABLE_MASK (0xf<<28)
#define S5_FORCE_DEFAULT_POINT_SIZE (1<<27)
#define S5_LAST_PIXEL_ENABLE (1<<26)
#define S5_GLOBAL_DEPTH_OFFSET_ENABLE (1<<25)
#define S5_FOG_ENABLE (1<<24)
#define S5_STENCIL_REF_SHIFT 16
#define S5_STENCIL_REF_MASK (0xff<<16)
#define S5_STENCIL_TEST_FUNC_SHIFT 13
#define S5_STENCIL_TEST_FUNC_MASK (0x7<<13)
#define S5_STENCIL_FAIL_SHIFT 10
#define S5_STENCIL_FAIL_MASK (0x7<<10)
#define S5_STENCIL_PASS_Z_FAIL_SHIFT 7
#define S5_STENCIL_PASS_Z_FAIL_MASK (0x7<<7)
#define S5_STENCIL_PASS_Z_PASS_SHIFT 4
#define S5_STENCIL_PASS_Z_PASS_MASK (0x7<<4)
#define S5_STENCIL_WRITE_ENABLE (1<<3)
#define S5_STENCIL_TEST_ENABLE (1<<2)
#define S5_COLOR_DITHER_ENABLE (1<<1)
#define S5_LOGICOP_ENABLE (1<<0)
 
 
#define S6_ALPHA_TEST_ENABLE (1<<31)
#define S6_ALPHA_TEST_FUNC_SHIFT 28
#define S6_ALPHA_TEST_FUNC_MASK (0x7<<28)
#define S6_ALPHA_REF_SHIFT 20
#define S6_ALPHA_REF_MASK (0xff<<20)
#define S6_DEPTH_TEST_ENABLE (1<<19)
#define S6_DEPTH_TEST_FUNC_SHIFT 16
#define S6_DEPTH_TEST_FUNC_MASK (0x7<<16)
#define S6_CBUF_BLEND_ENABLE (1<<15)
#define S6_CBUF_BLEND_FUNC_SHIFT 12
#define S6_CBUF_BLEND_FUNC_MASK (0x7<<12)
#define S6_CBUF_SRC_BLEND_FACT_SHIFT 8
#define S6_CBUF_SRC_BLEND_FACT_MASK (0xf<<8)
#define S6_CBUF_DST_BLEND_FACT_SHIFT 4
#define S6_CBUF_DST_BLEND_FACT_MASK (0xf<<4)
#define S6_DEPTH_WRITE_ENABLE (1<<3)
#define S6_COLOR_WRITE_ENABLE (1<<2)
#define S6_TRISTRIP_PV_SHIFT 0
#define S6_TRISTRIP_PV_MASK (0x3<<0)
 
#define S7_DEPTH_OFFSET_CONST_MASK ~0
 
/* p143 */
#define _3DSTATE_BUF_INFO_CMD (CMD_3D | (0x1d<<24) | (0x8e<<16) | 1)
/* Dword 1 */
#define BUF_3D_ID_COLOR_BACK (0x3<<24)
#define BUF_3D_ID_DEPTH (0x7<<24)
#define BUF_3D_USE_FENCE (1<<23)
#define BUF_3D_TILED_SURFACE (1<<22)
#define BUF_3D_TILE_WALK_X 0
#define BUF_3D_TILE_WALK_Y (1<<21)
#define BUF_3D_PITCH(x) (((x)/4)<<2)
/* Dword 2 */
#define BUF_3D_ADDR(x) ((x) & ~0x3)
 
/* Primitive dispatch on 830-945 */
#define _3DPRIMITIVE (CMD_3D | (0x1f << 24))
#define PRIM_INDIRECT (1<<23)
#define PRIM_INLINE (0<<23)
#define PRIM_INDIRECT_SEQUENTIAL (0<<17)
#define PRIM_INDIRECT_ELTS (1<<17)
 
#define PRIM3D_TRILIST (0x0<<18)
#define PRIM3D_TRISTRIP (0x1<<18)
#define PRIM3D_TRISTRIP_RVRSE (0x2<<18)
#define PRIM3D_TRIFAN (0x3<<18)
#define PRIM3D_POLY (0x4<<18)
#define PRIM3D_LINELIST (0x5<<18)
#define PRIM3D_LINESTRIP (0x6<<18)
#define PRIM3D_RECTLIST (0x7<<18)
#define PRIM3D_POINTLIST (0x8<<18)
#define PRIM3D_DIB (0x9<<18)
#define PRIM3D_MASK (0x1f<<18)
 
#define XY_SETUP_BLT_CMD (CMD_2D | (0x01 << 22))
 
#define XY_COLOR_BLT_CMD (CMD_2D | (0x50 << 22))
 
#define XY_SRC_COPY_BLT_CMD (CMD_2D | (0x53 << 22))
 
#define XY_TEXT_IMMEDIATE_BLIT_CMD (CMD_2D | (0x31 << 22))
# define XY_TEXT_BYTE_PACKED (1 << 16)
 
/* BR00 */
#define XY_BLT_WRITE_ALPHA (1 << 21)
#define XY_BLT_WRITE_RGB (1 << 20)
#define XY_SRC_TILED (1 << 15)
#define XY_DST_TILED (1 << 11)
 
/* BR13 */
#define BR13_8 (0x0 << 24)
#define BR13_565 (0x1 << 24)
#define BR13_8888 (0x3 << 24)
 
#define FENCE_LINEAR 0
#define FENCE_XMAJOR 1
#define FENCE_YMAJOR 2
 
/* Pipeline Statistics Counter Registers */
#define IA_VERTICES_COUNT 0x2310
#define IA_PRIMITIVES_COUNT 0x2318
#define VS_INVOCATION_COUNT 0x2320
#define HS_INVOCATION_COUNT 0x2300
#define DS_INVOCATION_COUNT 0x2308
#define GS_INVOCATION_COUNT 0x2328
#define GS_PRIMITIVES_COUNT 0x2330
#define CL_INVOCATION_COUNT 0x2338
#define CL_PRIMITIVES_COUNT 0x2340
#define PS_INVOCATION_COUNT 0x2348
#define PS_DEPTH_COUNT 0x2350
 
#define SO_NUM_PRIM_STORAGE_NEEDED 0x2280
#define SO_PRIM_STORAGE_NEEDED0_IVB 0x5240
#define SO_PRIM_STORAGE_NEEDED1_IVB 0x5248
#define SO_PRIM_STORAGE_NEEDED2_IVB 0x5250
#define SO_PRIM_STORAGE_NEEDED3_IVB 0x5258
 
#define SO_NUM_PRIMS_WRITTEN 0x2288
#define SO_NUM_PRIMS_WRITTEN0_IVB 0x5200
#define SO_NUM_PRIMS_WRITTEN1_IVB 0x5208
#define SO_NUM_PRIMS_WRITTEN2_IVB 0x5210
#define SO_NUM_PRIMS_WRITTEN3_IVB 0x5218
 
#define TIMESTAMP 0x2358
 
#define BCS_SWCTRL 0x22200
# define BCS_SWCTRL_SRC_Y (1 << 0)
# define BCS_SWCTRL_DST_Y (1 << 1)
/contrib/sdk/sources/Mesa/src/gallium/drivers/ilo/shader/ilo_shader_cs.c
0,0 → 1,38
/*
* Mesa 3-D graphics library
*
* Copyright (C) 2012-2013 LunarG, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*
* Authors:
* Chia-I Wu <olv@lunarg.com>
*/
 
#include "ilo_shader_internal.h"
 
/**
 * Compile the compute shader.
 *
 * Compute shader compilation is not implemented yet, so this always
 * fails by returning NULL.
 */
struct ilo_shader *
ilo_shader_compile_cs(const struct ilo_shader_state *state,
                      const struct ilo_shader_variant *variant)
{
   (void) state;
   (void) variant;

   /* unimplemented */
   return NULL;
}
/contrib/sdk/sources/Mesa/src/gallium/drivers/ilo/shader/ilo_shader_fs.c
0,0 → 1,1799
/*
* Mesa 3-D graphics library
*
* Copyright (C) 2012-2013 LunarG, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*
* Authors:
* Chia-I Wu <olv@lunarg.com>
*/
 
#include "tgsi/tgsi_dump.h"
#include "tgsi/tgsi_util.h"
#include "toy_compiler.h"
#include "toy_tgsi.h"
#include "toy_legalize.h"
#include "toy_optimize.h"
#include "toy_helpers.h"
#include "ilo_context.h"
#include "ilo_shader_internal.h"
 
/**
 * State used while compiling a TGSI fragment shader into an ilo_shader.
 */
struct fs_compile_context {
   struct ilo_shader *shader;                /* the shader being built */
   const struct ilo_shader_variant *variant; /* variant-specific compile state */

   struct toy_compiler tc;                   /* EU instruction emitter */
   struct toy_tgsi tgsi;                     /* translated TGSI information */

   enum brw_message_target const_cache;      /* SFID used for constant reads */
   int dispatch_mode;                        /* FS dispatch mode */

   struct {
      /* GRFs of the barycentric payloads, indexed by interpolation mode */
      int barycentric_interps[BRW_WM_BARYCENTRIC_INTERP_MODE_COUNT];
      int source_depth;                      /* GRF of the source depth payload */
      int source_w;                          /* GRF of the source W payload */
      int pos_offset;                        /* GRF of the position offset payload */
   } payloads[2];  /* NOTE(review): presumably one entry per SIMD8 half — confirm */

   int first_const_grf;   /* first GRF holding push constants */
   int first_attr_grf;    /* first GRF holding the attribute payload */
   int first_free_grf;    /* first GRF free for allocation */
   int last_free_grf;     /* last GRF free for allocation */

   int num_grf_per_vrf;   /* GRFs backing each virtual register */

   int first_free_mrf;    /* first MRF free for message payloads */
   int last_free_mrf;     /* last MRF free for message payloads */
};
 
/**
 * Fetch the fragment position into \p dst as (x, y, z, 1/w).
 *
 * X and Y are reconstructed from the interleaved subspan X/Y pairs in
 * the payload (r1), adjusted for the coordinate-origin and pixel-center
 * conventions requested by the TGSI properties.  Z comes from the
 * source depth payload and W is the reciprocal of the source W payload.
 */
static void
fetch_position(struct fs_compile_context *fcc, struct toy_dst dst)
{
   struct toy_compiler *tc = &fcc->tc;
   const struct toy_src src_z =
      tsrc(TOY_FILE_GRF, fcc->payloads[0].source_depth, 0);
   const struct toy_src src_w =
      tsrc(TOY_FILE_GRF, fcc->payloads[0].source_w, 0);
   /* fall back to 1 to keep the (fb_height - 1) offset below sane */
   const int fb_height =
      (fcc->variant->u.fs.fb_height) ? fcc->variant->u.fs.fb_height : 1;
   const bool origin_upper_left =
      (fcc->tgsi.props.fs_coord_origin == TGSI_FS_COORD_ORIGIN_UPPER_LEFT);
   const bool pixel_center_integer =
      (fcc->tgsi.props.fs_coord_pixel_center ==
       TGSI_FS_COORD_PIXEL_CENTER_INTEGER);
   struct toy_src subspan_x, subspan_y;
   struct toy_dst tmp, tmp_uw;
   struct toy_dst real_dst[4];

   tdst_transpose(dst, real_dst);

   /* UW subspan coordinates start at r1.2; Y follows X by one UW */
   subspan_x = tsrc_uw(tsrc(TOY_FILE_GRF, 1, 2 * 4));
   subspan_x = tsrc_rect(subspan_x, TOY_RECT_240);

   subspan_y = tsrc_offset(subspan_x, 0, 1);

   tmp_uw = tdst_uw(tc_alloc_tmp(tc));
   tmp = tc_alloc_tmp(tc);

   /* X: add the per-pixel X offsets within each subspan (packed nibble
    * vector immediate), then convert to float */
   tc_ADD(tc, tmp_uw, subspan_x, tsrc_imm_v(0x10101010));
   tc_MOV(tc, tmp, tsrc_from(tmp_uw));
   if (pixel_center_integer)
      tc_MOV(tc, real_dst[0], tsrc_from(tmp));
   else
      tc_ADD(tc, real_dst[0], tsrc_from(tmp), tsrc_imm_f(0.5f));

   /* Y: add the per-pixel Y offsets within each subspan */
   tc_ADD(tc, tmp_uw, subspan_y, tsrc_imm_v(0x11001100));
   tc_MOV(tc, tmp, tsrc_from(tmp_uw));
   if (origin_upper_left && pixel_center_integer) {
      tc_MOV(tc, real_dst[1], tsrc_from(tmp));
   }
   else {
      struct toy_src y = tsrc_from(tmp);
      float offset = 0.0f;

      if (!pixel_center_integer)
         offset += 0.5f;

      /* flip Y for a lower-left origin */
      if (!origin_upper_left) {
         offset += (float) (fb_height - 1);
         y = tsrc_negate(y);
      }

      tc_ADD(tc, real_dst[1], y, tsrc_imm_f(offset));
   }

   /* Z is passed through; W is 1 / source W */
   tc_MOV(tc, real_dst[2], src_z);
   tc_INV(tc, real_dst[3], src_w);
}
 
/**
 * Fetch the face input into \p dst as (face, 0, 0, 1), where face is
 * +1.0 for one facing and -1.0 for the other.
 *
 * NOTE(review): this extracts bit 15 of r0.0 — presumably the
 * back-facing bit of the FS payload; confirm against the PRM.
 */
static void
fetch_face(struct fs_compile_context *fcc, struct toy_dst dst)
{
   struct toy_compiler *tc = &fcc->tc;
   const struct toy_src r0 = tsrc_d(tsrc(TOY_FILE_GRF, 0, 0));
   struct toy_dst tmp_f, tmp;
   struct toy_dst real_dst[4];

   tdst_transpose(dst, real_dst);

   tmp_f = tc_alloc_tmp(tc);
   tmp = tdst_d(tmp_f);
   /* isolate the facing bit: (r0.0 >> 15) & 1, then convert to float */
   tc_SHR(tc, tmp, tsrc_rect(r0, TOY_RECT_010), tsrc_imm_d(15));
   tc_AND(tc, tmp, tsrc_from(tmp), tsrc_imm_d(1));
   tc_MOV(tc, tmp_f, tsrc_from(tmp));

   /* map 0 -> 1.0 and 1 -> -1.0 via f * -2.0 + 1.0 */
   tc_MUL(tc, tmp_f, tsrc_from(tmp_f), tsrc_imm_f(-2.0f));
   tc_ADD(tc, real_dst[0], tsrc_from(tmp_f), tsrc_imm_f(1.0f));

   tc_MOV(tc, real_dst[1], tsrc_imm_f(0.0f));
   tc_MOV(tc, real_dst[2], tsrc_imm_f(0.0f));
   tc_MOV(tc, real_dst[3], tsrc_imm_f(1.0f));
}
 
/**
 * Fetch (interpolate) a generic FS input attribute into \p dst.
 *
 * Constant-interpolated attributes are read directly from the attribute
 * payload; others are evaluated with PLN against the barycentric payload
 * matching the attribute's interpolation mode.
 */
static void
fetch_attr(struct fs_compile_context *fcc, struct toy_dst dst, int slot)
{
   struct toy_compiler *tc = &fcc->tc;
   struct toy_dst real_dst[4];
   bool is_const = false;
   int grf, mode, ch;

   tdst_transpose(dst, real_dst);

   /* each attribute occupies two GRFs in the payload */
   grf = fcc->first_attr_grf + slot * 2;

   /* decide between constant fetch and a barycentric mode */
   switch (fcc->tgsi.inputs[slot].interp) {
   case TGSI_INTERPOLATE_CONSTANT:
      is_const = true;
      break;
   case TGSI_INTERPOLATE_LINEAR:
      if (fcc->tgsi.inputs[slot].centroid)
         mode = BRW_WM_NONPERSPECTIVE_CENTROID_BARYCENTRIC;
      else
         mode = BRW_WM_NONPERSPECTIVE_PIXEL_BARYCENTRIC;
      break;
   case TGSI_INTERPOLATE_COLOR:
      /* colors become constant when the variant requests flatshading */
      if (fcc->variant->u.fs.flatshade) {
         is_const = true;
         break;
      }
      /* fall through */
   case TGSI_INTERPOLATE_PERSPECTIVE:
      if (fcc->tgsi.inputs[slot].centroid)
         mode = BRW_WM_PERSPECTIVE_CENTROID_BARYCENTRIC;
      else
         mode = BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC;
      break;
   default:
      assert(!"unexpected FS interpolation");
      mode = BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC;
      break;
   }

   if (is_const) {
      /* per-channel constants live in dwords 3 and 7 of the two GRFs */
      struct toy_src a0[4];

      a0[0] = tsrc(TOY_FILE_GRF, grf + 0, 3 * 4);
      a0[1] = tsrc(TOY_FILE_GRF, grf + 0, 7 * 4);
      a0[2] = tsrc(TOY_FILE_GRF, grf + 1, 3 * 4);
      a0[3] = tsrc(TOY_FILE_GRF, grf + 1, 7 * 4);

      for (ch = 0; ch < 4; ch++)
         tc_MOV(tc, real_dst[ch], tsrc_rect(a0[ch], TOY_RECT_010));
   }
   else {
      /* PLN evaluates the plane equation against the (u, v) payload */
      struct toy_src attr[4], uv;

      attr[0] = tsrc(TOY_FILE_GRF, grf + 0, 0);
      attr[1] = tsrc(TOY_FILE_GRF, grf + 0, 4 * 4);
      attr[2] = tsrc(TOY_FILE_GRF, grf + 1, 0);
      attr[3] = tsrc(TOY_FILE_GRF, grf + 1, 4 * 4);

      uv = tsrc(TOY_FILE_GRF, fcc->payloads[0].barycentric_interps[mode], 0);

      for (ch = 0; ch < 4; ch++) {
         tc_add2(tc, BRW_OPCODE_PLN, real_dst[ch],
               tsrc_rect(attr[ch], TOY_RECT_010), uv);
      }
   }

   /* fog is a scalar; force the result to (f, 0, 0, 1) */
   if (fcc->tgsi.inputs[slot].semantic_name == TGSI_SEMANTIC_FOG) {
      tc_MOV(tc, real_dst[1], tsrc_imm_f(0.0f));
      tc_MOV(tc, real_dst[2], tsrc_imm_f(0.0f));
      tc_MOV(tc, real_dst[3], tsrc_imm_f(1.0f));
   }
}
 
/**
 * Lower a TGSI input fetch: dispatch to the fetcher matching the
 * input's semantic.  Unknown input indices are silently ignored.
 */
static void
fs_lower_opcode_tgsi_in(struct fs_compile_context *fcc,
                        struct toy_dst dst, int dim, int idx)
{
   const int slot = toy_tgsi_find_input(&fcc->tgsi, idx);

   assert(!dim);

   if (slot < 0)
      return;

   if (fcc->tgsi.inputs[slot].semantic_name == TGSI_SEMANTIC_POSITION)
      fetch_position(fcc, dst);
   else if (fcc->tgsi.inputs[slot].semantic_name == TGSI_SEMANTIC_FACE)
      fetch_face(fcc, dst);
   else
      fetch_attr(fcc, dst, slot);
}
 
/**
 * Lower an indirectly addressed constant fetch by issuing a sampler LD
 * message against the constant surface, with the per-channel index as
 * the coordinate.
 *
 * On an unsupported execution size this fails the assertion, writes
 * 0.0f to \p dst, and returns without emitting a message.
 */
static void
fs_lower_opcode_tgsi_indirect_const(struct fs_compile_context *fcc,
                                    struct toy_dst dst, int dim,
                                    struct toy_src idx)
{
   const struct toy_dst offset =
      tdst_ud(tdst(TOY_FILE_MRF, fcc->first_free_mrf, 0));
   struct toy_compiler *tc = &fcc->tc;
   unsigned simd_mode, param_size;
   struct toy_inst *inst;
   struct toy_src desc, real_src[4];
   struct toy_dst tmp, real_dst[4];
   int i;

   tsrc_transpose(idx, real_src);

   /* set the message offset from the x channel of the index */
   inst = tc_MOV(tc, offset, real_src[0]);
   inst->mask_ctrl = BRW_MASK_DISABLE;

   switch (inst->exec_size) {
   case BRW_EXECUTE_8:
      simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD8;
      param_size = 1;
      break;
   case BRW_EXECUTE_16:
      simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD16;
      param_size = 2;
      break;
   default:
      assert(!"unsupported execution size");
      tc_MOV(tc, dst, tsrc_imm_f(0.0f));
      return;
   }

   desc = tsrc_imm_mdesc_sampler(tc, param_size, param_size * 4, false,
         simd_mode,
         GEN5_SAMPLER_MESSAGE_SAMPLE_LD,
         0,
         ILO_WM_CONST_SURFACE(dim));

   tmp = tdst(TOY_FILE_VRF, tc_alloc_vrf(tc, param_size * 4), 0);
   inst = tc_SEND(tc, tmp, tsrc_from(offset), desc, BRW_SFID_SAMPLER);
   inst->mask_ctrl = BRW_MASK_DISABLE;

   tdst_transpose(dst, real_dst);
   for (i = 0; i < 4; i++) {
      const struct toy_src src =
         tsrc_offset(tsrc_from(tmp), param_size * i, 0);

      /* cast to type D to make sure these are raw moves */
      tc_MOV(tc, tdst_d(real_dst[i]), tsrc_d(src));
   }
}
 
/**
 * Lower a directly addressed constant fetch on Gen6 with an OWord
 * block read from the constant surface, sent to fcc->const_cache.
 */
static void
fs_lower_opcode_tgsi_const_gen6(struct fs_compile_context *fcc,
                                struct toy_dst dst, int dim, struct toy_src idx)
{
   const struct toy_dst header =
      tdst_ud(tdst(TOY_FILE_MRF, fcc->first_free_mrf, 0));
   const struct toy_dst global_offset =
      tdst_ud(tdst(TOY_FILE_MRF, fcc->first_free_mrf, 2 * 4));
   const struct toy_src r0 = tsrc_ud(tsrc(TOY_FILE_GRF, 0, 0));
   struct toy_compiler *tc = &fcc->tc;
   unsigned msg_type, msg_ctrl, msg_len;
   struct toy_inst *inst;
   struct toy_src desc;
   struct toy_dst tmp, real_dst[4];
   int i;

   /* set message header, copied from r0 */
   inst = tc_MOV(tc, header, r0);
   inst->mask_ctrl = BRW_MASK_DISABLE;

   /* set global offset in dword 2 of the header */
   inst = tc_MOV(tc, global_offset, idx);
   inst->mask_ctrl = BRW_MASK_DISABLE;
   inst->exec_size = BRW_EXECUTE_1;
   inst->src[0].rect = TOY_RECT_010;

   /* read a single (low) OWord */
   msg_type = BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ;
   msg_ctrl = BRW_DATAPORT_OWORD_BLOCK_1_OWORDLOW << 8;
   msg_len = 1;

   desc = tsrc_imm_mdesc_data_port(tc, false, msg_len, 1, true, false,
         msg_type, msg_ctrl, ILO_WM_CONST_SURFACE(dim));

   tmp = tc_alloc_tmp(tc);

   tc_SEND(tc, tmp, tsrc_from(header), desc, fcc->const_cache);

   tdst_transpose(dst, real_dst);
   for (i = 0; i < 4; i++) {
      const struct toy_src src =
         tsrc_offset(tsrc_rect(tsrc_from(tmp), TOY_RECT_010), 0, i);

      /* cast to type D to make sure these are raw moves */
      tc_MOV(tc, tdst_d(real_dst[i]), tsrc_d(src));
   }
}
 
/**
 * Lower a directly addressed constant fetch on Gen7 with a SIMD4x2
 * sampler LD message from the constant surface.
 */
static void
fs_lower_opcode_tgsi_const_gen7(struct fs_compile_context *fcc,
                                struct toy_dst dst, int dim, struct toy_src idx)
{
   struct toy_compiler *tc = &fcc->tc;
   const struct toy_dst offset =
      tdst_ud(tdst(TOY_FILE_MRF, fcc->first_free_mrf, 0));
   struct toy_src desc;
   struct toy_inst *inst;
   struct toy_dst tmp, real_dst[4];
   int i;

   /*
    * In 4c1fdae0a01b3f92ec03b61aac1d3df500d51fc6, pull constant load was
    * changed from OWord Block Read to ld to increase performance in the
    * classic driver.  Since we use the constant cache instead of the data
    * cache, I wonder if we still want to follow the classic driver.
    */

   /* set the message offset (replicated scalar index) */
   inst = tc_MOV(tc, offset, tsrc_rect(idx, TOY_RECT_010));
   inst->exec_size = BRW_EXECUTE_8;
   inst->mask_ctrl = BRW_MASK_DISABLE;

   desc = tsrc_imm_mdesc_sampler(tc, 1, 1, false,
         BRW_SAMPLER_SIMD_MODE_SIMD4X2,
         GEN5_SAMPLER_MESSAGE_SAMPLE_LD,
         0,
         ILO_WM_CONST_SURFACE(dim));

   tmp = tc_alloc_tmp(tc);
   inst = tc_SEND(tc, tmp, tsrc_from(offset), desc, BRW_SFID_SAMPLER);
   inst->exec_size = BRW_EXECUTE_8;
   inst->mask_ctrl = BRW_MASK_DISABLE;

   tdst_transpose(dst, real_dst);
   for (i = 0; i < 4; i++) {
      const struct toy_src src =
         tsrc_offset(tsrc_rect(tsrc_from(tmp), TOY_RECT_010), 0, i);

      /* cast to type D to make sure these are raw moves */
      tc_MOV(tc, tdst_d(real_dst[i]), tsrc_d(src));
   }
}
 
/**
 * Lower an immediate fetch: copy the four 32-bit immediate words into
 * the destination channels with raw (bit-exact) moves.
 */
static void
fs_lower_opcode_tgsi_imm(struct fs_compile_context *fcc,
                         struct toy_dst dst, int idx)
{
   struct toy_compiler *tc = &fcc->tc;
   const uint32_t *imm = toy_tgsi_get_imm(&fcc->tgsi, idx, NULL);
   struct toy_dst real_dst[4];
   int i;

   tdst_transpose(dst, real_dst);

   /* raw moves: UD types on both sides preserve the bit patterns */
   for (i = 0; i < 4; i++)
      tc_MOV(tc, tdst_ud(real_dst[i]), tsrc_imm_ud(imm[i]));
}
 
/**
 * Lower TOY_OPCODE_TGSI_SV.  No fragment-shader system value is handled
 * yet: a zero is written to \p dst and the compile is marked as failed.
 */
static void
fs_lower_opcode_tgsi_sv(struct fs_compile_context *fcc,
                        struct toy_dst dst, int dim, int idx)
{
   struct toy_compiler *tc = &fcc->tc;
   const struct toy_tgsi *tgsi = &fcc->tgsi;
   int sv_slot;

   assert(!dim);

   sv_slot = toy_tgsi_find_system_value(tgsi, idx);
   if (sv_slot < 0)
      return;

   switch (tgsi->system_values[sv_slot].semantic_name) {
   case TGSI_SEMANTIC_PRIMID:
   case TGSI_SEMANTIC_INSTANCEID:
   case TGSI_SEMANTIC_VERTEXID:
   default:
      /* placeholder: emit zero so later code has a defined value */
      tc_fail(tc, "unhandled system value");
      tc_MOV(tc, dst, tsrc_imm_d(0));
      break;
   }
}
 
/**
 * Lower a TGSI fetch with direct (immediate) addressing.  src[0] is the
 * dimension and src[1] the index, both immediates.  The virtual
 * instruction is replaced by real instructions and then discarded.
 */
static void
fs_lower_opcode_tgsi_direct(struct fs_compile_context *fcc,
                            struct toy_inst *inst)
{
   struct toy_compiler *tc = &fcc->tc;
   int dim, idx;

   assert(inst->src[0].file == TOY_FILE_IMM);
   dim = inst->src[0].val32;

   assert(inst->src[1].file == TOY_FILE_IMM);
   idx = inst->src[1].val32;

   switch (inst->opcode) {
   case TOY_OPCODE_TGSI_IN:
      fs_lower_opcode_tgsi_in(fcc, inst->dst, dim, idx);
      break;
   case TOY_OPCODE_TGSI_CONST:
      /* the const lowerers take the index as a toy_src, hence src[1] */
      if (tc->dev->gen >= ILO_GEN(7))
         fs_lower_opcode_tgsi_const_gen7(fcc, inst->dst, dim, inst->src[1]);
      else
         fs_lower_opcode_tgsi_const_gen6(fcc, inst->dst, dim, inst->src[1]);
      break;
   case TOY_OPCODE_TGSI_SV:
      fs_lower_opcode_tgsi_sv(fcc, inst->dst, dim, idx);
      break;
   case TOY_OPCODE_TGSI_IMM:
      assert(!dim);
      fs_lower_opcode_tgsi_imm(fcc, inst->dst, idx);
      break;
   default:
      tc_fail(tc, "unhandled TGSI fetch");
      break;
   }

   /* the virtual instruction has been fully expanded */
   tc_discard_inst(tc, inst);
}
 
/**
 * Lower a TGSI fetch/store with indirect addressing.  The operands are
 *
 *   src[0] := file (immediate)
 *   src[1] := dimension (immediate), src[2] := indirect dimension
 *   src[3] := index (immediate),     src[4] := indirect index
 *
 * Only indirect fetches from TGSI_FILE_CONSTANT are supported.
 */
static void
fs_lower_opcode_tgsi_indirect(struct fs_compile_context *fcc,
                              struct toy_inst *inst)
{
   struct toy_compiler *tc = &fcc->tc;
   enum tgsi_file_type file;
   int dim, idx;
   struct toy_src indirect_dim, indirect_idx;

   assert(inst->src[0].file == TOY_FILE_IMM);
   file = inst->src[0].val32;

   assert(inst->src[1].file == TOY_FILE_IMM);
   dim = inst->src[1].val32;
   indirect_dim = inst->src[2];

   assert(inst->src[3].file == TOY_FILE_IMM);
   idx = inst->src[3].val32;
   indirect_idx = inst->src[4];

   /* no dimension indirection */
   assert(indirect_dim.file == TOY_FILE_IMM);
   dim += indirect_dim.val32;

   switch (inst->opcode) {
   case TOY_OPCODE_TGSI_INDIRECT_FETCH:
      if (file == TGSI_FILE_CONSTANT) {
         /* fold the constant part of the index into the indirect one */
         if (idx) {
            struct toy_dst tmp = tc_alloc_tmp(tc);

            tc_ADD(tc, tmp, indirect_idx, tsrc_imm_d(idx));
            indirect_idx = tsrc_from(tmp);
         }

         fs_lower_opcode_tgsi_indirect_const(fcc, inst->dst, dim, indirect_idx);
         break;
      }
      /* fall through */
   case TOY_OPCODE_TGSI_INDIRECT_STORE:
   default:
      tc_fail(tc, "unhandled TGSI indirection");
      break;
   }

   tc_discard_inst(tc, inst);
}
 
/**
 * Emit instructions to move sampling parameters to the message registers.
 *
 * Gen6 message layout: coordinates occupy the first four parameter slots;
 * lod/bias/ref follow in fixed positions per message type.  Each parameter
 * slot is \p param_size registers wide.  Returns the message length in
 * registers.
 */
static int
fs_add_sampler_params_gen6(struct toy_compiler *tc, int msg_type,
                           int base_mrf, int param_size,
                           struct toy_src *coords, int num_coords,
                           struct toy_src bias_or_lod, struct toy_src ref_or_si,
                           struct toy_src *ddx, struct toy_src *ddy,
                           int num_derivs)
{
   int num_params, i;

   assert(num_coords <= 4);
   assert(num_derivs <= 3 && num_derivs <= num_coords);

#define SAMPLER_PARAM(p) (tdst(TOY_FILE_MRF, base_mrf + (p) * param_size, 0))
   switch (msg_type) {
   case GEN5_SAMPLER_MESSAGE_SAMPLE:
      /* (u, v, r, ai) */
      for (i = 0; i < num_coords; i++)
         tc_MOV(tc, SAMPLER_PARAM(i), coords[i]);
      num_params = num_coords;
      break;
   case GEN5_SAMPLER_MESSAGE_SAMPLE_BIAS:
   case GEN5_SAMPLER_MESSAGE_SAMPLE_LOD:
      /* (u, v, r, ai, bias/lod) */
      for (i = 0; i < num_coords; i++)
         tc_MOV(tc, SAMPLER_PARAM(i), coords[i]);
      tc_MOV(tc, SAMPLER_PARAM(4), bias_or_lod);
      num_params = 5;
      break;
   case GEN5_SAMPLER_MESSAGE_SAMPLE_COMPARE:
      /* (u, v, r, ai, ref) */
      for (i = 0; i < num_coords; i++)
         tc_MOV(tc, SAMPLER_PARAM(i), coords[i]);
      tc_MOV(tc, SAMPLER_PARAM(4), ref_or_si);
      num_params = 5;
      break;
   case GEN5_SAMPLER_MESSAGE_SAMPLE_DERIVS:
      /* (u, v, r, ai, dudx, dudy, dvdx, dvdy, ...) interleaved */
      for (i = 0; i < num_coords; i++)
         tc_MOV(tc, SAMPLER_PARAM(i), coords[i]);
      for (i = 0; i < num_derivs; i++) {
         tc_MOV(tc, SAMPLER_PARAM(4 + i * 2), ddx[i]);
         tc_MOV(tc, SAMPLER_PARAM(5 + i * 2), ddy[i]);
      }
      num_params = 4 + num_derivs * 2;
      break;
   case GEN5_SAMPLER_MESSAGE_SAMPLE_BIAS_COMPARE:
   case GEN5_SAMPLER_MESSAGE_SAMPLE_LOD_COMPARE:
      /* (u, v, r, ai, ref, bias/lod) */
      for (i = 0; i < num_coords; i++)
         tc_MOV(tc, SAMPLER_PARAM(i), coords[i]);
      tc_MOV(tc, SAMPLER_PARAM(4), ref_or_si);
      tc_MOV(tc, SAMPLER_PARAM(5), bias_or_lod);
      num_params = 6;
      break;
   case GEN5_SAMPLER_MESSAGE_SAMPLE_LD:
      assert(num_coords <= 3);

      /* (u, v, r, lod, si) as raw integers */
      for (i = 0; i < num_coords; i++)
         tc_MOV(tc, tdst_d(SAMPLER_PARAM(i)), coords[i]);
      tc_MOV(tc, tdst_d(SAMPLER_PARAM(3)), bias_or_lod);
      tc_MOV(tc, tdst_d(SAMPLER_PARAM(4)), ref_or_si);
      num_params = 5;
      break;
   case GEN5_SAMPLER_MESSAGE_SAMPLE_RESINFO:
      /* (lod) only */
      tc_MOV(tc, tdst_d(SAMPLER_PARAM(0)), bias_or_lod);
      num_params = 1;
      break;
   default:
      tc_fail(tc, "unknown sampler opcode");
      num_params = 0;
      break;
   }
#undef SAMPLER_PARAM

   return num_params * param_size;
}
 
/**
 * Gen7 variant of fs_add_sampler_params_gen6().  The parameter ordering
 * differs from gen6: lod/bias/ref come before the coordinates, and TXD
 * interleaves each coordinate with its derivatives.  Returns the message
 * length in registers.
 */
static int
fs_add_sampler_params_gen7(struct toy_compiler *tc, int msg_type,
                           int base_mrf, int param_size,
                           struct toy_src *coords, int num_coords,
                           struct toy_src bias_or_lod, struct toy_src ref_or_si,
                           struct toy_src *ddx, struct toy_src *ddy,
                           int num_derivs)
{
   int num_params, i;

   assert(num_coords <= 4);
   assert(num_derivs <= 3 && num_derivs <= num_coords);

#define SAMPLER_PARAM(p) (tdst(TOY_FILE_MRF, base_mrf + (p) * param_size, 0))
   switch (msg_type) {
   case GEN5_SAMPLER_MESSAGE_SAMPLE:
      /* (u, v, r, ai) */
      for (i = 0; i < num_coords; i++)
         tc_MOV(tc, SAMPLER_PARAM(i), coords[i]);
      num_params = num_coords;
      break;
   case GEN5_SAMPLER_MESSAGE_SAMPLE_BIAS:
   case GEN5_SAMPLER_MESSAGE_SAMPLE_LOD:
      /* (bias/lod, u, v, r, ai) */
      tc_MOV(tc, SAMPLER_PARAM(0), bias_or_lod);
      for (i = 0; i < num_coords; i++)
         tc_MOV(tc, SAMPLER_PARAM(1 + i), coords[i]);
      num_params = 1 + num_coords;
      break;
   case GEN5_SAMPLER_MESSAGE_SAMPLE_COMPARE:
      /* (ref, u, v, r, ai) */
      tc_MOV(tc, SAMPLER_PARAM(0), ref_or_si);
      for (i = 0; i < num_coords; i++)
         tc_MOV(tc, SAMPLER_PARAM(1 + i), coords[i]);
      num_params = 1 + num_coords;
      break;
   case GEN5_SAMPLER_MESSAGE_SAMPLE_DERIVS:
      /* (u, dudx, dudy, v, dvdx, dvdy, ...) interleaved per coordinate */
      for (i = 0; i < num_coords; i++) {
         tc_MOV(tc, SAMPLER_PARAM(i * 3), coords[i]);
         if (i < num_derivs) {
            tc_MOV(tc, SAMPLER_PARAM(i * 3 + 1), ddx[i]);
            tc_MOV(tc, SAMPLER_PARAM(i * 3 + 2), ddy[i]);
         }
      }
      /* the last coordinate without derivatives needs no trailing slots */
      num_params = num_coords * 3 - ((num_coords > num_derivs) ? 2 : 0);
      break;
   case GEN5_SAMPLER_MESSAGE_SAMPLE_BIAS_COMPARE:
   case GEN5_SAMPLER_MESSAGE_SAMPLE_LOD_COMPARE:
      /* (ref, bias/lod, u, v, r, ai) */
      tc_MOV(tc, SAMPLER_PARAM(0), ref_or_si);
      tc_MOV(tc, SAMPLER_PARAM(1), bias_or_lod);
      for (i = 0; i < num_coords; i++)
         tc_MOV(tc, SAMPLER_PARAM(2 + i), coords[i]);
      num_params = 2 + num_coords;
      break;
   case GEN5_SAMPLER_MESSAGE_SAMPLE_LD:
      assert(num_coords >= 1 && num_coords <= 3);

      /* (u, lod, v, r) as raw integers */
      tc_MOV(tc, tdst_d(SAMPLER_PARAM(0)), coords[0]);
      tc_MOV(tc, tdst_d(SAMPLER_PARAM(1)), bias_or_lod);
      for (i = 1; i < num_coords; i++)
         tc_MOV(tc, tdst_d(SAMPLER_PARAM(1 + i)), coords[i]);
      num_params = 1 + num_coords;
      break;
   case GEN5_SAMPLER_MESSAGE_SAMPLE_RESINFO:
      /* (lod) only */
      tc_MOV(tc, tdst_d(SAMPLER_PARAM(0)), bias_or_lod);
      num_params = 1;
      break;
   default:
      tc_fail(tc, "unknown sampler opcode");
      num_params = 0;
      break;
   }
#undef SAMPLER_PARAM

   return num_params * param_size;
}
 
/**
 * Set up message registers and return the message descriptor for sampling.
 *
 * Maps a TGSI sampling virtual opcode to a hardware sampler message type,
 * gathers the per-opcode parameters (coordinates, bias/lod, shadow ref,
 * derivatives), applies cube-coordinate normalization and optional
 * coordinate saturation, and fills the MRFs starting at \p base_mrf.
 * On failure (via tc_fail) a null source is returned.
 */
static struct toy_src
fs_prepare_tgsi_sampling(struct toy_compiler *tc, const struct toy_inst *inst,
                         int base_mrf, const uint32_t *saturate_coords,
                         unsigned *ret_sampler_index)
{
   unsigned simd_mode, msg_type, msg_len, sampler_index, binding_table_index;
   struct toy_src coords[4], ddx[4], ddy[4], bias_or_lod, ref_or_si;
   int num_coords, ref_pos, num_derivs;
   int sampler_src, param_size, i;

   /* each parameter slot is 1 register in SIMD8 mode, 2 in SIMD16 */
   switch (inst->exec_size) {
   case BRW_EXECUTE_8:
      simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD8;
      param_size = 1;
      break;
   case BRW_EXECUTE_16:
      simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD16;
      param_size = 2;
      break;
   default:
      tc_fail(tc, "unsupported execute size for sampling");
      return tsrc_null();
      break;
   }

   num_coords = tgsi_util_get_texture_coord_dim(inst->tex.target, &ref_pos);
   tsrc_transpose(inst->src[0], coords);
   bias_or_lod = tsrc_null();
   ref_or_si = tsrc_null();
   num_derivs = 0;
   sampler_src = 1;

   /*
    * For TXD,
    *
    *   src0 := (x, y, z, w)
    *   src1 := ddx
    *   src2 := ddy
    *   src3 := sampler
    *
    * For TEX2, TXB2, and TXL2,
    *
    *   src0 := (x, y, z, w)
    *   src1 := (v or bias or lod, ...)
    *   src2 := sampler
    *
    * For TEX, TXB, TXL, and TXP,
    *
    *   src0 := (x, y, z, w or bias or lod or projection)
    *   src1 := sampler
    *
    * For TXQ,
    *
    *   src0 := (lod, ...)
    *   src1 := sampler
    *
    * For TXQ_LZ,
    *
    *   src0 := sampler
    *
    * And for TXF,
    *
    *   src0 := (x, y, z, w or lod)
    *   src1 := sampler
    *
    * State trackers should not generate opcode+texture combinations with
    * which the two definitions conflict (e.g., TXB with SHADOW2DARRAY).
    */
   switch (inst->opcode) {
   case TOY_OPCODE_TGSI_TEX:
      if (ref_pos >= 0) {
         assert(ref_pos < 4);

         msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_COMPARE;
         ref_or_si = coords[ref_pos];
      }
      else {
         msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE;
      }
      break;
   case TOY_OPCODE_TGSI_TXD:
      if (ref_pos >= 0)
         tc_fail(tc, "TXD with shadow sampler not supported");

      msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_DERIVS;
      tsrc_transpose(inst->src[1], ddx);
      tsrc_transpose(inst->src[2], ddy);
      num_derivs = num_coords;
      sampler_src = 3;
      break;
   case TOY_OPCODE_TGSI_TXP:
      if (ref_pos >= 0) {
         assert(ref_pos < 3);

         msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_COMPARE;
         ref_or_si = coords[ref_pos];
      }
      else {
         msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE;
      }

      /* project the coordinates: multiply (s, t, r) and ref by 1/w */
      {
         struct toy_dst tmp[4];

         tc_alloc_tmp4(tc, tmp);

         tc_INV(tc, tmp[3], coords[3]);
         for (i = 0; i < num_coords && i < 3; i++) {
            tc_MUL(tc, tmp[i], coords[i], tsrc_from(tmp[3]));
            coords[i] = tsrc_from(tmp[i]);
         }

         /* the ref value also needs projecting if it was not covered above */
         if (ref_pos >= i) {
            tc_MUL(tc, tmp[ref_pos], ref_or_si, tsrc_from(tmp[3]));
            ref_or_si = tsrc_from(tmp[ref_pos]);
         }
      }
      break;
   case TOY_OPCODE_TGSI_TXB:
      if (ref_pos >= 0) {
         assert(ref_pos < 3);

         msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_BIAS_COMPARE;
         ref_or_si = coords[ref_pos];
      }
      else {
         msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_BIAS;
      }

      bias_or_lod = coords[3];
      break;
   case TOY_OPCODE_TGSI_TXL:
      if (ref_pos >= 0) {
         assert(ref_pos < 3);

         msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_LOD_COMPARE;
         ref_or_si = coords[ref_pos];
      }
      else {
         msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_LOD;
      }

      bias_or_lod = coords[3];
      break;
   case TOY_OPCODE_TGSI_TXF:
      msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_LD;

      switch (inst->tex.target) {
      case TGSI_TEXTURE_2D_MSAA:
      case TGSI_TEXTURE_2D_ARRAY_MSAA:
         assert(ref_pos >= 0 && ref_pos < 4);
         /* lod is always 0; the "ref" slot carries the sample index */
         bias_or_lod = tsrc_imm_d(0);
         ref_or_si = coords[ref_pos];
         break;
      default:
         bias_or_lod = coords[3];
         break;
      }

      /* offset the coordinates */
      if (!tsrc_is_null(inst->tex.offsets[0])) {
         struct toy_dst tmp[4];
         struct toy_src offsets[4];

         tc_alloc_tmp4(tc, tmp);
         tsrc_transpose(inst->tex.offsets[0], offsets);

         for (i = 0; i < num_coords; i++) {
            tc_ADD(tc, tmp[i], coords[i], offsets[i]);
            coords[i] = tsrc_from(tmp[i]);
         }
      }

      sampler_src = 1;
      break;
   case TOY_OPCODE_TGSI_TXQ:
      msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_RESINFO;
      num_coords = 0;
      bias_or_lod = coords[0];
      break;
   case TOY_OPCODE_TGSI_TXQ_LZ:
      msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_RESINFO;
      num_coords = 0;
      sampler_src = 0;
      break;
   case TOY_OPCODE_TGSI_TEX2:
      if (ref_pos >= 0) {
         assert(ref_pos < 5);

         msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_COMPARE;

         /* ref may live in src[1] when src[0] is fully used by coords */
         if (ref_pos >= 4) {
            struct toy_src src1[4];
            tsrc_transpose(inst->src[1], src1);
            ref_or_si = src1[ref_pos - 4];
         }
         else {
            ref_or_si = coords[ref_pos];
         }
      }
      else {
         msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE;
      }

      sampler_src = 2;
      break;
   case TOY_OPCODE_TGSI_TXB2:
      if (ref_pos >= 0) {
         assert(ref_pos < 4);

         msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_BIAS_COMPARE;
         ref_or_si = coords[ref_pos];
      }
      else {
         msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_BIAS;
      }

      /* bias comes from the first channel of src[1] */
      {
         struct toy_src src1[4];
         tsrc_transpose(inst->src[1], src1);
         bias_or_lod = src1[0];
      }

      sampler_src = 2;
      break;
   case TOY_OPCODE_TGSI_TXL2:
      if (ref_pos >= 0) {
         assert(ref_pos < 4);

         msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_LOD_COMPARE;
         ref_or_si = coords[ref_pos];
      }
      else {
         msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_LOD;
      }

      /* lod comes from the first channel of src[1] */
      {
         struct toy_src src1[4];
         tsrc_transpose(inst->src[1], src1);
         bias_or_lod = src1[0];
      }

      sampler_src = 2;
      break;
   default:
      assert(!"unhandled sampling opcode");
      return tsrc_null();
      break;
   }

   assert(inst->src[sampler_src].file == TOY_FILE_IMM);
   sampler_index = inst->src[sampler_src].val32;
   binding_table_index = ILO_WM_TEXTURE_SURFACE(sampler_index);

   /*
    * From the Sandy Bridge PRM, volume 4 part 1, page 18:
    *
    *     "Note that the (cube map) coordinates delivered to the sampling
    *      engine must already have been divided by the component with the
    *      largest absolute value."
    */
   switch (inst->tex.target) {
   case TGSI_TEXTURE_CUBE:
   case TGSI_TEXTURE_SHADOWCUBE:
   case TGSI_TEXTURE_CUBE_ARRAY:
   case TGSI_TEXTURE_SHADOWCUBE_ARRAY:
      /* TXQ does not need coordinates */
      if (num_coords >= 3) {
         struct toy_dst tmp[4];

         tc_alloc_tmp4(tc, tmp);

         /* tmp[3] = 1 / max(|x|, |y|, |z|) */
         tc_SEL(tc, tmp[3], tsrc_absolute(coords[0]),
               tsrc_absolute(coords[1]), BRW_CONDITIONAL_GE);
         tc_SEL(tc, tmp[3], tsrc_from(tmp[3]),
               tsrc_absolute(coords[2]), BRW_CONDITIONAL_GE);
         tc_INV(tc, tmp[3], tsrc_from(tmp[3]));

         for (i = 0; i < 3; i++) {
            tc_MUL(tc, tmp[i], coords[i], tsrc_from(tmp[3]));
            coords[i] = tsrc_from(tmp[i]);
         }
      }
      break;
   }

   /*
    * Saturate (s, t, r).  saturate_coords is set for sampler and coordinate
    * that uses linear filtering and PIPE_TEX_WRAP_CLAMP respectively.  It is
    * so that sampling outside the border gets the correct colors.
    */
   for (i = 0; i < MIN2(num_coords, 3); i++) {
      bool is_rect;

      if (!(saturate_coords[i] & (1 << sampler_index)))
         continue;

      switch (inst->tex.target) {
      case TGSI_TEXTURE_RECT:
      case TGSI_TEXTURE_SHADOWRECT:
         is_rect = true;
         break;
      default:
         is_rect = false;
         break;
      }

      if (is_rect) {
         struct toy_src min, max;
         struct toy_dst tmp;

         tc_fail(tc, "GL_CLAMP with rectangle texture unsupported");
         tmp = tc_alloc_tmp(tc);

         /* saturate to [0, width] or [0, height] */
         /* TODO TXQ?  the 2048 below is a placeholder, not the real size */
         min = tsrc_imm_f(0.0f);
         max = tsrc_imm_f(2048.0f);

         tc_SEL(tc, tmp, coords[i], min, BRW_CONDITIONAL_G);
         tc_SEL(tc, tmp, tsrc_from(tmp), max, BRW_CONDITIONAL_L);

         coords[i] = tsrc_from(tmp);
      }
      else {
         struct toy_dst tmp;
         struct toy_inst *inst2;

         tmp = tc_alloc_tmp(tc);

         /* saturate to [0.0f, 1.0f] */
         inst2 = tc_MOV(tc, tmp, coords[i]);
         inst2->saturate = true;

         coords[i] = tsrc_from(tmp);
      }
   }

   /* set up sampler parameters */
   if (tc->dev->gen >= ILO_GEN(7)) {
      msg_len = fs_add_sampler_params_gen7(tc, msg_type, base_mrf, param_size,
            coords, num_coords, bias_or_lod, ref_or_si, ddx, ddy, num_derivs);
   }
   else {
      msg_len = fs_add_sampler_params_gen6(tc, msg_type, base_mrf, param_size,
            coords, num_coords, bias_or_lod, ref_or_si, ddx, ddy, num_derivs);
   }

   /*
    * From the Sandy Bridge PRM, volume 4 part 1, page 136:
    *
    *     "The maximum message length allowed to the sampler is 11. This would
    *      disallow sample_d, sample_b_c, and sample_l_c with a SIMD Mode of
    *      SIMD16."
    */
   if (msg_len > 11)
      tc_fail(tc, "maximum length for messages to the sampler is 11");

   if (ret_sampler_index)
      *ret_sampler_index = sampler_index;

   /* response length is always 4 parameter slots (one per channel) */
   return tsrc_imm_mdesc_sampler(tc, msg_len, 4 * param_size,
         false, simd_mode, msg_type, sampler_index, binding_table_index);
}
 
/**
 * Lower a TGSI sampling virtual opcode to a SEND to the sampler, writing
 * the result to temporaries and then swizzling it into the destination
 * according to the sampler view swizzles (skipped for TXF/TXQ/TXQ_LZ,
 * which return raw data).
 */
static void
fs_lower_opcode_tgsi_sampling(struct fs_compile_context *fcc,
                              struct toy_inst *inst)
{
   struct toy_compiler *tc = &fcc->tc;
   struct toy_dst dst[4], tmp[4];
   struct toy_src desc;
   unsigned sampler_index;
   int swizzles[4], i;
   bool need_filter;

   /* fills the MRFs and returns the message descriptor */
   desc = fs_prepare_tgsi_sampling(tc, inst,
         fcc->first_free_mrf,
         fcc->variant->saturate_tex_coords,
         &sampler_index);

   switch (inst->opcode) {
   case TOY_OPCODE_TGSI_TXF:
   case TOY_OPCODE_TGSI_TXQ:
   case TOY_OPCODE_TGSI_TXQ_LZ:
      need_filter = false;
      break;
   default:
      need_filter = true;
      break;
   }

   toy_compiler_lower_to_send(tc, inst, false, BRW_SFID_SAMPLER);
   inst->src[0] = tsrc(TOY_FILE_MRF, fcc->first_free_mrf, 0);
   inst->src[1] = desc;
   for (i = 2; i < Elements(inst->src); i++)
      inst->src[i] = tsrc_null();

   /* write to temps first */
   tc_alloc_tmp4(tc, tmp);
   for (i = 0; i < 4; i++)
      tmp[i].type = inst->dst.type;
   tdst_transpose(inst->dst, dst);
   inst->dst = tmp[0];

   tc_move_inst(tc, inst);

   if (need_filter) {
      assert(sampler_index < fcc->variant->num_sampler_views);
      swizzles[0] = fcc->variant->sampler_view_swizzles[sampler_index].r;
      swizzles[1] = fcc->variant->sampler_view_swizzles[sampler_index].g;
      swizzles[2] = fcc->variant->sampler_view_swizzles[sampler_index].b;
      swizzles[3] = fcc->variant->sampler_view_swizzles[sampler_index].a;
   }
   else {
      /* identity swizzle for raw results */
      swizzles[0] = PIPE_SWIZZLE_RED;
      swizzles[1] = PIPE_SWIZZLE_GREEN;
      swizzles[2] = PIPE_SWIZZLE_BLUE;
      swizzles[3] = PIPE_SWIZZLE_ALPHA;
   }

   /* swizzle the results */
   for (i = 0; i < 4; i++) {
      switch (swizzles[i]) {
      case PIPE_SWIZZLE_ZERO:
         tc_MOV(tc, dst[i], tsrc_imm_f(0.0f));
         break;
      case PIPE_SWIZZLE_ONE:
         tc_MOV(tc, dst[i], tsrc_imm_f(1.0f));
         break;
      default:
         tc_MOV(tc, dst[i], tsrc_from(tmp[swizzles[i]]));
         break;
      }
   }
}
 
static void
fs_lower_opcode_derivative(struct toy_compiler *tc, struct toy_inst *inst)
{
struct toy_dst dst[4];
struct toy_src src[4];
int i;
 
tdst_transpose(inst->dst, dst);
tsrc_transpose(inst->src[0], src);
 
/*
* Every four fragments are from a 2x2 subspan, with
*
* fragment 1 on the top-left,
* fragment 2 on the top-right,
* fragment 3 on the bottom-left,
* fragment 4 on the bottom-right.
*
* DDX should thus produce
*
* dst = src.yyww - src.xxzz
*
* and DDY should produce
*
* dst = src.zzww - src.xxyy
*
* But since we are in BRW_ALIGN_1, swizzling does not work and we have to
* play with the region parameters.
*/
if (inst->opcode == TOY_OPCODE_DDX) {
for (i = 0; i < 4; i++) {
struct toy_src left, right;
 
left = tsrc_rect(src[i], TOY_RECT_220);
right = tsrc_offset(left, 0, 1);
 
tc_ADD(tc, dst[i], right, tsrc_negate(left));
}
}
else {
for (i = 0; i < 4; i++) {
struct toy_src top, bottom;
 
/* approximate with dst = src.zzzz - src.xxxx */
top = tsrc_rect(src[i], TOY_RECT_440);
bottom = tsrc_offset(top, 0, 2);
 
tc_ADD(tc, dst[i], bottom, tsrc_negate(top));
}
}
 
tc_discard_inst(tc, inst);
}
 
/**
 * Lower TOY_OPCODE_FB_WRITE to a SEND to the render cache.  The message
 * registers were already populated by fs_write_fb(); this only converts
 * the virtual instruction (with EOT set, as this ends the thread).
 */
static void
fs_lower_opcode_fb_write(struct toy_compiler *tc, struct toy_inst *inst)
{
   /* fs_write_fb() has set up the message registers */
   toy_compiler_lower_to_send(tc, inst, true,
         GEN6_SFID_DATAPORT_RENDER_CACHE);
}
 
/**
 * Lower KILL/KILL_IF by ANDing the dispatched pixel mask in r1.7 with the
 * flag register, so killed pixels stop being written at thread end.
 */
static void
fs_lower_opcode_kil(struct toy_compiler *tc, struct toy_inst *inst)
{
   struct toy_dst pixel_mask_dst;
   struct toy_src f0, pixel_mask;
   struct toy_inst *tmp;

   /* lower half of r1.7:ud */
   pixel_mask_dst = tdst_uw(tdst(TOY_FILE_GRF, 1, 7 * 4));
   pixel_mask = tsrc_rect(tsrc_from(pixel_mask_dst), TOY_RECT_010);

   f0 = tsrc_rect(tsrc_uw(tsrc(TOY_FILE_ARF, BRW_ARF_FLAG, 0)), TOY_RECT_010);

   /* KILL or KILL_IF */
   if (tsrc_is_null(inst->src[0])) {
      /* unconditional KILL: kill all pixels in the current mask */
      struct toy_src dummy = tsrc_uw(tsrc(TOY_FILE_GRF, 0, 0));
      struct toy_dst f0_dst = tdst_uw(tdst(TOY_FILE_ARF, BRW_ARF_FLAG, 0));

      /* create a mask that masks out all pixels */
      tmp = tc_MOV(tc, f0_dst, tsrc_rect(tsrc_imm_uw(0xffff), TOY_RECT_010));
      tmp->exec_size = BRW_EXECUTE_1;
      tmp->mask_ctrl = BRW_MASK_DISABLE;

      /* dummy != dummy is always false; predication clears enabled channels */
      tc_CMP(tc, tdst_null(), dummy, dummy, BRW_CONDITIONAL_NEQ);

      /* swapping the two src operands breaks glBitmap()!? */
      tmp = tc_AND(tc, pixel_mask_dst, f0, pixel_mask);
      tmp->exec_size = BRW_EXECUTE_1;
      tmp->mask_ctrl = BRW_MASK_DISABLE;
   }
   else {
      struct toy_src src[4];
      int i;

      tsrc_transpose(inst->src[0], src);
      /* mask out killed pixels: a pixel survives only if all channels >= 0 */
      for (i = 0; i < 4; i++) {
         tc_CMP(tc, tdst_null(), src[i], tsrc_imm_f(0.0f),
               BRW_CONDITIONAL_GE);

         /* swapping the two src operands breaks glBitmap()!? */
         tmp = tc_AND(tc, pixel_mask_dst, f0, pixel_mask);
         tmp->exec_size = BRW_EXECUTE_1;
         tmp->mask_ctrl = BRW_MASK_DISABLE;
      }
   }

   tc_discard_inst(tc, inst);
}
 
/**
 * Lower all virtual opcodes in the instruction list to real instructions,
 * in two passes: TGSI-level opcodes first (they may expand to other
 * virtual opcodes), then the remaining generic virtual opcodes.
 */
static void
fs_lower_virtual_opcodes(struct fs_compile_context *fcc)
{
   struct toy_compiler *tc = &fcc->tc;
   struct toy_inst *inst;

   /* lower TGSI's first, as they might be lowered to other virtual opcodes */
   tc_head(tc);
   while ((inst = tc_next(tc)) != NULL) {
      switch (inst->opcode) {
      case TOY_OPCODE_TGSI_IN:
      case TOY_OPCODE_TGSI_CONST:
      case TOY_OPCODE_TGSI_SV:
      case TOY_OPCODE_TGSI_IMM:
         fs_lower_opcode_tgsi_direct(fcc, inst);
         break;
      case TOY_OPCODE_TGSI_INDIRECT_FETCH:
      case TOY_OPCODE_TGSI_INDIRECT_STORE:
         fs_lower_opcode_tgsi_indirect(fcc, inst);
         break;
      case TOY_OPCODE_TGSI_TEX:
      case TOY_OPCODE_TGSI_TXB:
      case TOY_OPCODE_TGSI_TXD:
      case TOY_OPCODE_TGSI_TXL:
      case TOY_OPCODE_TGSI_TXP:
      case TOY_OPCODE_TGSI_TXF:
      case TOY_OPCODE_TGSI_TXQ:
      case TOY_OPCODE_TGSI_TXQ_LZ:
      case TOY_OPCODE_TGSI_TEX2:
      case TOY_OPCODE_TGSI_TXB2:
      case TOY_OPCODE_TGSI_TXL2:
      case TOY_OPCODE_TGSI_SAMPLE:
      case TOY_OPCODE_TGSI_SAMPLE_I:
      case TOY_OPCODE_TGSI_SAMPLE_I_MS:
      case TOY_OPCODE_TGSI_SAMPLE_B:
      case TOY_OPCODE_TGSI_SAMPLE_C:
      case TOY_OPCODE_TGSI_SAMPLE_C_LZ:
      case TOY_OPCODE_TGSI_SAMPLE_D:
      case TOY_OPCODE_TGSI_SAMPLE_L:
      case TOY_OPCODE_TGSI_GATHER4:
      case TOY_OPCODE_TGSI_SVIEWINFO:
      case TOY_OPCODE_TGSI_SAMPLE_POS:
      case TOY_OPCODE_TGSI_SAMPLE_INFO:
         fs_lower_opcode_tgsi_sampling(fcc, inst);
         break;
      }
   }

   /* second pass: the remaining generic virtual opcodes */
   tc_head(tc);
   while ((inst = tc_next(tc)) != NULL) {
      switch (inst->opcode) {
      case TOY_OPCODE_INV:
      case TOY_OPCODE_LOG:
      case TOY_OPCODE_EXP:
      case TOY_OPCODE_SQRT:
      case TOY_OPCODE_RSQ:
      case TOY_OPCODE_SIN:
      case TOY_OPCODE_COS:
      case TOY_OPCODE_FDIV:
      case TOY_OPCODE_POW:
      case TOY_OPCODE_INT_DIV_QUOTIENT:
      case TOY_OPCODE_INT_DIV_REMAINDER:
         toy_compiler_lower_math(tc, inst);
         break;
      case TOY_OPCODE_DDX:
      case TOY_OPCODE_DDY:
         fs_lower_opcode_derivative(tc, inst);
         break;
      case TOY_OPCODE_FB_WRITE:
         fs_lower_opcode_fb_write(tc, inst);
         break;
      case TOY_OPCODE_KIL:
         fs_lower_opcode_kil(tc, inst);
         break;
      default:
         /* opcodes above 127 are virtual and must have been lowered */
         if (inst->opcode > 127)
            tc_fail(tc, "unhandled virtual opcode");
         break;
      }
   }
}
 
/**
 * Compile the shader.
 *
 * Runs the full backend pipeline (lowering, legalization, optimization,
 * register allocation) and assembles the kernel.  A disabled debug path
 * allows substituting hand-written microcode instead of the assembled
 * kernel.  Returns false and leaves fcc->shader unmodified on failure.
 */
static bool
fs_compile(struct fs_compile_context *fcc)
{
   struct toy_compiler *tc = &fcc->tc;
   struct ilo_shader *sh = fcc->shader;

   fs_lower_virtual_opcodes(fcc);
   toy_compiler_legalize_for_ra(tc);
   toy_compiler_optimize(tc);
   toy_compiler_allocate_registers(tc,
         fcc->first_free_grf,
         fcc->last_free_grf,
         fcc->num_grf_per_vrf);
   toy_compiler_legalize_for_asm(tc);

   if (tc->fail) {
      ilo_err("failed to legalize FS instructions: %s\n", tc->reason);
      return false;
   }

   if (ilo_debug & ILO_DEBUG_FS) {
      ilo_printf("legalized instructions:\n");
      toy_compiler_dump(tc);
      ilo_printf("\n");
   }

   if (true) {
      sh->kernel = toy_compiler_assemble(tc, &sh->kernel_size);
   }
   else {
      /* debug aid: inject hand-written microcode instead of assembling */
      static const uint32_t microcode[] = {
         /* fill in the microcode here */
         0x0, 0x0, 0x0, 0x0,
      };
      const bool swap = true;

      sh->kernel_size = sizeof(microcode);
      sh->kernel = MALLOC(sh->kernel_size);

      if (sh->kernel) {
         const int num_dwords = sizeof(microcode) / 4;
         const uint32_t *src = microcode;
         uint32_t *dst = (uint32_t *) sh->kernel;
         int i;

         /* copy 16 bytes (one instruction) at a time, optionally
          * reversing the dword order within each instruction
          */
         for (i = 0; i < num_dwords; i += 4) {
            if (swap) {
               dst[i + 0] = src[i + 3];
               dst[i + 1] = src[i + 2];
               dst[i + 2] = src[i + 1];
               dst[i + 3] = src[i + 0];
            }
            else {
               /* copy at offset i; copying from the start each iteration
                * would only ever duplicate the first instruction
                */
               memcpy(&dst[i], &src[i], 16);
            }
         }
      }
   }

   if (!sh->kernel) {
      ilo_err("failed to compile FS: %s\n", tc->reason);
      return false;
   }

   if (ilo_debug & ILO_DEBUG_FS) {
      ilo_printf("disassembly:\n");
      toy_compiler_disassemble(tc, sh->kernel, sh->kernel_size);
      ilo_printf("\n");
   }

   return true;
}
 
/**
 * Emit instructions to write the color buffers (and the depth buffer).
 *
 * Builds one render-target-write message per color buffer: an optional
 * header (needed for KILL or multiple render targets), four channels of
 * color, and optionally the Z value for cbuf 0.  The last message carries
 * EOT.  Each message is emitted as a TOY_OPCODE_FB_WRITE to be lowered
 * later by fs_lower_opcode_fb_write().
 */
static void
fs_write_fb(struct fs_compile_context *fcc)
{
   struct toy_compiler *tc = &fcc->tc;
   int base_mrf = fcc->first_free_mrf;
   const struct toy_dst header = tdst_ud(tdst(TOY_FILE_MRF, base_mrf, 0));
   bool header_present = false;
   struct toy_src desc;
   unsigned msg_type, ctrl;
   int color_slots[ILO_MAX_DRAW_BUFFERS], num_cbufs;
   int pos_slot = -1, cbuf, i;

   for (i = 0; i < Elements(color_slots); i++)
      color_slots[i] = -1;

   /* map render-target indices to TGSI output slots */
   for (i = 0; i < fcc->tgsi.num_outputs; i++) {
      if (fcc->tgsi.outputs[i].semantic_name == TGSI_SEMANTIC_COLOR) {
         assert(fcc->tgsi.outputs[i].semantic_index < Elements(color_slots));
         color_slots[fcc->tgsi.outputs[i].semantic_index] = i;
      }
      else if (fcc->tgsi.outputs[i].semantic_name == TGSI_SEMANTIC_POSITION) {
         pos_slot = i;
      }
   }

   num_cbufs = fcc->variant->u.fs.num_cbufs;
   /* still need to send EOT (and probably depth) */
   if (!num_cbufs)
      num_cbufs = 1;

   /* we need the header to specify the pixel mask or render target */
   if (fcc->tgsi.uses_kill || num_cbufs > 1) {
      const struct toy_src r0 = tsrc_ud(tsrc(TOY_FILE_GRF, 0, 0));
      struct toy_inst *inst;

      inst = tc_MOV(tc, header, r0);
      inst->mask_ctrl = BRW_MASK_DISABLE;
      base_mrf += fcc->num_grf_per_vrf;

      /* this is a two-register header */
      if (fcc->dispatch_mode == GEN6_WM_8_DISPATCH_ENABLE) {
         inst = tc_MOV(tc, tdst_offset(header, 1, 0), tsrc_offset(r0, 1, 0));
         inst->mask_ctrl = BRW_MASK_DISABLE;
         base_mrf += fcc->num_grf_per_vrf;
      }

      header_present = true;
   }

   for (cbuf = 0; cbuf < num_cbufs; cbuf++) {
      const int slot =
         color_slots[(fcc->tgsi.props.fs_color0_writes_all_cbufs) ? 0 : cbuf];
      int mrf = base_mrf, vrf;
      struct toy_src src[4];

      if (slot >= 0) {
         const unsigned undefined_mask =
            fcc->tgsi.outputs[slot].undefined_mask;
         const int index = fcc->tgsi.outputs[slot].index;

         vrf = toy_tgsi_get_vrf(&fcc->tgsi, TGSI_FILE_OUTPUT, 0, index);
         if (vrf >= 0) {
            const struct toy_src tmp = tsrc(TOY_FILE_VRF, vrf, 0);
            tsrc_transpose(tmp, src);
         }
         else {
            /* use (0, 0, 0, 0) */
            tsrc_transpose(tsrc_imm_f(0.0f), src);
         }

         /* copy the four color channels into consecutive MRFs */
         for (i = 0; i < 4; i++) {
            const struct toy_dst dst = tdst(TOY_FILE_MRF, mrf, 0);

            if (undefined_mask & (1 << i))
               src[i] = tsrc_imm_f(0.0f);

            tc_MOV(tc, dst, src[i]);

            mrf += fcc->num_grf_per_vrf;
         }
      }
      else {
         /* use (0, 0, 0, 0) */
         for (i = 0; i < 4; i++) {
            const struct toy_dst dst = tdst(TOY_FILE_MRF, mrf, 0);

            tc_MOV(tc, dst, tsrc_imm_f(0.0f));
            mrf += fcc->num_grf_per_vrf;
         }
      }

      /* select BLEND_STATE[rt] */
      if (cbuf > 0) {
         struct toy_inst *inst;

         inst = tc_MOV(tc, tdst_offset(header, 0, 2), tsrc_imm_ud(cbuf));
         inst->mask_ctrl = BRW_MASK_DISABLE;
         inst->exec_size = BRW_EXECUTE_1;
         inst->src[0].rect = TOY_RECT_010;
      }

      /* append source depth (Z only) after the colors, for cbuf 0 */
      if (cbuf == 0 && pos_slot >= 0) {
         const int index = fcc->tgsi.outputs[pos_slot].index;
         const struct toy_dst dst = tdst(TOY_FILE_MRF, mrf, 0);
         struct toy_src src[4];
         int vrf;

         vrf = toy_tgsi_get_vrf(&fcc->tgsi, TGSI_FILE_OUTPUT, 0, index);
         if (vrf >= 0) {
            const struct toy_src tmp = tsrc(TOY_FILE_VRF, vrf, 0);
            tsrc_transpose(tmp, src);
         }
         else {
            /* use (0, 0, 0, 0) */
            tsrc_transpose(tsrc_imm_f(0.0f), src);
         }

         /* only Z */
         tc_MOV(tc, dst, src[2]);

         mrf += fcc->num_grf_per_vrf;
      }

      msg_type = (fcc->dispatch_mode == GEN6_WM_16_DISPATCH_ENABLE) ?
         BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE :
         BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_SINGLE_SOURCE_SUBSPAN01;

      /* bit 12 is "last render target" */
      ctrl = (cbuf == num_cbufs - 1) << 12 |
             msg_type << 8;

      desc = tsrc_imm_mdesc_data_port(tc, cbuf == num_cbufs - 1,
            mrf - fcc->first_free_mrf, 0,
            header_present, false,
            GEN6_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE,
            ctrl, ILO_WM_DRAW_SURFACE(cbuf));

      tc_add2(tc, TOY_OPCODE_FB_WRITE, tdst_null(),
            tsrc(TOY_FILE_MRF, fcc->first_free_mrf, 0), desc);
   }
}
 
/**
 * Set up shader outputs for fixed-function units.
 */
static void
fs_setup_shader_out(struct ilo_shader *sh, const struct toy_tgsi *tgsi)
{
   int slot;

   sh->out.count = tgsi->num_outputs;

   for (slot = 0; slot < tgsi->num_outputs; slot++) {
      sh->out.register_indices[slot] = tgsi->outputs[slot].index;
      sh->out.semantic_names[slot] = tgsi->outputs[slot].semantic_name;
      sh->out.semantic_indices[slot] = tgsi->outputs[slot].semantic_index;

      /* remember whether the shader writes gl_FragDepth */
      if (tgsi->outputs[slot].semantic_name == TGSI_SEMANTIC_POSITION)
         sh->out.has_pos = true;
   }
}
 
/**
 * Set up shader inputs for fixed-function units.
 *
 * Records the semantics and interpolation of each input and derives the
 * constant-interpolation mask and the barycentric interpolation modes the
 * WM payload must provide.  \p flatshade forces COLOR inputs to constant
 * interpolation.
 */
static void
fs_setup_shader_in(struct ilo_shader *sh, const struct toy_tgsi *tgsi,
                   bool flatshade)
{
   int i;

   sh->in.count = tgsi->num_inputs;
   for (i = 0; i < tgsi->num_inputs; i++) {
      sh->in.semantic_names[i] = tgsi->inputs[i].semantic_name;
      sh->in.semantic_indices[i] = tgsi->inputs[i].semantic_index;
      sh->in.interp[i] = tgsi->inputs[i].interp;
      sh->in.centroid[i] = tgsi->inputs[i].centroid;

      /* POSITION and FACE are supplied by fixed function, not interpolated */
      if (tgsi->inputs[i].semantic_name == TGSI_SEMANTIC_POSITION) {
         sh->in.has_pos = true;
         continue;
      }
      else if (tgsi->inputs[i].semantic_name == TGSI_SEMANTIC_FACE) {
         continue;
      }

      switch (tgsi->inputs[i].interp) {
      case TGSI_INTERPOLATE_CONSTANT:
         sh->in.const_interp_enable |= 1 << i;
         break;
      case TGSI_INTERPOLATE_LINEAR:
         sh->in.has_linear_interp = true;

         if (tgsi->inputs[i].centroid) {
            sh->in.barycentric_interpolation_mode |=
               1 << BRW_WM_NONPERSPECTIVE_CENTROID_BARYCENTRIC;
         }
         else {
            sh->in.barycentric_interpolation_mode |=
               1 << BRW_WM_NONPERSPECTIVE_PIXEL_BARYCENTRIC;
         }
         break;
      case TGSI_INTERPOLATE_COLOR:
         /* COLOR behaves like PERSPECTIVE unless flat shading is on */
         if (flatshade) {
            sh->in.const_interp_enable |= 1 << i;
            break;
         }
         /* fall through */
      case TGSI_INTERPOLATE_PERSPECTIVE:
         if (tgsi->inputs[i].centroid) {
            sh->in.barycentric_interpolation_mode |=
               1 << BRW_WM_PERSPECTIVE_CENTROID_BARYCENTRIC;
         }
         else {
            sh->in.barycentric_interpolation_mode |=
               1 << BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC;
         }
         break;
      default:
         break;
      }
   }
}
 
/**
 * Lay out the fixed-function WM payload in the GRF and record where each
 * part lands.  Returns the first GRF available after the payload, i.e.
 * where push constants would start.
 */
static int
fs_setup_payloads(struct fs_compile_context *fcc)
{
   const struct ilo_shader *sh = fcc->shader;
   int grf, i;

   grf = 0;

   /* r0: header */
   grf++;

   /* r1-r2: coordinates and etc. */
   grf += (fcc->dispatch_mode == GEN6_WM_32_DISPATCH_ENABLE) ? 2 : 1;

   /* SIMD32 delivers two payload copies; other modes break after the first */
   for (i = 0; i < Elements(fcc->payloads); i++) {
      int interp;

      /* r3-r26 or r32-r55: barycentric interpolation parameters */
      for (interp = 0; interp < BRW_WM_BARYCENTRIC_INTERP_MODE_COUNT; interp++) {
         if (!(sh->in.barycentric_interpolation_mode & (1 << interp)))
            continue;

         fcc->payloads[i].barycentric_interps[interp] = grf;
         grf += (fcc->dispatch_mode == GEN6_WM_8_DISPATCH_ENABLE) ? 2 : 4;
      }

      /* r27-r28 or r56-r57: interpolated depth */
      if (sh->in.has_pos) {
         fcc->payloads[i].source_depth = grf;
         grf += (fcc->dispatch_mode == GEN6_WM_8_DISPATCH_ENABLE) ? 1 : 2;
      }

      /* r29-r30 or r58-r59: interpolated w */
      if (sh->in.has_pos) {
         fcc->payloads[i].source_w = grf;
         grf += (fcc->dispatch_mode == GEN6_WM_8_DISPATCH_ENABLE) ? 1 : 2;
      }

      /* r31 or r60: position offset (currently never requested) */
      if (false) {
         fcc->payloads[i].pos_offset = grf;
         grf++;
      }

      if (fcc->dispatch_mode != GEN6_WM_32_DISPATCH_ENABLE)
         break;
   }

   return grf;
}
 
/**
 * Translate the TGSI tokens.
 */
static bool
fs_setup_tgsi(struct toy_compiler *tc, const struct tgsi_token *tokens,
              struct toy_tgsi *tgsi)
{
   const bool dump = (ilo_debug & ILO_DEBUG_FS) != 0;

   if (dump) {
      ilo_printf("dumping fragment shader\n");
      ilo_printf("\n");

      tgsi_dump(tokens, 0);
      ilo_printf("\n");
   }

   toy_compiler_translate_tgsi(tc, tokens, false, tgsi);

   if (tc->fail) {
      ilo_err("failed to translate FS TGSI tokens: %s\n", tc->reason);
      return false;
   }

   if (dump) {
      ilo_printf("TGSI translator:\n");
      toy_tgsi_dump(tgsi);
      ilo_printf("\n");
      toy_compiler_dump(tc);
      ilo_printf("\n");
   }

   return true;
}
 
/**
 * Set up FS compile context.  This includes translating the TGSI tokens.
 *
 * Allocates fcc->shader, configures the dispatch mode (currently fixed to
 * SIMD8) and the GRF/MRF budget, and derives the payload layout.  Returns
 * false and frees the shader on translation failure.
 */
static bool
fs_setup(struct fs_compile_context *fcc,
         const struct ilo_shader_state *state,
         const struct ilo_shader_variant *variant)
{
   int num_consts;

   memset(fcc, 0, sizeof(*fcc));

   fcc->shader = CALLOC_STRUCT(ilo_shader);
   if (!fcc->shader)
      return false;

   fcc->variant = variant;

   toy_compiler_init(&fcc->tc, state->info.dev);

   fcc->dispatch_mode = GEN6_WM_8_DISPATCH_ENABLE;

   fcc->tc.templ.access_mode = BRW_ALIGN_1;
   if (fcc->dispatch_mode == GEN6_WM_16_DISPATCH_ENABLE) {
      fcc->tc.templ.qtr_ctrl = GEN6_COMPRESSION_1H;
      fcc->tc.templ.exec_size = BRW_EXECUTE_16;
   }
   else {
      fcc->tc.templ.qtr_ctrl = GEN6_COMPRESSION_1Q;
      fcc->tc.templ.exec_size = BRW_EXECUTE_8;
   }

   fcc->tc.rect_linear_width = 8;

   /*
    * The classic driver uses the sampler cache (gen6) or the data cache
    * (gen7).  Why?
    */
   fcc->const_cache = GEN6_SFID_DATAPORT_CONSTANT_CACHE;

   if (!fs_setup_tgsi(&fcc->tc, state->info.tokens, &fcc->tgsi)) {
      toy_compiler_cleanup(&fcc->tc);
      FREE(fcc->shader);
      return false;
   }

   fs_setup_shader_in(fcc->shader, &fcc->tgsi, fcc->variant->u.fs.flatshade);
   fs_setup_shader_out(fcc->shader, &fcc->tgsi);

   /* we do not make use of push constant buffers yet */
   num_consts = 0;

   /* GRF layout: payload | push constants | attributes | free */
   fcc->first_const_grf = fs_setup_payloads(fcc);
   fcc->first_attr_grf = fcc->first_const_grf + num_consts;
   fcc->first_free_grf = fcc->first_attr_grf + fcc->shader->in.count * 2;
   fcc->last_free_grf = 127;

   /* m0 is reserved for system routines */
   fcc->first_free_mrf = 1;
   fcc->last_free_mrf = 15;

   /* instructions are compressed with BRW_EXECUTE_16 */
   fcc->num_grf_per_vrf =
      (fcc->dispatch_mode == GEN6_WM_16_DISPATCH_ENABLE) ? 2 : 1;

   /* gen7 has no MRF file; carve the MRFs out of the top of the GRF space */
   if (fcc->tc.dev->gen >= ILO_GEN(7)) {
      fcc->last_free_grf -= 15;
      fcc->first_free_mrf = fcc->last_free_grf + 1;
      fcc->last_free_mrf = fcc->first_free_mrf + 14;
   }

   fcc->shader->in.start_grf = fcc->first_const_grf;
   fcc->shader->has_kill = fcc->tgsi.uses_kill;
   fcc->shader->dispatch_16 =
      (fcc->dispatch_mode == GEN6_WM_16_DISPATCH_ENABLE);

   return true;
}
 
/**
 * Compile the fragment shader.
 *
 * Returns the compiled shader, or NULL on failure.  The caller owns the
 * returned ilo_shader.
 */
struct ilo_shader *
ilo_shader_compile_fs(const struct ilo_shader_state *state,
                      const struct ilo_shader_variant *variant)
{
   struct fs_compile_context fcc;
   struct ilo_shader *sh = NULL;

   if (!fs_setup(&fcc, state, variant))
      return NULL;

   /* emit the render-target writes before compiling */
   fs_write_fb(&fcc);

   if (fs_compile(&fcc))
      sh = fcc.shader;
   else
      FREE(fcc.shader);

   toy_tgsi_cleanup(&fcc.tgsi);
   toy_compiler_cleanup(&fcc.tc);

   return sh;
}
/contrib/sdk/sources/Mesa/src/gallium/drivers/ilo/shader/ilo_shader_gs.c
0,0 → 1,1449
/*
* Mesa 3-D graphics library
*
* Copyright (C) 2012-2013 LunarG, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*
* Authors:
* Chia-I Wu <olv@lunarg.com>
*/
 
#include "tgsi/tgsi_dump.h"
#include "toy_compiler.h"
#include "toy_tgsi.h"
#include "toy_legalize.h"
#include "toy_optimize.h"
#include "toy_helpers.h"
#include "ilo_shader_internal.h"
 
/* XXX Below is proof-of-concept code. Skip this file! */
 
/*
 * TODO
 * - primitive id is in r0.1.  FS receives PID as a flat attribute.
 * - set VUE header m0.1 for layered rendering
 */
/* Everything needed while compiling one GS variant. */
struct gs_compile_context {
   struct ilo_shader *shader;     /* shader being built; owned by the caller */
   const struct ilo_shader_variant *variant;
   const struct pipe_stream_output_info *so_info;

   struct toy_compiler tc;
   struct toy_tgsi tgsi;
   /* VUE output slot -> TGSI output slot (or -1) */
   int output_map[PIPE_MAX_SHADER_OUTPUTS];

   bool write_so;                 /* stream output is enabled */
   bool write_vue;                /* rasterization is not discarded */

   int in_vue_size;               /* input VUE size, in GRFs */
   int in_vue_count;              /* vertices per input primitive */

   int out_vue_size;              /* output VUE size, in GRFs */
   int out_vue_min_count;         /* vertices needed for one output prim */

   bool is_static;                /* EMIT/ENDPRIM counts known at compile time */

   /* sources fixed by the thread payload layout (see gs_setup_payload) */
   struct {
      struct toy_src header;
      struct toy_src svbi;        /* streamed vertex buffer indices; SO only */
      struct toy_src vues[6];     /* one VUE per input vertex */
   } payload;

   /* GRF-resident working variables (see gs_setup_vars) */
   struct {
      struct toy_dst urb_write_header;
      bool prim_start;            /* next vertex starts a primitive */
      bool prim_end;              /* current vertex ends a primitive */
      int prim_type;              /* _3DPRIM_* written into the URB header */

      struct toy_dst tmp;         /* general scratch register */

      /* buffered tgsi_outs */
      struct toy_dst buffers[3];
      int buffer_needed, buffer_cur;

      struct toy_dst so_written;  /* SONumPrimsWritten accumulator */
      struct toy_dst so_index;    /* next SO buffer element index */

      struct toy_src tgsi_outs[PIPE_MAX_SHADER_OUTPUTS];
   } vars;

   /* counters kept in registers when the counts are dynamic */
   struct {
      struct toy_dst total_vertices;
      struct toy_dst total_prims;

      struct toy_dst num_vertices;
      struct toy_dst num_vertices_in_prim;
   } dynamic_data;

   /* counters computed at compile time when the counts are static */
   struct {
      int total_vertices;
      int total_prims;
      /* this limits the max vertice count to be 256 */
      uint32_t last_vertex[8];

      int num_vertices;
      int num_vertices_in_prim;
   } static_data;

   int first_free_grf;
   int last_free_grf;
   int first_free_mrf;
   int last_free_mrf;
};
 
/* Emit an unmasked, 8-wide raw MOV from src to dst. */
static void
gs_COPY8(struct toy_compiler *tc, struct toy_dst dst, struct toy_src src)
{
   struct toy_inst *mov = tc_MOV(tc, dst, src);

   mov->mask_ctrl = BRW_MASK_DISABLE;
   mov->exec_size = BRW_EXECUTE_8;
}
 
/* Emit an unmasked, 4-wide MOV of channels src_ch..src_ch+3 of src to
 * channels dst_ch..dst_ch+3 of dst.
 */
static void
gs_COPY4(struct toy_compiler *tc,
         struct toy_dst dst, int dst_ch,
         struct toy_src src, int src_ch)
{
   struct toy_inst *mov =
      tc_MOV(tc, tdst_offset(dst, 0, dst_ch), tsrc_offset(src, 0, src_ch));

   mov->mask_ctrl = BRW_MASK_DISABLE;
   mov->exec_size = BRW_EXECUTE_4;
}
 
static void
gs_COPY1(struct toy_compiler *tc,
struct toy_dst dst, int dst_ch,
struct toy_src src, int src_ch)
{
struct toy_inst *inst;
 
inst = tc_MOV(tc,
tdst_offset(dst, 0, dst_ch),
tsrc_rect(tsrc_offset(src, 0, src_ch), TOY_RECT_010));
inst->exec_size = BRW_EXECUTE_1;
inst->mask_ctrl = BRW_MASK_DISABLE;
}
 
static void
gs_init_vars(struct gs_compile_context *gcc)
{
struct toy_compiler *tc = &gcc->tc;
struct toy_dst dst;
 
/* init URB_WRITE header */
dst = gcc->vars.urb_write_header;
 
gs_COPY8(tc, dst, gcc->payload.header);
 
gcc->vars.prim_start = true;
gcc->vars.prim_end = false;
switch (gcc->out_vue_min_count) {
case 1:
gcc->vars.prim_type = _3DPRIM_POINTLIST;
break;
case 2:
gcc->vars.prim_type = _3DPRIM_LINESTRIP;
break;
case 3:
gcc->vars.prim_type = _3DPRIM_TRISTRIP;
break;
}
 
if (gcc->write_so)
tc_MOV(tc, gcc->vars.so_written, tsrc_imm_d(0));
}
 
static void
gs_save_output(struct gs_compile_context *gcc, const struct toy_src *outs)
{
struct toy_compiler *tc = &gcc->tc;
const struct toy_dst buf = gcc->vars.buffers[gcc->vars.buffer_cur];
int i;
 
for (i = 0; i < gcc->shader->out.count; i++)
tc_MOV(tc, tdst_offset(buf, i, 0), outs[i]);
 
/* advance the cursor */
gcc->vars.buffer_cur++;
gcc->vars.buffer_cur %= gcc->vars.buffer_needed;
}
 
/**
 * Write one RGBA value to a stream-output buffer with a STREAMED_VB_WRITE
 * dataport message.
 *
 * \param dst     register that receives the write commit, when requested
 * \param index   destination element index within the SO buffer
 * \param out     source RGBA value
 * \param send_write_commit_message  request (and later wait on) a commit
 * \param binding_table_index        SO surface to write to
 */
static void
gs_write_so(struct gs_compile_context *gcc,
            struct toy_dst dst,
            struct toy_src index, struct toy_src out,
            bool send_write_commit_message,
            int binding_table_index)
{
   struct toy_compiler *tc = &gcc->tc;
   struct toy_dst mrf_header;
   struct toy_src desc;

   mrf_header = tdst_d(tdst(TOY_FILE_MRF, gcc->first_free_mrf, 0));

   /* m0.5: destination index */
   gs_COPY1(tc, mrf_header, 5, index, 0);

   /* m0.0 - m0.3: RGBA */
   gs_COPY4(tc, mrf_header, 0, tsrc_type(out, mrf_header.type), 0);

   desc = tsrc_imm_mdesc_data_port(tc, false,
         1, send_write_commit_message,
         true, send_write_commit_message,
         GEN6_DATAPORT_WRITE_MESSAGE_STREAMED_VB_WRITE, 0,
         binding_table_index);

   tc_SEND(tc, dst, tsrc_from(mrf_header), desc,
         GEN6_SFID_DATAPORT_RENDER_CACHE);
}
 
/**
 * Write the vertex data in outs[] to the URB, preceded by msg_header.
 * When the outputs do not fit in the free MRF range, several URB_WRITEs
 * are emitted; only the final, "complete" write may carry EOT and request
 * a new handle.
 */
static void
gs_write_vue(struct gs_compile_context *gcc,
             struct toy_dst dst, struct toy_src msg_header,
             const struct toy_src *outs, int num_outs,
             bool eot)
{
   struct toy_compiler *tc = &gcc->tc;
   struct toy_dst mrf_header;
   struct toy_src desc;
   int sent = 0;

   mrf_header = tdst_d(tdst(TOY_FILE_MRF, gcc->first_free_mrf, 0));
   gs_COPY8(tc, mrf_header, msg_header);

   while (sent < num_outs) {
      int mrf = gcc->first_free_mrf + 1;
      const int mrf_avail = gcc->last_free_mrf - mrf + 1;
      int msg_len, num_entries, i;
      bool complete;

      /* two vec4 outputs per MRF */
      num_entries = (num_outs - sent + 1) / 2;
      complete = true;
      if (num_entries > mrf_avail) {
         num_entries = mrf_avail;
         complete = false;
      }

      /* NOTE(review): mrf is advanced once per entry while the destination
       * also adds i / 2 -- the two offsets look double-counted; confirm
       * against upstream before relying on multi-entry writes
       */
      for (i = 0; i < num_entries; i++) {
         gs_COPY4(tc, tdst(TOY_FILE_MRF, mrf + i / 2, 0), 0,
               outs[sent + 2 * i], 0);
         if (sent + i * 2 + 1 < gcc->shader->out.count) {
            gs_COPY4(tc, tdst(TOY_FILE_MRF, mrf + i / 2, 0), 4,
                  outs[sent + 2 * i + 1], 0);
         }
         mrf++;
      }

      /* do not forget the header */
      msg_len = num_entries + 1;

      if (complete) {
         desc = tsrc_imm_mdesc_urb(tc,
               eot, msg_len, !eot, true, true, !eot,
               BRW_URB_SWIZZLE_NONE, sent, 0);
      }
      else {
         desc = tsrc_imm_mdesc_urb(tc,
               false, msg_len, 0, false, true, false,
               BRW_URB_SWIZZLE_NONE, sent, 0);
      }

      tc_add2(tc, TOY_OPCODE_URB_WRITE,
            (complete) ? dst : tdst_null(), tsrc_from(mrf_header), desc);

      sent += num_entries * 2;
   }
}
 
/**
 * Send the FF_SYNC URB message.  It allocates the initial URB handle
 * (the reply lands in dst; gs_compile copies dst.0 into the URB_WRITE
 * header) and, with stream output, programs NumSOVertsToWrite and
 * NumSOPrimsNeeded from num_prims.
 */
static void
gs_ff_sync(struct gs_compile_context *gcc, struct toy_dst dst,
           struct toy_src num_prims)
{
   struct toy_compiler *tc = &gcc->tc;
   struct toy_dst mrf_header =
      tdst_d(tdst(TOY_FILE_MRF, gcc->first_free_mrf, 0));
   struct toy_src desc;
   bool allocate;

   gs_COPY8(tc, mrf_header, gcc->payload.header);

   /* set NumSOVertsToWrite and NumSOPrimsNeeded */
   if (gcc->write_so) {
      if (num_prims.file == TOY_FILE_IMM) {
         /* verts in the high word, prims in the low word */
         const uint32_t v =
            (num_prims.val32 * gcc->in_vue_count) << 16 | num_prims.val32;

         gs_COPY1(tc, mrf_header, 0, tsrc_imm_d(v), 0);
      }
      else {
         struct toy_dst m0_0 = tdst_d(gcc->vars.tmp);

         tc_MUL(tc, m0_0, num_prims, tsrc_imm_d(gcc->in_vue_count << 16));
         tc_OR(tc, m0_0, tsrc_from(m0_0), num_prims);

         gs_COPY1(tc, mrf_header, 0, tsrc_from(m0_0), 0);
      }
   }

   /* set NumGSPrimsGenerated */
   if (gcc->write_vue)
      gs_COPY1(tc, mrf_header, 1, num_prims, 0);

   /*
    * From the Sandy Bridge PRM, volume 2 part 1, page 173:
    *
    *     "Programming Note: If the GS stage is enabled, software must always
    *      allocate at least one GS URB Entry.  This is true even if the GS
    *      thread never needs to output vertices to the pipeline, e.g., when
    *      only performing stream output.  This is an artifact of the need to
    *      pass the GS thread an initial destination URB handle."
    */
   allocate = true;
   desc = tsrc_imm_mdesc_urb(tc, false, 1, 1,
         false, false, allocate,
         BRW_URB_SWIZZLE_NONE, 0, 1);

   tc_SEND(tc, dst, tsrc_from(mrf_header), desc, BRW_SFID_URB);
}
 
static void
gs_discard(struct gs_compile_context *gcc)
{
struct toy_compiler *tc = &gcc->tc;
struct toy_dst mrf_header;
struct toy_src desc;
 
mrf_header = tdst_d(tdst(TOY_FILE_MRF, gcc->first_free_mrf, 0));
 
gs_COPY8(tc, mrf_header, tsrc_from(gcc->vars.urb_write_header));
 
desc = tsrc_imm_mdesc_urb(tc,
true, 1, 0, true, false, false,
BRW_URB_SWIZZLE_NONE, 0, 0);
 
tc_add2(tc, TOY_OPCODE_URB_WRITE,
tdst_null(), tsrc_from(mrf_header), desc);
}
 
/**
 * Lower TOY_OPCODE_ENDPRIM.  Intentionally empty for now: in the static
 * case the PrimEnd bits are precomputed by get_num_prims_static() and
 * applied by gs_lower_opcode_emit_vue_static(); the control-flow case is
 * unimplemented.
 */
static void
gs_lower_opcode_endprim(struct gs_compile_context *gcc, struct toy_inst *inst)
{
   /* if has control flow, set PrimEnd on the last vertex and URB_WRITE */
}
 
/**
 * Dynamic-count counterpart of gs_lower_opcode_emit_vue_static().
 * Unimplemented stub; callers reach it only on the is_static == false
 * path, which gs_compile() rejects with "no control flow support".
 */
static void
gs_lower_opcode_emit_vue_dynamic(struct gs_compile_context *gcc)
{
   /* TODO similar to the static version */

   /*
    * When SO is enabled and the inputs are lines or triangles, vertices are
    * always buffered.  we can defer the emission of the current vertex until
    * the next EMIT or ENDPRIM.  Or, we can emit two URB_WRITEs with the later
    * patching the former.
    */
}
 
/**
 * Dynamic-count counterpart of gs_lower_opcode_emit_so_static().  Only the
 * control flow and the SO index bookkeeping are present; the actual buffer
 * writes are still TODO.
 */
static void
gs_lower_opcode_emit_so_dynamic(struct gs_compile_context *gcc)
{
   struct toy_compiler *tc = &gcc->tc;

   /* only write out a primitive once enough vertices were emitted */
   tc_IF(tc, tdst_null(),
         tsrc_from(gcc->dynamic_data.num_vertices_in_prim),
         tsrc_imm_d(gcc->out_vue_min_count),
         BRW_CONDITIONAL_GE);

   {
      /* per-vertex destination indices, starting at so_index */
      tc_ADD(tc, gcc->vars.tmp, tsrc_from(gcc->vars.so_index), tsrc_imm_d(0x03020100));

      /* TODO same as static version */
   }

   tc_ENDIF(tc);

   tc_ADD(tc, gcc->vars.so_index,
         tsrc_from(gcc->vars.so_index), tsrc_imm_d(gcc->out_vue_min_count));
}
 
/**
 * Emit the URB_WRITE for the current vertex when the vertex count is known
 * at compile time.  PrimType/PrimStart/PrimEnd are packed into m0.2 of the
 * URB_WRITE header; the final vertex sends EOT and, with SO, also ORs in
 * the accumulated SONumPrimsWritten count.
 */
static void
gs_lower_opcode_emit_vue_static(struct gs_compile_context *gcc)
{
   struct toy_compiler *tc = &gcc->tc;
   struct toy_inst *inst2;
   bool eot;

   eot = (gcc->static_data.num_vertices == gcc->static_data.total_vertices);

   /* look up this vertex's PrimEnd bit in the precomputed bitmask */
   gcc->vars.prim_end =
      ((gcc->static_data.last_vertex[(gcc->static_data.num_vertices - 1) / 32] &
        1 << ((gcc->static_data.num_vertices - 1) % 32)) != 0);

   if (eot && gcc->write_so) {
      /* merge the strip bits with SONumPrimsWritten (high word of m0.2) */
      inst2 = tc_OR(tc, tdst_offset(gcc->vars.urb_write_header, 0, 2),
            tsrc_from(gcc->vars.so_written),
            tsrc_imm_d(gcc->vars.prim_type << 2 |
                       gcc->vars.prim_start << 1 |
                       gcc->vars.prim_end));
      inst2->exec_size = BRW_EXECUTE_1;
      inst2->src[0] = tsrc_rect(inst2->src[0], TOY_RECT_010);
      inst2->src[1] = tsrc_rect(inst2->src[1], TOY_RECT_010);
   }
   else {
      gs_COPY1(tc, gcc->vars.urb_write_header, 2,
            tsrc_imm_d(gcc->vars.prim_type << 2 |
                       gcc->vars.prim_start << 1 |
                       gcc->vars.prim_end), 0);
   }

   gs_write_vue(gcc, tdst_d(gcc->vars.tmp),
         tsrc_from(gcc->vars.urb_write_header),
         gcc->vars.tgsi_outs,
         gcc->shader->out.count, eot);

   /* the write returned a fresh URB handle; store it for the next vertex */
   if (!eot) {
      gs_COPY1(tc, gcc->vars.urb_write_header, 0,
            tsrc_from(tdst_d(gcc->vars.tmp)), 0);
   }

   gcc->vars.prim_start = gcc->vars.prim_end;
   gcc->vars.prim_end = false;
}
 
/**
 * Write the vertices of the just-completed primitive to the stream-output
 * buffers, for the compile-time-counted case.  Does nothing until enough
 * vertices have been emitted to form a primitive; the writes themselves
 * are guarded at runtime against exceeding the SVBI maximum index.
 */
static void
gs_lower_opcode_emit_so_static(struct gs_compile_context *gcc)
{
   struct toy_compiler *tc = &gcc->tc;
   struct toy_inst *inst;
   int i, j;

   if (gcc->static_data.num_vertices_in_prim < gcc->out_vue_min_count)
      return;

   /* tmp.x..w = so_index + {0, 1, 2, 3}: per-vertex destination indices */
   inst = tc_MOV(tc, tdst_w(gcc->vars.tmp), tsrc_imm_v(0x03020100));
   inst->exec_size = BRW_EXECUTE_8;
   inst->mask_ctrl = BRW_MASK_DISABLE;

   tc_ADD(tc, tdst_d(gcc->vars.tmp), tsrc_from(tdst_d(gcc->vars.tmp)),
         tsrc_rect(tsrc_from(gcc->vars.so_index), TOY_RECT_010));

   /* skip the writes when the last index would pass the SVBI max index */
   tc_IF(tc, tdst_null(),
         tsrc_rect(tsrc_offset(tsrc_from(tdst_d(gcc->vars.tmp)), 0, gcc->out_vue_min_count - 1), TOY_RECT_010),
         tsrc_rect(tsrc_offset(gcc->payload.svbi, 0, 4), TOY_RECT_010),
         BRW_CONDITIONAL_LE);
   {
      for (i = 0; i < gcc->out_vue_min_count; i++) {
         for (j = 0; j < gcc->so_info->num_outputs; j++) {
            const int idx = gcc->so_info->output[j].register_index;
            struct toy_src index, out;
            int binding_table_index;
            bool write_commit;

            index = tsrc_d(tsrc_offset(tsrc_from(gcc->vars.tmp), 0, i));

            /* the newest vertex is still in tgsi_outs; older ones were
             * buffered by gs_save_output()
             */
            if (i == gcc->out_vue_min_count - 1) {
               out = gcc->vars.tgsi_outs[idx];
            }
            else {
               /* gcc->vars.buffer_cur also points to the first vertex */
               const int buf =
                  (gcc->vars.buffer_cur + i) % gcc->vars.buffer_needed;

               out = tsrc_offset(tsrc_from(gcc->vars.buffers[buf]), idx, 0);
            }

            out = tsrc_offset(out, 0, gcc->so_info->output[j].start_component);

            /*
             * From the Sandy Bridge PRM, volume 4 part 2, page 19:
             *
             *     "The Kernel must do a write commit on the last write to DAP
             *      prior to a URB_WRITE with End of Thread."
             */
            write_commit =
               (gcc->static_data.num_vertices == gcc->static_data.total_vertices &&
                i == gcc->out_vue_min_count - 1 &&
                j == gcc->so_info->num_outputs - 1);


            binding_table_index = ILO_GS_SO_SURFACE(j);

            gs_write_so(gcc, gcc->vars.tmp, index,
                  out, write_commit, binding_table_index);

            /*
             * From the Sandy Bridge PRM, volume 4 part 1, page 168:
             *
             *     "The write commit does not modify the destination register, but
             *      merely clears the dependency associated with the destination
             *      register.  Thus, a simple "mov" instruction using the register as a
             *      source is sufficient to wait for the write commit to occur."
             */
            if (write_commit)
               tc_MOV(tc, gcc->vars.tmp, tsrc_from(gcc->vars.tmp));
         }
      }

      /* SONumPrimsWritten occupies the higher word of m0.2 of URB_WRITE */
      tc_ADD(tc, gcc->vars.so_written,
            tsrc_from(gcc->vars.so_written), tsrc_imm_d(1 << 16));
      tc_ADD(tc, gcc->vars.so_index,
            tsrc_from(gcc->vars.so_index), tsrc_imm_d(gcc->out_vue_min_count));
   }
   tc_ENDIF(tc);
}
 
static void
gs_lower_opcode_emit_static(struct gs_compile_context *gcc,
struct toy_inst *inst)
{
gcc->static_data.num_vertices++;
gcc->static_data.num_vertices_in_prim++;
 
if (gcc->write_so) {
gs_lower_opcode_emit_so_static(gcc);
 
if (gcc->out_vue_min_count > 1 &&
gcc->static_data.num_vertices != gcc->static_data.total_vertices)
gs_save_output(gcc, gcc->vars.tgsi_outs);
}
 
if (gcc->write_vue)
gs_lower_opcode_emit_vue_static(gcc);
}
 
static void
gs_lower_opcode_emit_dynamic(struct gs_compile_context *gcc,
struct toy_inst *inst)
{
struct toy_compiler *tc = &gcc->tc;
 
tc_ADD(tc, gcc->dynamic_data.num_vertices,
tsrc_from(gcc->dynamic_data.num_vertices), tsrc_imm_d(1));
tc_ADD(tc, gcc->dynamic_data.num_vertices_in_prim,
tsrc_from(gcc->dynamic_data.num_vertices_in_prim), tsrc_imm_d(1));
 
if (gcc->write_so) {
gs_lower_opcode_emit_so_dynamic(gcc);
 
if (gcc->out_vue_min_count > 1)
gs_save_output(gcc, gcc->vars.tgsi_outs);
}
 
if (gcc->write_vue)
gs_lower_opcode_emit_vue_dynamic(gcc);
}
 
/* Dispatch an EMIT to the static- or dynamic-count implementation. */
static void
gs_lower_opcode_emit(struct gs_compile_context *gcc, struct toy_inst *inst)
{
   if (!gcc->is_static) {
      gs_lower_opcode_emit_dynamic(gcc, inst);
      return;
   }

   gs_lower_opcode_emit_static(gcc, inst);
}
 
/**
 * Lower TOY_OPCODE_TGSI_IN: fetch input attribute idx of vertex dim from
 * the payload VUEs into dst.  Inputs the VS variant does not provide are
 * read as 0.0f.  For triangle input, the first two vertices are swapped
 * at runtime when the incoming primitive is _3DPRIM_TRISTRIP_REVERSE.
 */
static void
gs_lower_opcode_tgsi_in(struct gs_compile_context *gcc,
                        struct toy_dst dst, int dim, int idx)
{
   struct toy_compiler *tc = &gcc->tc;
   struct toy_src attr;
   int slot, reg = -1, subreg;

   /* locate the attribute within the input VUE (two attributes per GRF) */
   slot = toy_tgsi_find_input(&gcc->tgsi, idx);
   if (slot >= 0) {
      int i;

      for (i = 0; i < gcc->variant->u.gs.num_inputs; i++) {
         if (gcc->variant->u.gs.semantic_names[i] ==
               gcc->tgsi.inputs[slot].semantic_name &&
             gcc->variant->u.gs.semantic_indices[i] ==
               gcc->tgsi.inputs[slot].semantic_index) {
            reg = i / 2;
            subreg = (i % 2) * 4;
            break;
         }
      }
   }

   if (reg < 0) {
      tc_MOV(tc, dst, tsrc_imm_f(0.0f));
      return;
   }

   /* fix vertex ordering for _3DPRIM_TRISTRIP_REVERSE */
   if (gcc->in_vue_count == 3 && dim < 2) {
      struct toy_inst *inst;

      /* get PrimType */
      inst = tc_AND(tc, tdst_d(gcc->vars.tmp),
            tsrc_offset(gcc->payload.header, 0, 2), tsrc_imm_d(0x1f));
      inst->exec_size = BRW_EXECUTE_1;
      inst->src[0] = tsrc_rect(inst->src[0], TOY_RECT_010);
      inst->src[1] = tsrc_rect(inst->src[1], TOY_RECT_010);

      /* predicate: not reversed */
      inst = tc_CMP(tc, tdst_null(), tsrc_from(tdst_d(gcc->vars.tmp)),
            tsrc_imm_d(_3DPRIM_TRISTRIP_REVERSE), BRW_CONDITIONAL_NEQ);
      inst->src[0] = tsrc_rect(inst->src[0], TOY_RECT_010);

      /* normal order, taken when the predicate holds */
      attr = tsrc_offset(gcc->payload.vues[dim], reg, subreg);
      inst = tc_MOV(tc, dst, attr);
      inst->pred_ctrl = BRW_PREDICATE_NORMAL;

      /* swap IN[0] and IN[1] for _3DPRIM_TRISTRIP_REVERSE */
      dim = !dim;

      /* inverted predicate: reversed order */
      attr = tsrc_offset(gcc->payload.vues[dim], reg, subreg);
      inst = tc_MOV(tc, dst, attr);
      inst->pred_ctrl = BRW_PREDICATE_NORMAL;
      inst->pred_inv = true;
   }
   else {
      attr = tsrc_offset(gcc->payload.vues[dim], reg, subreg);
      tc_MOV(tc, dst, attr);
   }


}
 
/* Lower TOY_OPCODE_TGSI_IMM: load a vec4 immediate into dst with four raw
 * (untyped) per-channel moves.
 */
static void
gs_lower_opcode_tgsi_imm(struct gs_compile_context *gcc,
                         struct toy_dst dst, int idx)
{
   const uint32_t *imm = toy_tgsi_get_imm(&gcc->tgsi, idx, NULL);
   int ch;

   for (ch = 0; ch < 4; ch++) {
      /* raw moves */
      struct toy_inst *mov = tc_MOV(&gcc->tc,
            tdst_writemask(tdst_ud(dst), 1 << ch),
            tsrc_imm_ud(imm[ch]));

      mov->access_mode = BRW_ALIGN_16;
   }
}
 
/**
 * Lower a direct (non-indirect) TGSI fetch.  Only inputs and immediates
 * are handled; constants and system values fail the compilation.  A fetch
 * of IN with dim 0 also fetches the same input for every other vertex of
 * the primitive -- presumably the translator emits the fetch only once;
 * confirm in toy_tgsi.
 */
static void
gs_lower_opcode_tgsi_direct(struct gs_compile_context *gcc,
                            struct toy_inst *inst)
{
   struct toy_compiler *tc = &gcc->tc;
   int dim, idx;

   assert(inst->src[0].file == TOY_FILE_IMM);
   dim = inst->src[0].val32;

   assert(inst->src[1].file == TOY_FILE_IMM);
   idx = inst->src[1].val32;

   switch (inst->opcode) {
   case TOY_OPCODE_TGSI_IN:
      gs_lower_opcode_tgsi_in(gcc, inst->dst, dim, idx);
      /* fetch all dimensions */
      if (dim == 0) {
         int i;

         for (i = 1; i < gcc->in_vue_count; i++) {
            const int vrf = toy_tgsi_get_vrf(&gcc->tgsi, TGSI_FILE_INPUT, i, idx);
            struct toy_dst dst;

            if (vrf < 0)
               continue;

            dst = tdst(TOY_FILE_VRF, vrf, 0);
            gs_lower_opcode_tgsi_in(gcc, dst, i, idx);
         }
      }
      break;
   case TOY_OPCODE_TGSI_IMM:
      assert(!dim);
      gs_lower_opcode_tgsi_imm(gcc, inst->dst, idx);
      break;
   case TOY_OPCODE_TGSI_CONST:
   case TOY_OPCODE_TGSI_SV:
   default:
      tc_fail(tc, "unhandled TGSI fetch");
      break;
   }

   tc_discard_inst(tc, inst);
}
 
/**
 * Lower all virtual opcodes, in two passes: the GS-specific fetch, EMIT,
 * and ENDPRIM opcodes first (their lowering inserts further virtual
 * URB_WRITEs), then math and URB_WRITE into native instructions.
 */
static void
gs_lower_virtual_opcodes(struct gs_compile_context *gcc)
{
   struct toy_compiler *tc = &gcc->tc;
   struct toy_inst *inst;

   /* pass 1: GS-specific virtual opcodes */
   tc_head(tc);
   while ((inst = tc_next(tc)) != NULL) {
      switch (inst->opcode) {
      case TOY_OPCODE_TGSI_IN:
      case TOY_OPCODE_TGSI_CONST:
      case TOY_OPCODE_TGSI_SV:
      case TOY_OPCODE_TGSI_IMM:
         gs_lower_opcode_tgsi_direct(gcc, inst);
         break;
      case TOY_OPCODE_TGSI_INDIRECT_FETCH:
      case TOY_OPCODE_TGSI_INDIRECT_STORE:
         /* TODO similar to VS */
         tc_fail(tc, "no indirection support");
         tc_discard_inst(tc, inst);
         break;
      case TOY_OPCODE_TGSI_TEX:
      case TOY_OPCODE_TGSI_TXB:
      case TOY_OPCODE_TGSI_TXD:
      case TOY_OPCODE_TGSI_TXL:
      case TOY_OPCODE_TGSI_TXP:
      case TOY_OPCODE_TGSI_TXF:
      case TOY_OPCODE_TGSI_TXQ:
      case TOY_OPCODE_TGSI_TXQ_LZ:
      case TOY_OPCODE_TGSI_TEX2:
      case TOY_OPCODE_TGSI_TXB2:
      case TOY_OPCODE_TGSI_TXL2:
      case TOY_OPCODE_TGSI_SAMPLE:
      case TOY_OPCODE_TGSI_SAMPLE_I:
      case TOY_OPCODE_TGSI_SAMPLE_I_MS:
      case TOY_OPCODE_TGSI_SAMPLE_B:
      case TOY_OPCODE_TGSI_SAMPLE_C:
      case TOY_OPCODE_TGSI_SAMPLE_C_LZ:
      case TOY_OPCODE_TGSI_SAMPLE_D:
      case TOY_OPCODE_TGSI_SAMPLE_L:
      case TOY_OPCODE_TGSI_GATHER4:
      case TOY_OPCODE_TGSI_SVIEWINFO:
      case TOY_OPCODE_TGSI_SAMPLE_POS:
      case TOY_OPCODE_TGSI_SAMPLE_INFO:
         /* TODO similar to VS */
         tc_fail(tc, "no sampling support");
         tc_discard_inst(tc, inst);
         break;
      case TOY_OPCODE_EMIT:
         gs_lower_opcode_emit(gcc, inst);
         tc_discard_inst(tc, inst);
         break;
      case TOY_OPCODE_ENDPRIM:
         gs_lower_opcode_endprim(gcc, inst);
         tc_discard_inst(tc, inst);
         break;
      default:
         break;
      }
   }

   /* pass 2: math and URB sends */
   tc_head(tc);
   while ((inst = tc_next(tc)) != NULL) {
      switch (inst->opcode) {
      case TOY_OPCODE_INV:
      case TOY_OPCODE_LOG:
      case TOY_OPCODE_EXP:
      case TOY_OPCODE_SQRT:
      case TOY_OPCODE_RSQ:
      case TOY_OPCODE_SIN:
      case TOY_OPCODE_COS:
      case TOY_OPCODE_FDIV:
      case TOY_OPCODE_POW:
      case TOY_OPCODE_INT_DIV_QUOTIENT:
      case TOY_OPCODE_INT_DIV_REMAINDER:
         toy_compiler_lower_math(tc, inst);
         break;
      case TOY_OPCODE_URB_WRITE:
         toy_compiler_lower_to_send(tc, inst, false, BRW_SFID_URB);
         break;
      default:
         if (inst->opcode > 127)
            tc_fail(tc, "unhandled virtual opcode");
         break;
      }
   }
}
 
/**
 * Statically count the vertices and primitives the shader emits, recording
 * in static_data.last_vertex which vertices end a primitive.  When EMIT or
 * ENDPRIM appears inside control flow (IF/ENDIF or DO/WHILE), the counts
 * are unknown until runtime and gcc->is_static is set to false instead.
 * (The original comment claimed a bool return; the result is reported via
 * gcc->is_static.)
 */
static void
get_num_prims_static(struct gs_compile_context *gcc)
{
   struct toy_compiler *tc = &gcc->tc;
   const struct toy_inst *inst;
   int num_vertices_in_prim = 0, if_depth = 0, do_depth = 0;
   bool is_static = true;

   tc_head(tc);
   while ((inst = tc_next_no_skip(tc)) != NULL) {
      switch (inst->opcode) {
      case BRW_OPCODE_IF:
         if_depth++;
         break;
      case BRW_OPCODE_ENDIF:
         if_depth--;
         break;
      case BRW_OPCODE_DO:
         do_depth++;
         break;
      case BRW_OPCODE_WHILE:
         do_depth--;
         break;
      case TOY_OPCODE_EMIT:
         if (if_depth || do_depth) {
            is_static = false;
         }
         else {
            gcc->static_data.total_vertices++;

            /* each additional vertex after the first out_vue_min_count - 1
             * completes another primitive of the strip
             */
            num_vertices_in_prim++;
            if (num_vertices_in_prim >= gcc->out_vue_min_count)
               gcc->static_data.total_prims++;
         }
         break;
      case TOY_OPCODE_ENDPRIM:
         if (if_depth || do_depth) {
            is_static = false;
         }
         else {
            /* mark the last emitted vertex as ending a primitive */
            const int vertidx = gcc->static_data.total_vertices - 1;
            const int idx = vertidx / 32;
            const int subidx = vertidx % 32;

            gcc->static_data.last_vertex[idx] |= 1 << subidx;
            num_vertices_in_prim = 0;
         }
         break;
      default:
         break;
      }

      if (!is_static)
         break;
   }

   gcc->is_static = is_static;
}
 
/**
 * Compile the shader: classify it as static or dynamic, emit the prolog
 * (FF_SYNC and variable init), lower virtual opcodes, allocate registers,
 * and assemble the kernel into sh->kernel.  Only the static case is
 * supported; control flow around EMIT/ENDPRIM fails the compilation.
 */
static bool
gs_compile(struct gs_compile_context *gcc)
{
   struct toy_compiler *tc = &gcc->tc;
   struct ilo_shader *sh = gcc->shader;

   get_num_prims_static(gcc);

   if (gcc->is_static) {
      /* prepend the prolog before the translated instructions */
      tc_head(tc);

      gs_init_vars(gcc);
      gs_ff_sync(gcc, tdst_d(gcc->vars.tmp), tsrc_imm_d(gcc->static_data.total_prims));
      /* FF_SYNC returned the initial URB handle in tmp.0 */
      gs_COPY1(tc, gcc->vars.urb_write_header, 0, tsrc_from(tdst_d(gcc->vars.tmp)), 0);
      if (gcc->write_so)
         gs_COPY4(tc, gcc->vars.so_index, 0, tsrc_from(tdst_d(gcc->vars.tmp)), 1);

      tc_tail(tc);
   }
   else {
      tc_fail(tc, "no control flow support");
      return false;
   }

   if (!gcc->write_vue)
      gs_discard(gcc);

   gs_lower_virtual_opcodes(gcc);
   toy_compiler_legalize_for_ra(tc);
   toy_compiler_optimize(tc);
   toy_compiler_allocate_registers(tc,
         gcc->first_free_grf,
         gcc->last_free_grf,
         1);
   toy_compiler_legalize_for_asm(tc);

   if (tc->fail) {
      ilo_err("failed to legalize GS instructions: %s\n", tc->reason);
      return false;
   }

   if (ilo_debug & ILO_DEBUG_GS) {
      ilo_printf("legalized instructions:\n");
      toy_compiler_dump(tc);
      ilo_printf("\n");
   }

   sh->kernel = toy_compiler_assemble(tc, &sh->kernel_size);
   if (!sh->kernel)
      return false;

   if (ilo_debug & ILO_DEBUG_GS) {
      ilo_printf("disassembly:\n");
      toy_compiler_disassemble(tc, sh->kernel, sh->kernel_size);
      ilo_printf("\n");
   }

   return true;
}
 
/**
 * Compile a passthrough GS: no TGSI tokens; the thread simply re-emits its
 * input vertices (with stream output) and optionally forwards them to the
 * pipeline.  Used for VS stream output.
 */
static bool
gs_compile_passthrough(struct gs_compile_context *gcc)
{
   struct toy_compiler *tc = &gcc->tc;
   struct ilo_shader *sh = gcc->shader;

   /* one input primitive in, the same primitive out */
   gcc->is_static = true;
   gcc->static_data.total_vertices = gcc->in_vue_count;
   gcc->static_data.total_prims = 1;
   gcc->static_data.last_vertex[0] = 1 << (gcc->in_vue_count - 1);

   gs_init_vars(gcc);
   gs_ff_sync(gcc, tdst_d(gcc->vars.tmp), tsrc_imm_d(gcc->static_data.total_prims));
   /* FF_SYNC returned the initial URB handle in tmp.0 */
   gs_COPY1(tc, gcc->vars.urb_write_header, 0, tsrc_from(tdst_d(gcc->vars.tmp)), 0);
   if (gcc->write_so)
      gs_COPY4(tc, gcc->vars.so_index, 0, tsrc_from(tdst_d(gcc->vars.tmp)), 1);

   {
      int vert, attr;

      /* copy each input VUE to the outputs and emit it */
      for (vert = 0; vert < gcc->out_vue_min_count; vert++) {
         for (attr = 0; attr < gcc->shader->out.count; attr++) {
            tc_MOV(tc, tdst_from(gcc->vars.tgsi_outs[attr]),
                  tsrc_offset(gcc->payload.vues[vert], attr / 2, (attr % 2) * 4));
         }

         gs_lower_opcode_emit(gcc, NULL);
      }

      gs_lower_opcode_endprim(gcc, NULL);
   }

   if (!gcc->write_vue)
      gs_discard(gcc);

   gs_lower_virtual_opcodes(gcc);

   toy_compiler_legalize_for_ra(tc);
   toy_compiler_optimize(tc);
   toy_compiler_allocate_registers(tc,
         gcc->first_free_grf,
         gcc->last_free_grf,
         1);

   toy_compiler_legalize_for_asm(tc);

   if (tc->fail) {
      ilo_err("failed to translate GS TGSI tokens: %s\n", tc->reason);
      return false;
   }

   if (ilo_debug & ILO_DEBUG_GS) {
      int i;

      ilo_printf("VUE count %d, VUE size %d\n",
            gcc->in_vue_count, gcc->in_vue_size);
      ilo_printf("%srasterizer discard\n",
            (gcc->variant->u.gs.rasterizer_discard) ? "" : "no ");

      for (i = 0; i < gcc->so_info->num_outputs; i++) {
         ilo_printf("SO[%d] = OUT[%d]\n", i,
               gcc->so_info->output[i].register_index);
      }

      ilo_printf("legalized instructions:\n");
      toy_compiler_dump(tc);
      ilo_printf("\n");
   }

   sh->kernel = toy_compiler_assemble(tc, &sh->kernel_size);
   if (!sh->kernel) {
      ilo_err("failed to compile GS: %s\n", tc->reason);
      return false;
   }

   if (ilo_debug & ILO_DEBUG_GS) {
      ilo_printf("disassembly:\n");
      toy_compiler_disassemble(tc, sh->kernel, sh->kernel_size);
      ilo_printf("\n");
   }

   return true;
}
 
/**
 * Translate the TGSI tokens into toy-compiler instructions, dumping the
 * shader and the translation when GS debugging is enabled.
 */
static bool
gs_setup_tgsi(struct toy_compiler *tc, const struct tgsi_token *tokens,
              struct toy_tgsi *tgsi)
{
   const bool dump = (ilo_debug & ILO_DEBUG_GS);

   if (dump) {
      ilo_printf("dumping geometry shader\n");
      ilo_printf("\n");

      tgsi_dump(tokens, 0);
      ilo_printf("\n");
   }

   toy_compiler_translate_tgsi(tc, tokens, true, tgsi);
   if (tc->fail)
      return false;

   if (dump) {
      ilo_printf("TGSI translator:\n");
      toy_tgsi_dump(tgsi);
      ilo_printf("\n");
      toy_compiler_dump(tc);
      ilo_printf("\n");
   }

   return true;
}
 
/**
 * Set up shader inputs for fixed-function units.  The GS inputs mirror the
 * VS outputs described by the variant; interpolation fields are filled
 * with constants as they do not apply to the GS.
 */
static void
gs_setup_shader_in(struct ilo_shader *sh,
                   const struct ilo_shader_variant *variant)
{
   const int num_inputs = variant->u.gs.num_inputs;
   int slot;

   for (slot = 0; slot < num_inputs; slot++) {
      sh->in.semantic_names[slot] = variant->u.gs.semantic_names[slot];
      sh->in.semantic_indices[slot] = variant->u.gs.semantic_indices[slot];
      sh->in.interp[slot] = TGSI_INTERPOLATE_CONSTANT;
      sh->in.centroid[slot] = false;
   }

   sh->in.count = num_inputs;
   sh->in.has_pos = false;
   sh->in.has_linear_interp = false;
   sh->in.barycentric_interpolation_mode = 0;
}
 
/**
 * Set up shader outputs for fixed-function units.
 *
 * VUE slot order: PSIZE, POSITION, optional clip distances, COLOR/BCOLOR
 * pairs, then everything else.  output_map[] records, per VUE slot, the
 * TGSI output slot it comes from (-1 when the shader does not write it).
 *
 * XXX share the code with VS
 */
static void
gs_setup_shader_out(struct ilo_shader *sh, const struct toy_tgsi *tgsi,
                    bool output_clipdist, int *output_map)
{
   int psize_slot = -1, pos_slot = -1;
   int clipdist_slot[2] = { -1, -1 };
   int color_slot[4] = { -1, -1, -1, -1 };
   int num_outs, i;

   /* find out the slots of outputs that need special care */
   for (i = 0; i < tgsi->num_outputs; i++) {
      switch (tgsi->outputs[i].semantic_name) {
      case TGSI_SEMANTIC_PSIZE:
         psize_slot = i;
         break;
      case TGSI_SEMANTIC_POSITION:
         pos_slot = i;
         break;
      case TGSI_SEMANTIC_CLIPDIST:
         if (tgsi->outputs[i].semantic_index)
            clipdist_slot[1] = i;
         else
            clipdist_slot[0] = i;
         break;
      case TGSI_SEMANTIC_COLOR:
         if (tgsi->outputs[i].semantic_index)
            color_slot[2] = i;
         else
            color_slot[0] = i;
         break;
      case TGSI_SEMANTIC_BCOLOR:
         if (tgsi->outputs[i].semantic_index)
            color_slot[3] = i;
         else
            color_slot[1] = i;
         break;
      default:
         break;
      }
   }

   /* the first two VUEs are always PSIZE and POSITION */
   num_outs = 2;
   output_map[0] = psize_slot;
   output_map[1] = pos_slot;

   sh->out.register_indices[0] =
      (psize_slot >= 0) ? tgsi->outputs[psize_slot].index : -1;
   sh->out.semantic_names[0] = TGSI_SEMANTIC_PSIZE;
   sh->out.semantic_indices[0] = 0;

   sh->out.register_indices[1] =
      (pos_slot >= 0) ? tgsi->outputs[pos_slot].index : -1;
   sh->out.semantic_names[1] = TGSI_SEMANTIC_POSITION;
   sh->out.semantic_indices[1] = 0;

   sh->out.has_pos = true;

   /* followed by optional clip distances */
   if (output_clipdist) {
      sh->out.register_indices[num_outs] =
         (clipdist_slot[0] >= 0) ? tgsi->outputs[clipdist_slot[0]].index : -1;
      sh->out.semantic_names[num_outs] = TGSI_SEMANTIC_CLIPDIST;
      sh->out.semantic_indices[num_outs] = 0;
      output_map[num_outs++] = clipdist_slot[0];

      sh->out.register_indices[num_outs] =
         (clipdist_slot[1] >= 0) ? tgsi->outputs[clipdist_slot[1]].index : -1;
      sh->out.semantic_names[num_outs] = TGSI_SEMANTIC_CLIPDIST;
      sh->out.semantic_indices[num_outs] = 1;
      output_map[num_outs++] = clipdist_slot[1];
   }

   /*
    * make BCOLOR follow COLOR so that we can make use of
    * ATTRIBUTE_SWIZZLE_INPUTATTR_FACING in 3DSTATE_SF
    */
   for (i = 0; i < 4; i++) {
      const int slot = color_slot[i];

      if (slot < 0)
         continue;

      sh->out.register_indices[num_outs] = tgsi->outputs[slot].index;
      sh->out.semantic_names[num_outs] = tgsi->outputs[slot].semantic_name;
      sh->out.semantic_indices[num_outs] = tgsi->outputs[slot].semantic_index;

      output_map[num_outs++] = slot;
   }

   /* add the rest of the outputs */
   for (i = 0; i < tgsi->num_outputs; i++) {
      switch (tgsi->outputs[i].semantic_name) {
      case TGSI_SEMANTIC_PSIZE:
      case TGSI_SEMANTIC_POSITION:
      case TGSI_SEMANTIC_CLIPDIST:
      case TGSI_SEMANTIC_COLOR:
      case TGSI_SEMANTIC_BCOLOR:
         break;
      default:
         sh->out.register_indices[num_outs] = tgsi->outputs[i].index;
         sh->out.semantic_names[num_outs] = tgsi->outputs[i].semantic_name;
         sh->out.semantic_indices[num_outs] = tgsi->outputs[i].semantic_index;
         output_map[num_outs++] = i;
         break;
      }
   }

   sh->out.count = num_outs;
}
 
/**
 * Allocate GRF-resident variables: the URB_WRITE header, a scratch
 * register, the vertex buffers and SO counters (stream output only), and
 * map each output slot to the VRF (or fallback immediate) that holds its
 * TGSI output value.
 */
static void
gs_setup_vars(struct gs_compile_context *gcc)
{
   int grf = gcc->first_free_grf;
   int i;

   gcc->vars.urb_write_header = tdst_d(tdst(TOY_FILE_GRF, grf, 0));
   grf++;

   gcc->vars.tmp = tdst(TOY_FILE_GRF, grf, 0);
   grf++;

   if (gcc->write_so) {
      /* buffer the previous out_vue_min_count - 1 vertices of a primitive */
      gcc->vars.buffer_needed = gcc->out_vue_min_count - 1;
      for (i = 0; i < gcc->vars.buffer_needed; i++) {
         gcc->vars.buffers[i] = tdst(TOY_FILE_GRF, grf, 0);
         grf += gcc->shader->out.count;
      }

      gcc->vars.so_written = tdst_d(tdst(TOY_FILE_GRF, grf, 0));
      grf++;

      gcc->vars.so_index = tdst_d(tdst(TOY_FILE_GRF, grf, 0));
      grf++;
   }

   gcc->first_free_grf = grf;

   /* no translated TGSI (the passthrough GS): outputs live in fresh GRFs */
   if (!gcc->tgsi.reg_mapping) {
      for (i = 0; i < gcc->shader->out.count; i++)
         gcc->vars.tgsi_outs[i] = tsrc(TOY_FILE_GRF, grf++, 0);

      gcc->first_free_grf = grf;
      return;
   }

   /* unwritten outputs read as 0 (slot 0 is PSIZE, an integer) */
   for (i = 0; i < gcc->shader->out.count; i++) {
      const int slot = gcc->output_map[i];
      const int vrf = (slot >= 0) ? toy_tgsi_get_vrf(&gcc->tgsi,
            TGSI_FILE_OUTPUT, 0, gcc->tgsi.outputs[slot].index) : -1;

      if (vrf >= 0)
         gcc->vars.tgsi_outs[i] = tsrc(TOY_FILE_VRF, vrf, 0);
      else
         gcc->vars.tgsi_outs[i] = (i == 0) ? tsrc_imm_d(0) : tsrc_imm_f(0.0f);
   }
}
 
/* Describe the thread payload layout: r0 holds the header, r1 the SVBI
 * (with stream output), followed by one input VUE per vertex.  The GRFs
 * above the payload form the free range for register allocation.
 */
static void
gs_setup_payload(struct gs_compile_context *gcc)
{
   int reg = 0, vert;

   /* r0: payload header */
   gcc->payload.header = tsrc_d(tsrc(TOY_FILE_GRF, reg, 0));
   reg++;

   /* r1: SVBI (streamed vertex buffer indices) */
   if (gcc->write_so) {
      gcc->payload.svbi = tsrc_ud(tsrc(TOY_FILE_GRF, reg, 0));
      reg++;
   }

   /* URB data begins here; there are no pull constants */
   gcc->shader->in.start_grf = reg;

   /* one VUE per input vertex */
   for (vert = 0; vert < gcc->in_vue_count; vert++) {
      gcc->payload.vues[vert] = tsrc(TOY_FILE_GRF, reg, 0);
      reg += gcc->in_vue_size;
   }

   gcc->first_free_grf = reg;
   gcc->last_free_grf = 127;
}
 
/**
 * Set up GS compile context.  This includes translating the TGSI tokens.
 *
 * When state->info.tokens is NULL, a passthrough GS is set up instead:
 * num_verts gives both the input and output vertex counts, and the
 * "outputs" are copied from the variant's input semantics.
 */
static bool
gs_setup(struct gs_compile_context *gcc,
         const struct ilo_shader_state *state,
         const struct ilo_shader_variant *variant,
         int num_verts)
{
   memset(gcc, 0, sizeof(*gcc));

   gcc->shader = CALLOC_STRUCT(ilo_shader);
   if (!gcc->shader)
      return false;

   gcc->variant = variant;
   gcc->so_info = &state->info.stream_output;

   toy_compiler_init(&gcc->tc, state->info.dev);

   gcc->write_so = (state->info.stream_output.num_outputs > 0);
   gcc->write_vue = !gcc->variant->u.gs.rasterizer_discard;

   /* translate with vec4 (ALIGN_16) semantics */
   gcc->tc.templ.access_mode = BRW_ALIGN_16;
   gcc->tc.templ.exec_size = BRW_EXECUTE_4;
   gcc->tc.rect_linear_width = 4;

   if (state->info.tokens) {
      if (!gs_setup_tgsi(&gcc->tc, state->info.tokens, &gcc->tgsi)) {
         toy_compiler_cleanup(&gcc->tc);
         FREE(gcc->shader);
         return false;
      }

      /* the input topology fixes the number of vertices per primitive */
      switch (gcc->tgsi.props.gs_input_prim) {
      case PIPE_PRIM_POINTS:
         gcc->in_vue_count = 1;
         break;
      case PIPE_PRIM_LINES:
         gcc->in_vue_count = 2;
         gcc->shader->in.discard_adj = true;
         break;
      case PIPE_PRIM_TRIANGLES:
         gcc->in_vue_count = 3;
         gcc->shader->in.discard_adj = true;
         break;
      case PIPE_PRIM_LINES_ADJACENCY:
         gcc->in_vue_count = 4;
         break;
      case PIPE_PRIM_TRIANGLES_ADJACENCY:
         gcc->in_vue_count = 6;
         break;
      default:
         tc_fail(&gcc->tc, "unsupported GS input type");
         gcc->in_vue_count = 0;
         break;
      }

      switch (gcc->tgsi.props.gs_output_prim) {
      case PIPE_PRIM_POINTS:
         gcc->out_vue_min_count = 1;
         break;
      case PIPE_PRIM_LINE_STRIP:
         gcc->out_vue_min_count = 2;
         break;
      case PIPE_PRIM_TRIANGLE_STRIP:
         gcc->out_vue_min_count = 3;
         break;
      default:
         tc_fail(&gcc->tc, "unsupported GS output type");
         gcc->out_vue_min_count = 0;
         break;
      }
   }
   else {
      int i;

      /* passthrough: outputs mirror the variant's inputs */
      gcc->in_vue_count = num_verts;
      gcc->out_vue_min_count = num_verts;

      gcc->tgsi.num_outputs = gcc->variant->u.gs.num_inputs;
      for (i = 0; i < gcc->variant->u.gs.num_inputs; i++) {
         gcc->tgsi.outputs[i].semantic_name =
            gcc->variant->u.gs.semantic_names[i];
         gcc->tgsi.outputs[i].semantic_index =
            gcc->variant->u.gs.semantic_indices[i];
      }
   }

   /* instructions emitted from here on use scalar (ALIGN_1) semantics */
   gcc->tc.templ.access_mode = BRW_ALIGN_1;

   gs_setup_shader_in(gcc->shader, gcc->variant);
   gs_setup_shader_out(gcc->shader, &gcc->tgsi, false, gcc->output_map);

   /* two vec4 attributes per GRF */
   gcc->in_vue_size = (gcc->shader->in.count + 1) / 2;

   gcc->out_vue_size = (gcc->shader->out.count + 1) / 2;

   gs_setup_payload(gcc);
   gs_setup_vars(gcc);

   /* m0 is reserved for system routines */
   gcc->first_free_mrf = 1;
   gcc->last_free_mrf = 15;

   return true;
}
 
/**
 * Compile the geometry shader.
 *
 * Returns the compiled ilo_shader, or NULL on failure.  The caller owns the
 * returned shader and releases it with ilo_shader_destroy_kernel().
 */
struct ilo_shader *
ilo_shader_compile_gs(const struct ilo_shader_state *state,
                      const struct ilo_shader_variant *variant)
{
   struct gs_compile_context gcc;

   if (!gs_setup(&gcc, state, variant, 0))
      return NULL;

   if (!gs_compile(&gcc)) {
      FREE(gcc.shader);
      gcc.shader = NULL;
   }

   /* the compile context is no longer needed, whether we succeeded or not */
   toy_tgsi_cleanup(&gcc.tgsi);
   toy_compiler_cleanup(&gcc.tc);

   return gcc.shader;
}
 
/**
 * Append the GS kernel to the VS kernel so that a single kernel allocation
 * holds both, with the GS entry point recorded in vs->gs_offsets[].
 *
 * On success, \p gs is destroyed and its kernel lives on inside \p vs.
 * On failure (NULL \p gs or OOM), \p vs and \p gs are left untouched.
 */
static bool
append_gs_to_vs(struct ilo_shader *vs, struct ilo_shader *gs, int num_verts)
{
   void *combined;
   int gs_offset;

   if (!gs)
      return false;

   /* kernels must be aligned to 64-byte */
   gs_offset = align(vs->kernel_size, 64);
   combined = REALLOC(vs->kernel, vs->kernel_size,
         gs_offset + gs->kernel_size);
   if (!combined)
      return false;

   /* cast to char *: pointer arithmetic on void * is a GNU extension and
    * invalid in ISO C */
   memcpy((char *) combined + gs_offset, gs->kernel, gs->kernel_size);

   vs->kernel = combined;
   vs->kernel_size = gs_offset + gs->kernel_size;

   vs->stream_output = true;
   vs->gs_offsets[num_verts - 1] = gs_offset;
   vs->gs_start_grf = gs->in.start_grf;

   ilo_shader_destroy_kernel(gs);

   return true;
}
 
/**
 * Compile a pass-through GS for the given VS and append it to the VS kernel,
 * so that the VS can be used for stream output / rasterizer discard.
 */
bool
ilo_shader_compile_gs_passthrough(const struct ilo_shader_state *vs_state,
                                  const struct ilo_shader_variant *vs_variant,
                                  const int *so_mapping,
                                  struct ilo_shader *vs)
{
   struct gs_compile_context gcc;
   struct ilo_shader_state state;
   struct ilo_shader_variant variant;
   const int num_verts = 3;
   int i;

   /* derive the GS state from the VS state: drop the TGSI tokens and remap
    * the stream-output register indices through so_mapping */
   state = *vs_state;
   state.info.tokens = NULL;
   for (i = 0; i < state.info.stream_output.num_outputs; i++) {
      const int reg = state.info.stream_output.output[i].register_index;

      state.info.stream_output.output[i].register_index = so_mapping[reg];
   }

   /* derive the GS variant: its inputs mirror the VS outputs */
   variant = *vs_variant;
   variant.u.gs.rasterizer_discard = vs_variant->u.vs.rasterizer_discard;
   variant.u.gs.num_inputs = vs->out.count;
   for (i = 0; i < vs->out.count; i++) {
      variant.u.gs.semantic_names[i] = vs->out.semantic_names[i];
      variant.u.gs.semantic_indices[i] = vs->out.semantic_indices[i];
   }

   if (!gs_setup(&gcc, &state, &variant, num_verts))
      return false;

   if (!gs_compile_passthrough(&gcc)) {
      FREE(gcc.shader);
      gcc.shader = NULL;
   }

   /* no tokens were handed to gs_setup(), so toy_tgsi_cleanup() is not
    * needed here */
   toy_compiler_cleanup(&gcc.tc);

   return append_gs_to_vs(vs, gcc.shader, num_verts);
}
/contrib/sdk/sources/Mesa/src/gallium/drivers/ilo/shader/ilo_shader_internal.h
0,0 → 1,222
/*
* Mesa 3-D graphics library
*
* Copyright (C) 2012-2013 LunarG, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*
* Authors:
* Chia-I Wu <olv@lunarg.com>
*/
 
#ifndef ILO_SHADER_INTERNAL_H
#define ILO_SHADER_INTERNAL_H
 
#include "ilo_common.h"
#include "ilo_context.h"
#include "ilo_shader.h"
 
/* XXX The interface needs to be reworked */
 
/**
 * A shader variant.  It consists of non-orthogonal states of the pipe context
 * affecting the compilation of a shader.
 */
struct ilo_shader_variant {
   union {
      struct {
         bool rasterizer_discard;   /* compile for stream output only */
         int num_ucps;              /* number of user clip planes */
      } vs;

      struct {
         bool rasterizer_discard;
         /* GS inputs; for a pass-through GS these mirror the VS outputs */
         int num_inputs;
         int semantic_names[PIPE_MAX_SHADER_INPUTS];
         int semantic_indices[PIPE_MAX_SHADER_INPUTS];
      } gs;

      struct {
         bool flatshade;
         int fb_height;
         int num_cbufs;
      } fs;
   } u;

   int num_sampler_views;
   /* per-view channel swizzles (3-bit PIPE_SWIZZLE_* values) */
   struct {
      unsigned r:3;
      unsigned g:3;
      unsigned b:3;
      unsigned a:3;
   } sampler_view_swizzles[ILO_MAX_SAMPLER_VIEWS];

   /* NOTE(review): presumably per-axis bitmasks of samplers whose texcoords
    * must be saturated — confirm against variant init */
   uint32_t saturate_tex_coords[3];
};
 
/**
 * A compiled shader.  Holds the kernel binary plus the metadata the
 * fixed-function setup code needs (input/output layouts, stream output,
 * attribute routing, cache bookkeeping).
 */
struct ilo_shader {
   struct ilo_shader_variant variant;   /* the variant this was compiled for */

   struct ilo_shader_cso cso;

   /* shader inputs as seen by the fixed-function units */
   struct {
      int semantic_names[PIPE_MAX_SHADER_INPUTS];
      int semantic_indices[PIPE_MAX_SHADER_INPUTS];
      int interp[PIPE_MAX_SHADER_INPUTS];
      bool centroid[PIPE_MAX_SHADER_INPUTS];
      int count;

      int start_grf;            /* first GRF holding the input payload */
      bool has_pos;
      bool has_linear_interp;
      int barycentric_interpolation_mode;
      uint32_t const_interp_enable;
      bool discard_adj;         /* GS: drop adjacent vertices of the prim */
   } in;

   /* shader outputs as seen by the fixed-function units */
   struct {
      int register_indices[PIPE_MAX_SHADER_OUTPUTS];
      int semantic_names[PIPE_MAX_SHADER_OUTPUTS];
      int semantic_indices[PIPE_MAX_SHADER_OUTPUTS];
      int count;

      bool has_pos;
   } out;

   bool has_kill;
   bool dispatch_16;

   bool stream_output;
   int svbi_post_inc;
   struct pipe_stream_output_info so_info;

   /* for VS stream output / rasterizer discard */
   int gs_offsets[3];           /* appended GS entry points, by prim size */
   int gs_start_grf;

   void *kernel;                /* the assembled kernel binary (owned) */
   int kernel_size;

   bool routing_initialized;
   int routing_src_semantics[PIPE_MAX_SHADER_OUTPUTS];
   int routing_src_indices[PIPE_MAX_SHADER_OUTPUTS];
   uint32_t routing_sprite_coord_enable;
   struct ilo_kernel_routing routing;

   /* what does the push constant buffer consist of? */
   struct {
      int clip_state_size;
   } pcb;

   struct list_head list;       /* links variants of an ilo_shader_state */

   /* managed by shader cache */
   bool uploaded;
   uint32_t cache_offset;       /* offset of the kernel in the cache bo */
};
 
/**
 * Information about a shader state.  Captured once from the pipe shader
 * state and shared by all variants.
 */
struct ilo_shader_info {
   const struct ilo_dev_info *dev;
   int type;                    /* PIPE_SHADER_* stage */

   const struct tgsi_token *tokens;

   struct pipe_stream_output_info stream_output;
   struct {
      unsigned req_local_mem;
      unsigned req_private_mem;
      unsigned req_input_mem;
   } compute;

   /* which non-orthogonal pipe states this shader depends on */
   uint32_t non_orthogonal_states;

   bool has_color_interp;
   bool has_pos;
   bool has_vertexid;
   bool has_instanceid;
   bool fs_color0_writes_all_cbufs;

   int edgeflag_in;
   int edgeflag_out;

   uint32_t shadow_samplers;    /* bitmask of shadow samplers */
   int num_samplers;
};
 
/**
 * A shader state.  Owns the list of compiled variants and points at the one
 * currently in use.
 */
struct ilo_shader_state {
   struct ilo_shader_info info;

   struct list_head variants;   /* list of compiled ilo_shader variants */
   int num_variants, total_size;

   struct ilo_shader *shader;   /* the variant currently in use */

   /* managed by shader cache */
   struct ilo_shader_cache *cache;
   struct list_head list;
};
 
/* derive a variant from the shader info and the current context states */
void
ilo_shader_variant_init(struct ilo_shader_variant *variant,
                        const struct ilo_shader_info *info,
                        const struct ilo_context *ilo);

/* switch the state to (compiling if necessary) the given variant */
bool
ilo_shader_state_use_variant(struct ilo_shader_state *state,
                             const struct ilo_shader_variant *variant);

/* per-stage compilers; each returns a caller-owned ilo_shader or NULL */
struct ilo_shader *
ilo_shader_compile_vs(const struct ilo_shader_state *state,
                      const struct ilo_shader_variant *variant);

struct ilo_shader *
ilo_shader_compile_gs(const struct ilo_shader_state *state,
                      const struct ilo_shader_variant *variant);

/* compile a pass-through GS and append its kernel to the given VS */
bool
ilo_shader_compile_gs_passthrough(const struct ilo_shader_state *vs_state,
                                  const struct ilo_shader_variant *vs_variant,
                                  const int *so_mapping,
                                  struct ilo_shader *vs);

struct ilo_shader *
ilo_shader_compile_fs(const struct ilo_shader_state *state,
                      const struct ilo_shader_variant *variant);

struct ilo_shader *
ilo_shader_compile_cs(const struct ilo_shader_state *state,
                      const struct ilo_shader_variant *variant);
 
/**
 * Destroy a compiled shader, releasing both the kernel binary and the
 * ilo_shader itself.  The kernel must be freed first as it hangs off sh.
 */
static inline void
ilo_shader_destroy_kernel(struct ilo_shader *sh)
{
   FREE(sh->kernel);
   FREE(sh);
}
 
#endif /* ILO_SHADER_INTERNAL_H */
/contrib/sdk/sources/Mesa/src/gallium/drivers/ilo/shader/ilo_shader_vs.c
0,0 → 1,1289
/*
* Mesa 3-D graphics library
*
* Copyright (C) 2012-2013 LunarG, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*
* Authors:
* Chia-I Wu <olv@lunarg.com>
*/
 
#include "tgsi/tgsi_dump.h"
#include "tgsi/tgsi_util.h"
#include "toy_compiler.h"
#include "toy_tgsi.h"
#include "toy_legalize.h"
#include "toy_optimize.h"
#include "toy_helpers.h"
#include "ilo_context.h"
#include "ilo_shader_internal.h"
 
/**
 * Transient state used while compiling a vertex shader.
 */
struct vs_compile_context {
   struct ilo_shader *shader;               /* the shader being produced */
   const struct ilo_shader_variant *variant;

   struct toy_compiler tc;
   struct toy_tgsi tgsi;
   /* SFID used when sending pull-constant reads (gen6 path) */
   enum brw_message_target const_cache;

   /* for each shader output, the TGSI output slot, or negative if none */
   int output_map[PIPE_MAX_SHADER_OUTPUTS];

   int num_grf_per_vrf;
   int first_const_grf;      /* where the push constants start */
   int first_vue_grf;        /* where the input VUE starts */
   int first_free_grf;       /* GRF range free for register allocation */
   int last_free_grf;

   int first_free_mrf;       /* MRF range free for message construction */
   int last_free_mrf;
};
 
/**
 * Lower TOY_OPCODE_TGSI_IN to a MOV from the GRF holding the attribute.
 * Undeclared inputs read as 0.0f.
 */
static void
vs_lower_opcode_tgsi_in(struct vs_compile_context *vcc,
                        struct toy_dst dst, int dim, int idx)
{
   struct toy_compiler *tc = &vcc->tc;
   const int slot = toy_tgsi_find_input(&vcc->tgsi, idx);
   int attr_grf;

   assert(!dim);

   if (slot < 0) {
      /* undeclared input */
      tc_MOV(tc, dst, tsrc_imm_f(0.0f));
      return;
   }

   /* the TGSI inputs live after the non-TGSI VUE entries (e.g. the
    * generated vertex/instance IDs) */
   attr_grf = vcc->first_vue_grf +
      (vcc->shader->in.count - vcc->tgsi.num_inputs) +
      vcc->tgsi.inputs[slot].semantic_index;

   tc_MOV(tc, dst, tsrc(TOY_FILE_GRF, attr_grf, 0));
}
 
/**
 * Lower a constant fetch to an OWord Dual Block Read on GEN6.
 *
 * A two-register message is built in the MRF: m0 is a copy of r0 (the
 * message header) and m1 holds the block offsets.
 */
static void
vs_lower_opcode_tgsi_const_gen6(struct vs_compile_context *vcc,
                                struct toy_dst dst, int dim,
                                struct toy_src idx)
{
   const struct toy_dst header =
      tdst_ud(tdst(TOY_FILE_MRF, vcc->first_free_mrf, 0));
   const struct toy_dst block_offsets =
      tdst_ud(tdst(TOY_FILE_MRF, vcc->first_free_mrf + 1, 0));
   const struct toy_src r0 = tsrc_ud(tsrc(TOY_FILE_GRF, 0, 0));
   struct toy_compiler *tc = &vcc->tc;
   unsigned msg_type, msg_ctrl, msg_len;
   struct toy_inst *inst;
   struct toy_src desc;

   /* set message header */
   inst = tc_MOV(tc, header, r0);
   inst->mask_ctrl = BRW_MASK_DISABLE;

   /* set block offsets */
   tc_MOV(tc, block_offsets, idx);

   msg_type = GEN6_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ;
   msg_ctrl = BRW_DATAPORT_OWORD_DUAL_BLOCK_1OWORD << 8;
   msg_len = 2;

   desc = tsrc_imm_mdesc_data_port(tc, false, msg_len, 1, true, false,
         msg_type, msg_ctrl, ILO_VS_CONST_SURFACE(dim));

   tc_SEND(tc, dst, tsrc_from(header), desc, vcc->const_cache);
}
 
/**
 * Lower a constant fetch on GEN7+ by sampling the constant surface with a
 * SIMD4x2 "ld" message; only the offset needs to be set up in the MRF.
 */
static void
vs_lower_opcode_tgsi_const_gen7(struct vs_compile_context *vcc,
                                struct toy_dst dst, int dim,
                                struct toy_src idx)
{
   struct toy_compiler *tc = &vcc->tc;
   const struct toy_dst offset =
      tdst_ud(tdst(TOY_FILE_MRF, vcc->first_free_mrf, 0));
   struct toy_src desc;

   /*
    * In 259b65e2e7938de4aab323033cfe2b33369ddb07, pull constant load was
    * changed from OWord Dual Block Read to ld to increase performance in the
    * classic driver. Since we use the constant cache instead of the data
    * cache, I wonder if we still want to follow the classic driver.
    */

   /* set offset */
   tc_MOV(tc, offset, idx);

   desc = tsrc_imm_mdesc_sampler(tc, 1, 1, false,
         BRW_SAMPLER_SIMD_MODE_SIMD4X2,
         GEN5_SAMPLER_MESSAGE_SAMPLE_LD,
         0,
         ILO_VS_CONST_SURFACE(dim));

   tc_SEND(tc, dst, tsrc_from(offset), desc, BRW_SFID_SAMPLER);
}
 
static void
vs_lower_opcode_tgsi_imm(struct vs_compile_context *vcc,
struct toy_dst dst, int idx)
{
const uint32_t *imm;
int ch;
 
imm = toy_tgsi_get_imm(&vcc->tgsi, idx, NULL);
 
for (ch = 0; ch < 4; ch++) {
/* raw moves */
tc_MOV(&vcc->tc,
tdst_writemask(tdst_ud(dst), 1 << ch),
tsrc_imm_ud(imm[ch]));
}
}
 
 
/**
 * Lower TOY_OPCODE_TGSI_SV (system value read).  Vertex and instance IDs
 * are read from the extra vertex element prepended in
 * 3DSTATE_VERTEX_ELEMENTS; other system values are unsupported.
 */
static void
vs_lower_opcode_tgsi_sv(struct vs_compile_context *vcc,
                        struct toy_dst dst, int dim, int idx)
{
   struct toy_compiler *tc = &vcc->tc;
   const struct toy_tgsi *tgsi = &vcc->tgsi;
   int slot;

   assert(!dim);

   slot = toy_tgsi_find_system_value(tgsi, idx);
   if (slot < 0)
      return;

   switch (tgsi->system_values[slot].semantic_name) {
   case TGSI_SEMANTIC_INSTANCEID:
   case TGSI_SEMANTIC_VERTEXID:
      /*
       * In 3DSTATE_VERTEX_ELEMENTS, we prepend an extra vertex element for
       * the generated IDs, with VID in the X channel and IID in the Y
       * channel.
       */
      {
         const int grf = vcc->first_vue_grf;
         const struct toy_src src = tsrc(TOY_FILE_GRF, grf, 0);
         const enum toy_swizzle swizzle =
            (tgsi->system_values[slot].semantic_name ==
             TGSI_SEMANTIC_INSTANCEID) ? TOY_SWIZZLE_Y : TOY_SWIZZLE_X;

         /* integer data: move with D (signed dword) types */
         tc_MOV(tc, tdst_d(dst), tsrc_d(tsrc_swizzle1(src, swizzle)));
      }
      break;
   case TGSI_SEMANTIC_PRIMID:
   default:
      tc_fail(tc, "unhandled system value");
      tc_MOV(tc, dst, tsrc_imm_d(0));
      break;
   }
}
 
/**
 * Lower a virtual TGSI fetch opcode whose dimension and index are both
 * immediates.  The virtual instruction is replaced by the lowered sequence
 * and then discarded.
 */
static void
vs_lower_opcode_tgsi_direct(struct vs_compile_context *vcc,
                            struct toy_inst *inst)
{
   struct toy_compiler *tc = &vcc->tc;
   int dim, idx;

   assert(inst->src[0].file == TOY_FILE_IMM);
   dim = inst->src[0].val32;

   assert(inst->src[1].file == TOY_FILE_IMM);
   idx = inst->src[1].val32;

   switch (inst->opcode) {
   case TOY_OPCODE_TGSI_IN:
      vs_lower_opcode_tgsi_in(vcc, inst->dst, dim, idx);
      break;
   case TOY_OPCODE_TGSI_CONST:
      /* constant fetch path differs between GEN6 and GEN7+ */
      if (tc->dev->gen >= ILO_GEN(7))
         vs_lower_opcode_tgsi_const_gen7(vcc, inst->dst, dim, inst->src[1]);
      else
         vs_lower_opcode_tgsi_const_gen6(vcc, inst->dst, dim, inst->src[1]);
      break;
   case TOY_OPCODE_TGSI_SV:
      vs_lower_opcode_tgsi_sv(vcc, inst->dst, dim, idx);
      break;
   case TOY_OPCODE_TGSI_IMM:
      assert(!dim);
      vs_lower_opcode_tgsi_imm(vcc, inst->dst, idx);
      break;
   default:
      tc_fail(tc, "unhandled TGSI fetch");
      break;
   }

   tc_discard_inst(tc, inst);
}
 
/**
 * Lower an indirect TGSI fetch/store.  Only indirect constant-buffer reads
 * are supported: the immediate base index is added to the indirect index,
 * then the per-gen constant fetch path is used.
 */
static void
vs_lower_opcode_tgsi_indirect(struct vs_compile_context *vcc,
                              struct toy_inst *inst)
{
   struct toy_compiler *tc = &vcc->tc;
   enum tgsi_file_type file;
   int dim, idx;
   struct toy_src indirect_dim, indirect_idx;

   assert(inst->src[0].file == TOY_FILE_IMM);
   file = inst->src[0].val32;

   assert(inst->src[1].file == TOY_FILE_IMM);
   dim = inst->src[1].val32;
   indirect_dim = inst->src[2];

   assert(inst->src[3].file == TOY_FILE_IMM);
   idx = inst->src[3].val32;
   indirect_idx = inst->src[4];

   /* no dimension indirection */
   assert(indirect_dim.file == TOY_FILE_IMM);
   dim += indirect_dim.val32;

   switch (inst->opcode) {
   case TOY_OPCODE_TGSI_INDIRECT_FETCH:
      if (file == TGSI_FILE_CONSTANT) {
         if (idx) {
            /* fold the immediate base into the indirect index */
            struct toy_dst tmp = tc_alloc_tmp(tc);

            tc_ADD(tc, tmp, indirect_idx, tsrc_imm_d(idx));
            indirect_idx = tsrc_from(tmp);
         }

         if (tc->dev->gen >= ILO_GEN(7))
            vs_lower_opcode_tgsi_const_gen7(vcc, inst->dst, dim, indirect_idx);
         else
            vs_lower_opcode_tgsi_const_gen6(vcc, inst->dst, dim, indirect_idx);
         break;
      }
      /* fall through */
   case TOY_OPCODE_TGSI_INDIRECT_STORE:
   default:
      tc_fail(tc, "unhandled TGSI indirection");
      break;
   }

   tc_discard_inst(tc, inst);
}
 
/**
 * Emit instructions to move sampling parameters to the message registers.
 *
 * The per-message parameter layouts follow the SIMD4x2 sampler message
 * formats.  Returns the message length in registers (parameters are packed
 * four to a register).
 */
static int
vs_add_sampler_params(struct toy_compiler *tc, int msg_type, int base_mrf,
                      struct toy_src coords, int num_coords,
                      struct toy_src bias_or_lod, struct toy_src ref_or_si,
                      struct toy_src ddx, struct toy_src ddy, int num_derivs)
{
   const unsigned coords_writemask = (1 << num_coords) - 1;
   struct toy_dst m[3];
   int num_params, i;

   assert(num_coords <= 4);
   assert(num_derivs <= 3 && num_derivs <= num_coords);

   for (i = 0; i < Elements(m); i++)
      m[i] = tdst(TOY_FILE_MRF, base_mrf + i, 0);

   switch (msg_type) {
   case GEN5_SAMPLER_MESSAGE_SAMPLE_LOD:
      /* m0: u, v, r, ai; m1.x: lod */
      tc_MOV(tc, tdst_writemask(m[0], coords_writemask), coords);
      tc_MOV(tc, tdst_writemask(m[1], TOY_WRITEMASK_X), bias_or_lod);
      num_params = 5;
      break;
   case GEN5_SAMPLER_MESSAGE_SAMPLE_DERIVS:
      /* m0: coords; m1/m2: interleaved dudx/dudy pairs per axis */
      tc_MOV(tc, tdst_writemask(m[0], coords_writemask), coords);
      tc_MOV(tc, tdst_writemask(m[1], TOY_WRITEMASK_XZ),
            tsrc_swizzle(ddx, 0, 0, 1, 1));
      tc_MOV(tc, tdst_writemask(m[1], TOY_WRITEMASK_YW),
            tsrc_swizzle(ddy, 0, 0, 1, 1));
      if (num_derivs > 2) {
         tc_MOV(tc, tdst_writemask(m[2], TOY_WRITEMASK_X),
               tsrc_swizzle1(ddx, 2));
         tc_MOV(tc, tdst_writemask(m[2], TOY_WRITEMASK_Y),
               tsrc_swizzle1(ddy, 2));
      }
      num_params = 4 + num_derivs * 2;
      break;
   case GEN5_SAMPLER_MESSAGE_SAMPLE_LOD_COMPARE:
      /* m0: coords; m1.x: ref value, m1.y: lod */
      tc_MOV(tc, tdst_writemask(m[0], coords_writemask), coords);
      tc_MOV(tc, tdst_writemask(m[1], TOY_WRITEMASK_X), ref_or_si);
      tc_MOV(tc, tdst_writemask(m[1], TOY_WRITEMASK_Y), bias_or_lod);
      num_params = 6;
      break;
   case GEN5_SAMPLER_MESSAGE_SAMPLE_LD:
      /* integer coords with lod in w; gen6 also takes a sample index */
      assert(num_coords <= 3);
      tc_MOV(tc, tdst_writemask(tdst_d(m[0]), coords_writemask), coords);
      tc_MOV(tc, tdst_writemask(tdst_d(m[0]), TOY_WRITEMASK_W), bias_or_lod);
      if (tc->dev->gen >= ILO_GEN(7)) {
         num_params = 4;
      }
      else {
         tc_MOV(tc, tdst_writemask(tdst_d(m[1]), TOY_WRITEMASK_X), ref_or_si);
         num_params = 5;
      }
      break;
   case GEN5_SAMPLER_MESSAGE_SAMPLE_RESINFO:
      /* only the lod is needed */
      tc_MOV(tc, tdst_writemask(tdst_d(m[0]), TOY_WRITEMASK_X), bias_or_lod);
      num_params = 1;
      break;
   default:
      tc_fail(tc, "unknown sampler opcode");
      num_params = 0;
      break;
   }

   return (num_params + 3) / 4;
}
 
/**
* Set up message registers and return the message descriptor for sampling.
*/
static struct toy_src
vs_prepare_tgsi_sampling(struct toy_compiler *tc, const struct toy_inst *inst,
int base_mrf, unsigned *ret_sampler_index)
{
unsigned simd_mode, msg_type, msg_len, sampler_index, binding_table_index;
struct toy_src coords, ddx, ddy, bias_or_lod, ref_or_si;
int num_coords, ref_pos, num_derivs;
int sampler_src;
 
simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD4X2;
 
coords = inst->src[0];
ddx = tsrc_null();
ddy = tsrc_null();
bias_or_lod = tsrc_null();
ref_or_si = tsrc_null();
num_derivs = 0;
sampler_src = 1;
 
num_coords = tgsi_util_get_texture_coord_dim(inst->tex.target, &ref_pos);
 
/* extract the parameters */
switch (inst->opcode) {
case TOY_OPCODE_TGSI_TXD:
if (ref_pos >= 0)
tc_fail(tc, "TXD with shadow sampler not supported");
 
msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_DERIVS;
ddx = inst->src[1];
ddy = inst->src[2];
num_derivs = num_coords;
sampler_src = 3;
break;
case TOY_OPCODE_TGSI_TXL:
if (ref_pos >= 0) {
assert(ref_pos < 3);
 
msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_LOD_COMPARE;
ref_or_si = tsrc_swizzle1(coords, ref_pos);
}
else {
msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_LOD;
}
 
bias_or_lod = tsrc_swizzle1(coords, TOY_SWIZZLE_W);
break;
case TOY_OPCODE_TGSI_TXF:
msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_LD;
 
switch (inst->tex.target) {
case TGSI_TEXTURE_2D_MSAA:
case TGSI_TEXTURE_2D_ARRAY_MSAA:
assert(ref_pos >= 0 && ref_pos < 4);
/* lod is always 0 */
bias_or_lod = tsrc_imm_d(0);
ref_or_si = tsrc_swizzle1(coords, ref_pos);
break;
default:
bias_or_lod = tsrc_swizzle1(coords, TOY_SWIZZLE_W);
break;
}
 
/* offset the coordinates */
if (!tsrc_is_null(inst->tex.offsets[0])) {
struct toy_dst tmp;
 
tmp = tc_alloc_tmp(tc);
tc_ADD(tc, tmp, coords, inst->tex.offsets[0]);
coords = tsrc_from(tmp);
}
 
sampler_src = 1;
break;
case TOY_OPCODE_TGSI_TXQ:
msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_RESINFO;
num_coords = 0;
bias_or_lod = tsrc_swizzle1(coords, TOY_SWIZZLE_X);
break;
case TOY_OPCODE_TGSI_TXQ_LZ:
msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_RESINFO;
num_coords = 0;
sampler_src = 0;
break;
case TOY_OPCODE_TGSI_TXL2:
if (ref_pos >= 0) {
assert(ref_pos < 4);
 
msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_LOD_COMPARE;
ref_or_si = tsrc_swizzle1(coords, ref_pos);
}
else {
msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_LOD;
}
 
bias_or_lod = tsrc_swizzle1(inst->src[1], TOY_SWIZZLE_X);
sampler_src = 2;
break;
default:
assert(!"unhandled sampling opcode");
if (ret_sampler_index)
*ret_sampler_index = 0;
return tsrc_null();
break;
}
 
assert(inst->src[sampler_src].file == TOY_FILE_IMM);
sampler_index = inst->src[sampler_src].val32;
binding_table_index = ILO_VS_TEXTURE_SURFACE(sampler_index);
 
/*
* From the Sandy Bridge PRM, volume 4 part 1, page 18:
*
* "Note that the (cube map) coordinates delivered to the sampling
* engine must already have been divided by the component with the
* largest absolute value."
*/
switch (inst->tex.target) {
case TGSI_TEXTURE_CUBE:
case TGSI_TEXTURE_SHADOWCUBE:
case TGSI_TEXTURE_CUBE_ARRAY:
case TGSI_TEXTURE_SHADOWCUBE_ARRAY:
/* TXQ does not need coordinates */
if (num_coords >= 3) {
struct toy_dst tmp, max;
struct toy_src abs_coords[3];
int i;
 
tmp = tc_alloc_tmp(tc);
max = tdst_writemask(tmp, TOY_WRITEMASK_W);
 
for (i = 0; i < 3; i++)
abs_coords[i] = tsrc_absolute(tsrc_swizzle1(coords, i));
 
tc_SEL(tc, max, abs_coords[0], abs_coords[0], BRW_CONDITIONAL_GE);
tc_SEL(tc, max, tsrc_from(max), abs_coords[0], BRW_CONDITIONAL_GE);
tc_INV(tc, max, tsrc_from(max));
 
for (i = 0; i < 3; i++)
tc_MUL(tc, tdst_writemask(tmp, 1 << i), coords, tsrc_from(max));
 
coords = tsrc_from(tmp);
}
break;
}
 
/* set up sampler parameters */
msg_len = vs_add_sampler_params(tc, msg_type, base_mrf,
coords, num_coords, bias_or_lod, ref_or_si, ddx, ddy, num_derivs);
 
/*
* From the Sandy Bridge PRM, volume 4 part 1, page 136:
*
* "The maximum message length allowed to the sampler is 11. This would
* disallow sample_d, sample_b_c, and sample_l_c with a SIMD Mode of
* SIMD16."
*/
if (msg_len > 11)
tc_fail(tc, "maximum length for messages to the sampler is 11");
 
if (ret_sampler_index)
*ret_sampler_index = sampler_index;
 
return tsrc_imm_mdesc_sampler(tc, msg_len, 1,
false, simd_mode, msg_type, sampler_index, binding_table_index);
}
 
/**
 * Lower a virtual sampling opcode to a SEND to the sampler, then apply the
 * sampler-view channel swizzles to the result.  The sample is written to a
 * temporary first so the swizzle MOVs can read it.
 */
static void
vs_lower_opcode_tgsi_sampling(struct vs_compile_context *vcc,
                              struct toy_inst *inst)
{
   struct toy_compiler *tc = &vcc->tc;
   struct toy_src desc;
   struct toy_dst dst, tmp;
   unsigned sampler_index;
   int swizzles[4], i;
   unsigned swizzle_zero_mask, swizzle_one_mask, swizzle_normal_mask;
   bool need_filter;

   desc = vs_prepare_tgsi_sampling(tc, inst,
         vcc->first_free_mrf, &sampler_index);

   /* raw fetches and queries bypass the view swizzle filtering */
   switch (inst->opcode) {
   case TOY_OPCODE_TGSI_TXF:
   case TOY_OPCODE_TGSI_TXQ:
   case TOY_OPCODE_TGSI_TXQ_LZ:
      need_filter = false;
      break;
   default:
      need_filter = true;
      break;
   }

   toy_compiler_lower_to_send(tc, inst, false, BRW_SFID_SAMPLER);
   inst->src[0] = tsrc(TOY_FILE_MRF, vcc->first_free_mrf, 0);
   inst->src[1] = desc;

   /* write to a temp first */
   tmp = tc_alloc_tmp(tc);
   tmp.type = inst->dst.type;
   dst = inst->dst;
   inst->dst = tmp;

   tc_move_inst(tc, inst);

   if (need_filter) {
      assert(sampler_index < vcc->variant->num_sampler_views);
      swizzles[0] = vcc->variant->sampler_view_swizzles[sampler_index].r;
      swizzles[1] = vcc->variant->sampler_view_swizzles[sampler_index].g;
      swizzles[2] = vcc->variant->sampler_view_swizzles[sampler_index].b;
      swizzles[3] = vcc->variant->sampler_view_swizzles[sampler_index].a;
   }
   else {
      /* identity swizzle */
      swizzles[0] = PIPE_SWIZZLE_RED;
      swizzles[1] = PIPE_SWIZZLE_GREEN;
      swizzles[2] = PIPE_SWIZZLE_BLUE;
      swizzles[3] = PIPE_SWIZZLE_ALPHA;
   }

   /* partition channels into constant-0, constant-1, and source-swizzled */
   swizzle_zero_mask = 0;
   swizzle_one_mask = 0;
   swizzle_normal_mask = 0;
   for (i = 0; i < 4; i++) {
      switch (swizzles[i]) {
      case PIPE_SWIZZLE_ZERO:
         swizzle_zero_mask |= 1 << i;
         swizzles[i] = i;
         break;
      case PIPE_SWIZZLE_ONE:
         swizzle_one_mask |= 1 << i;
         swizzles[i] = i;
         break;
      default:
         swizzle_normal_mask |= 1 << i;
         break;
      }
   }

   /* swizzle the results */
   if (swizzle_normal_mask) {
      tc_MOV(tc, tdst_writemask(dst, swizzle_normal_mask),
            tsrc_swizzle(tsrc_from(tmp), swizzles[0],
               swizzles[1], swizzles[2], swizzles[3]));
   }
   if (swizzle_zero_mask)
      tc_MOV(tc, tdst_writemask(dst, swizzle_zero_mask), tsrc_imm_f(0.0f));
   if (swizzle_one_mask)
      tc_MOV(tc, tdst_writemask(dst, swizzle_one_mask), tsrc_imm_f(1.0f));
}
 
/**
 * Lower TOY_OPCODE_URB_WRITE to a SEND to the URB shared function.
 */
static void
vs_lower_opcode_urb_write(struct toy_compiler *tc, struct toy_inst *inst)
{
   /* vs_write_vue() has set up the message registers */
   toy_compiler_lower_to_send(tc, inst, false, BRW_SFID_URB);
}
 
/**
 * Walk the instruction list and lower every virtual opcode (TGSI fetches,
 * sampling, math, URB writes) to real hardware instructions.
 */
static void
vs_lower_virtual_opcodes(struct vs_compile_context *vcc)
{
   struct toy_compiler *tc = &vcc->tc;
   struct toy_inst *inst;

   tc_head(tc);
   while ((inst = tc_next(tc)) != NULL) {
      switch (inst->opcode) {
      case TOY_OPCODE_TGSI_IN:
      case TOY_OPCODE_TGSI_CONST:
      case TOY_OPCODE_TGSI_SV:
      case TOY_OPCODE_TGSI_IMM:
         vs_lower_opcode_tgsi_direct(vcc, inst);
         break;
      case TOY_OPCODE_TGSI_INDIRECT_FETCH:
      case TOY_OPCODE_TGSI_INDIRECT_STORE:
         vs_lower_opcode_tgsi_indirect(vcc, inst);
         break;
      case TOY_OPCODE_TGSI_TEX:
      case TOY_OPCODE_TGSI_TXB:
      case TOY_OPCODE_TGSI_TXD:
      case TOY_OPCODE_TGSI_TXL:
      case TOY_OPCODE_TGSI_TXP:
      case TOY_OPCODE_TGSI_TXF:
      case TOY_OPCODE_TGSI_TXQ:
      case TOY_OPCODE_TGSI_TXQ_LZ:
      case TOY_OPCODE_TGSI_TEX2:
      case TOY_OPCODE_TGSI_TXB2:
      case TOY_OPCODE_TGSI_TXL2:
      case TOY_OPCODE_TGSI_SAMPLE:
      case TOY_OPCODE_TGSI_SAMPLE_I:
      case TOY_OPCODE_TGSI_SAMPLE_I_MS:
      case TOY_OPCODE_TGSI_SAMPLE_B:
      case TOY_OPCODE_TGSI_SAMPLE_C:
      case TOY_OPCODE_TGSI_SAMPLE_C_LZ:
      case TOY_OPCODE_TGSI_SAMPLE_D:
      case TOY_OPCODE_TGSI_SAMPLE_L:
      case TOY_OPCODE_TGSI_GATHER4:
      case TOY_OPCODE_TGSI_SVIEWINFO:
      case TOY_OPCODE_TGSI_SAMPLE_POS:
      case TOY_OPCODE_TGSI_SAMPLE_INFO:
         vs_lower_opcode_tgsi_sampling(vcc, inst);
         break;
      case TOY_OPCODE_INV:
      case TOY_OPCODE_LOG:
      case TOY_OPCODE_EXP:
      case TOY_OPCODE_SQRT:
      case TOY_OPCODE_RSQ:
      case TOY_OPCODE_SIN:
      case TOY_OPCODE_COS:
      case TOY_OPCODE_FDIV:
      case TOY_OPCODE_POW:
      case TOY_OPCODE_INT_DIV_QUOTIENT:
      case TOY_OPCODE_INT_DIV_REMAINDER:
         toy_compiler_lower_math(tc, inst);
         break;
      case TOY_OPCODE_URB_WRITE:
         vs_lower_opcode_urb_write(tc, inst);
         break;
      default:
         /* opcodes above 127 are virtual and must have been handled */
         if (inst->opcode > 127)
            tc_fail(tc, "unhandled virtual opcode");
         break;
      }
   }
}
 
/**
 * Compile the shader: lower virtual opcodes, optimize, allocate registers,
 * legalize, and assemble the kernel into sh->kernel.
 *
 * Returns false on any legalization or assembly failure.
 */
static bool
vs_compile(struct vs_compile_context *vcc)
{
   struct toy_compiler *tc = &vcc->tc;
   struct ilo_shader *sh = vcc->shader;

   vs_lower_virtual_opcodes(vcc);
   toy_compiler_legalize_for_ra(tc);
   toy_compiler_optimize(tc);
   toy_compiler_allocate_registers(tc,
         vcc->first_free_grf,
         vcc->last_free_grf,
         vcc->num_grf_per_vrf);
   toy_compiler_legalize_for_asm(tc);

   if (tc->fail) {
      ilo_err("failed to legalize VS instructions: %s\n", tc->reason);
      return false;
   }

   if (ilo_debug & ILO_DEBUG_VS) {
      ilo_printf("legalized instructions:\n");
      toy_compiler_dump(tc);
      ilo_printf("\n");
   }

   /* the else branch is a debugging aid: flip the condition to inject a
    * hand-written kernel instead of the assembled one */
   if (true) {
      sh->kernel = toy_compiler_assemble(tc, &sh->kernel_size);
   }
   else {
      static const uint32_t microcode[] = {
         /* fill in the microcode here */
         0x0, 0x0, 0x0, 0x0,
      };
      const bool swap = true;

      sh->kernel_size = sizeof(microcode);
      sh->kernel = MALLOC(sh->kernel_size);

      if (sh->kernel) {
         const int num_dwords = sizeof(microcode) / 4;
         const uint32_t *src = microcode;
         uint32_t *dst = (uint32_t *) sh->kernel;
         int i;

         /* copy 16 bytes at a time, optionally reversing dword order */
         for (i = 0; i < num_dwords; i += 4) {
            if (swap) {
               dst[i + 0] = src[i + 3];
               dst[i + 1] = src[i + 2];
               dst[i + 2] = src[i + 1];
               dst[i + 3] = src[i + 0];
            }
            else {
               memcpy(dst, src, 16);
            }
         }
      }
   }

   if (!sh->kernel) {
      ilo_err("failed to compile VS: %s\n", tc->reason);
      return false;
   }

   if (ilo_debug & ILO_DEBUG_VS) {
      ilo_printf("disassembly:\n");
      toy_compiler_disassemble(tc, sh->kernel, sh->kernel_size);
      ilo_printf("\n");
   }

   return true;
}
 
/**
 * Collect the toy registers to be written to the VUE.
 *
 * For each VUE slot, picks the VRF holding the TGSI output, or synthesizes a
 * value: slot 0 packs PSIZE (channel W) with a zeroed header, and CLIPDIST
 * slots not written by the shader are computed from CLIPVERTEX (or POSITION)
 * dotted with the user clip planes in the push constants.
 *
 * Returns the number of collected attributes.
 */
static int
vs_collect_outputs(struct vs_compile_context *vcc, struct toy_src *outs)
{
   const struct toy_tgsi *tgsi = &vcc->tgsi;
   int i;

   for (i = 0; i < vcc->shader->out.count; i++) {
      const int slot = vcc->output_map[i];
      const int vrf = (slot >= 0) ? toy_tgsi_get_vrf(tgsi,
            TGSI_FILE_OUTPUT, 0, tgsi->outputs[slot].index) : -1;
      struct toy_src src;

      if (vrf >= 0) {
         struct toy_dst dst;

         dst = tdst(TOY_FILE_VRF, vrf, 0);
         src = tsrc_from(dst);

         if (i == 0) {
            /* PSIZE is at channel W */
            tc_MOV(&vcc->tc, tdst_writemask(dst, TOY_WRITEMASK_W),
                  tsrc_swizzle1(src, TOY_SWIZZLE_X));

            /* the other channels are for the header */
            dst = tdst_d(dst);
            tc_MOV(&vcc->tc, tdst_writemask(dst, TOY_WRITEMASK_XYZ),
                  tsrc_imm_d(0));
         }
         else {
            /* initialize unused channels to 0.0f */
            if (tgsi->outputs[slot].undefined_mask) {
               dst = tdst_writemask(dst, tgsi->outputs[slot].undefined_mask);
               tc_MOV(&vcc->tc, dst, tsrc_imm_f(0.0f));
            }
         }
      }
      else {
         /* XXX this is too ugly */
         if (vcc->shader->out.semantic_names[i] == TGSI_SEMANTIC_CLIPDIST &&
             slot < 0) {
            /* ok, we need to compute clip distance */
            int clipvert_slot = -1, clipvert_vrf, j;

            /* prefer CLIPVERTEX; fall back to POSITION */
            for (j = 0; j < tgsi->num_outputs; j++) {
               if (tgsi->outputs[j].semantic_name ==
                     TGSI_SEMANTIC_CLIPVERTEX) {
                  clipvert_slot = j;
                  break;
               }
               else if (tgsi->outputs[j].semantic_name ==
                     TGSI_SEMANTIC_POSITION) {
                  /* remember pos, but keep looking */
                  clipvert_slot = j;
               }
            }

            clipvert_vrf = (clipvert_slot >= 0) ? toy_tgsi_get_vrf(tgsi,
                  TGSI_FILE_OUTPUT, 0, tgsi->outputs[clipvert_slot].index) : -1;
            if (clipvert_vrf >= 0) {
               struct toy_dst tmp = tc_alloc_tmp(&vcc->tc);
               struct toy_src clipvert = tsrc(TOY_FILE_VRF, clipvert_vrf, 0);
               int first_ucp, last_ucp;

               /* CLIPDIST[0] holds planes 0-3, CLIPDIST[1] planes 4-7 */
               if (vcc->shader->out.semantic_indices[i]) {
                  first_ucp = 4;
                  last_ucp = MIN2(7, vcc->variant->u.vs.num_ucps - 1);
               }
               else {
                  first_ucp = 0;
                  last_ucp = MIN2(3, vcc->variant->u.vs.num_ucps - 1);
               }

               for (j = first_ucp; j <= last_ucp; j++) {
                  /* two planes are packed per push-constant GRF */
                  const int plane_grf = vcc->first_const_grf + j / 2;
                  const int plane_subreg = (j & 1) * 16;
                  const struct toy_src plane = tsrc_rect(tsrc(TOY_FILE_GRF,
                        plane_grf, plane_subreg), TOY_RECT_041);
                  const unsigned writemask = 1 << ((j >= 4) ? j - 4 : j);

                  tc_DP4(&vcc->tc, tdst_writemask(tmp, writemask),
                        clipvert, plane);
               }

               src = tsrc_from(tmp);
            }
            else {
               src = tsrc_imm_f(0.0f);
            }
         }
         else {
            /* unwritten output: zeroed header for slot 0, 0.0f otherwise */
            src = (i == 0) ? tsrc_imm_d(0) : tsrc_imm_f(0.0f);
         }
      }

      outs[i] = src;
   }

   return i;
}
 
/**
 * Emit instructions to write the VUE.
 *
 * Builds one or more URB write messages: a header copied from r0 followed by
 * the collected output attributes, splitting into multiple messages when the
 * attributes do not fit in the free MRF range.  The final message has EOT
 * set.  The attribute counts per message obey the PRM's even-number rules
 * quoted inline below.
 */
static void
vs_write_vue(struct vs_compile_context *vcc)
{
   struct toy_compiler *tc = &vcc->tc;
   struct toy_src outs[PIPE_MAX_SHADER_OUTPUTS];
   struct toy_dst header;
   struct toy_src r0;
   struct toy_inst *inst;
   int sent_attrs, total_attrs;

   /* m(first_free_mrf) is the message header, copied from r0 */
   header = tdst_ud(tdst(TOY_FILE_MRF, vcc->first_free_mrf, 0));
   r0 = tsrc_ud(tsrc(TOY_FILE_GRF, 0, 0));
   inst = tc_MOV(tc, header, r0);
   inst->mask_ctrl = BRW_MASK_DISABLE;

   if (tc->dev->gen >= ILO_GEN(7)) {
      /* NOTE(review): sets bits in DWord 5 of the header on GEN7+;
       * presumably the channel mask — confirm against the PRM */
      inst = tc_OR(tc, tdst_offset(header, 0, 5),
            tsrc_rect(tsrc_offset(r0, 0, 5), TOY_RECT_010),
            tsrc_rect(tsrc_imm_ud(0xff00), TOY_RECT_010));
      inst->exec_size = BRW_EXECUTE_1;
      inst->access_mode = BRW_ALIGN_1;
      inst->mask_ctrl = BRW_MASK_DISABLE;
   }

   total_attrs = vs_collect_outputs(vcc, outs);
   sent_attrs = 0;
   while (sent_attrs < total_attrs) {
      struct toy_src desc;
      int mrf = vcc->first_free_mrf + 1, avail_mrf_for_attrs;
      int num_attrs, msg_len, i;
      bool eot;

      num_attrs = total_attrs - sent_attrs;
      eot = true;

      /* see if we need another message */
      avail_mrf_for_attrs = vcc->last_free_mrf - mrf + 1;
      if (num_attrs > avail_mrf_for_attrs) {
         /*
          * From the Sandy Bridge PRM, volume 4 part 2, page 22:
          *
          *   "Offset. This field specifies a destination offset (in 256-bit
          *    units) from the start of the URB entry(s), as referenced by
          *    URB Return Handle n, at which the data (if any) will be
          *    written."
          *
          * As we need to offset the following messages, we must make sure
          * this one writes an even number of attributes.
          */
         num_attrs = avail_mrf_for_attrs & ~1;
         eot = false;
      }

      if (tc->dev->gen >= ILO_GEN(7)) {
         /* do not forget about the header */
         msg_len = 1 + num_attrs;
      }
      else {
         /*
          * From the Sandy Bridge PRM, volume 4 part 2, page 26:
          *
          *   "At least 256 bits per vertex (512 bits total, M1 & M2) must
          *    be written.  Writing only 128 bits per vertex (256 bits
          *    total, M1 only) results in UNDEFINED operation."
          *
          *   "[DevSNB] Interleave writes must be in multiples of 256 per
          *    vertex."
          *
          * That is, we must write or appear to write an even number of
          * attributes, starting from two.
          */
         if (num_attrs % 2 && num_attrs == avail_mrf_for_attrs) {
            num_attrs--;
            eot = false;
         }

         msg_len = 1 + align(num_attrs, 2);
      }

      /* move this message's attributes into the MRF after the header */
      for (i = 0; i < num_attrs; i++)
         tc_MOV(tc, tdst(TOY_FILE_MRF, mrf++, 0), outs[sent_attrs + i]);

      /* URB offset is in 256-bit units; two attributes per unit */
      assert(sent_attrs % 2 == 0);
      desc = tsrc_imm_mdesc_urb(tc, eot, msg_len, 0,
            eot, true, false, BRW_URB_SWIZZLE_INTERLEAVE, sent_attrs / 2, 0);

      tc_add2(tc, TOY_OPCODE_URB_WRITE, tdst_null(), tsrc_from(header), desc);

      sent_attrs += num_attrs;
   }
}
 
/**
 * Set up shader inputs for fixed-function units.
 *
 * The layout matches what the vertex fetcher delivers: the generated
 * vertex/instance-ID element first (if the shader uses either), followed by
 * one GENERIC attribute per vertex element, in semantic-index order.
 */
static void
vs_setup_shader_in(struct ilo_shader *sh, const struct toy_tgsi *tgsi)
{
   int num_attrs, i;

   /* vertex/instance id is the first VE if exists */
   for (i = 0; i < tgsi->num_system_values; i++) {
      const int name = tgsi->system_values[i].semantic_name;

      if (name != TGSI_SEMANTIC_INSTANCEID && name != TGSI_SEMANTIC_VERTEXID)
         continue;

      sh->in.semantic_names[sh->in.count] = name;
      sh->in.semantic_indices[sh->in.count] =
         tgsi->system_values[i].semantic_index;
      sh->in.interp[sh->in.count] = TGSI_INTERPOLATE_CONSTANT;
      sh->in.centroid[sh->in.count] = false;

      sh->in.count++;
      break;
   }

   /* size the attribute range by the highest GENERIC index used */
   num_attrs = 0;
   for (i = 0; i < tgsi->num_inputs; i++) {
      assert(tgsi->inputs[i].semantic_name == TGSI_SEMANTIC_GENERIC);
      if (num_attrs <= tgsi->inputs[i].semantic_index)
         num_attrs = tgsi->inputs[i].semantic_index + 1;
   }
   assert(num_attrs <= PIPE_MAX_ATTRIBS);

   /* VF cannot remap VEs.  VE[i] must be used as GENERIC[i]. */
   for (i = 0; i < num_attrs; i++) {
      const int slot = sh->in.count + i;

      sh->in.semantic_names[slot] = TGSI_SEMANTIC_GENERIC;
      sh->in.semantic_indices[slot] = i;
      sh->in.interp[slot] = TGSI_INTERPOLATE_CONSTANT;
      sh->in.centroid[slot] = false;
   }

   sh->in.count += num_attrs;

   /* not meaningful for a VS */
   sh->in.has_pos = false;
   sh->in.has_linear_interp = false;
   sh->in.barycentric_interpolation_mode = 0;
}
 
/**
 * Set up shader outputs for fixed-function units.
 *
 * The resulting VUE layout is:
 *
 *   PSIZE, POSITION, [CLIPDIST0, CLIPDIST1,] COLOR0, BCOLOR0, COLOR1,
 *   BCOLOR1, then all remaining outputs in TGSI order
 *
 * \param output_clipdist  whether the two clip-distance slots are emitted
 * \param output_map       filled with, for each VUE slot, the TGSI output
 *                         slot it comes from, or -1 if the shader does not
 *                         write it
 */
static void
vs_setup_shader_out(struct ilo_shader *sh, const struct toy_tgsi *tgsi,
                    bool output_clipdist, int *output_map)
{
   int psize_slot = -1, pos_slot = -1;
   int clipdist_slot[2] = { -1, -1 };
   /* interleaved: [0]=COLOR0, [1]=BCOLOR0, [2]=COLOR1, [3]=BCOLOR1 */
   int color_slot[4] = { -1, -1, -1, -1 };
   int num_outs, i;

   /* find out the slots of outputs that need special care */
   for (i = 0; i < tgsi->num_outputs; i++) {
      switch (tgsi->outputs[i].semantic_name) {
      case TGSI_SEMANTIC_PSIZE:
         psize_slot = i;
         break;
      case TGSI_SEMANTIC_POSITION:
         pos_slot = i;
         break;
      case TGSI_SEMANTIC_CLIPDIST:
         if (tgsi->outputs[i].semantic_index)
            clipdist_slot[1] = i;
         else
            clipdist_slot[0] = i;
         break;
      case TGSI_SEMANTIC_COLOR:
         if (tgsi->outputs[i].semantic_index)
            color_slot[2] = i;
         else
            color_slot[0] = i;
         break;
      case TGSI_SEMANTIC_BCOLOR:
         if (tgsi->outputs[i].semantic_index)
            color_slot[3] = i;
         else
            color_slot[1] = i;
         break;
      default:
         break;
      }
   }

   /* the first two VUEs are always PSIZE and POSITION */
   num_outs = 2;
   output_map[0] = psize_slot;
   output_map[1] = pos_slot;

   /* register index is -1 when the shader does not write the output */
   sh->out.register_indices[0] =
      (psize_slot >= 0) ? tgsi->outputs[psize_slot].index : -1;
   sh->out.semantic_names[0] = TGSI_SEMANTIC_PSIZE;
   sh->out.semantic_indices[0] = 0;

   sh->out.register_indices[1] =
      (pos_slot >= 0) ? tgsi->outputs[pos_slot].index : -1;
   sh->out.semantic_names[1] = TGSI_SEMANTIC_POSITION;
   sh->out.semantic_indices[1] = 0;

   sh->out.has_pos = true;

   /* followed by optional clip distances */
   if (output_clipdist) {
      sh->out.register_indices[num_outs] =
         (clipdist_slot[0] >= 0) ? tgsi->outputs[clipdist_slot[0]].index : -1;
      sh->out.semantic_names[num_outs] = TGSI_SEMANTIC_CLIPDIST;
      sh->out.semantic_indices[num_outs] = 0;
      output_map[num_outs++] = clipdist_slot[0];

      sh->out.register_indices[num_outs] =
         (clipdist_slot[1] >= 0) ? tgsi->outputs[clipdist_slot[1]].index : -1;
      sh->out.semantic_names[num_outs] = TGSI_SEMANTIC_CLIPDIST;
      sh->out.semantic_indices[num_outs] = 1;
      output_map[num_outs++] = clipdist_slot[1];
   }

   /*
    * make BCOLOR follow COLOR so that we can make use of
    * ATTRIBUTE_SWIZZLE_INPUTATTR_FACING in 3DSTATE_SF
    */
   for (i = 0; i < 4; i++) {
      const int slot = color_slot[i];

      /* unwritten colors get no VUE slot at all */
      if (slot < 0)
         continue;

      sh->out.register_indices[num_outs] = tgsi->outputs[slot].index;
      sh->out.semantic_names[num_outs] = tgsi->outputs[slot].semantic_name;
      sh->out.semantic_indices[num_outs] = tgsi->outputs[slot].semantic_index;

      output_map[num_outs++] = slot;
   }

   /* add the rest of the outputs, skipping those already placed above */
   for (i = 0; i < tgsi->num_outputs; i++) {
      switch (tgsi->outputs[i].semantic_name) {
      case TGSI_SEMANTIC_PSIZE:
      case TGSI_SEMANTIC_POSITION:
      case TGSI_SEMANTIC_CLIPDIST:
      case TGSI_SEMANTIC_COLOR:
      case TGSI_SEMANTIC_BCOLOR:
         break;
      default:
         sh->out.register_indices[num_outs] = tgsi->outputs[i].index;
         sh->out.semantic_names[num_outs] = tgsi->outputs[i].semantic_name;
         sh->out.semantic_indices[num_outs] = tgsi->outputs[i].semantic_index;
         output_map[num_outs++] = i;
         break;
      }
   }

   sh->out.count = num_outs;
}
 
/**
 * Translate the TGSI tokens.
 *
 * Returns false (after logging the reason) when the translator fails.
 */
static bool
vs_setup_tgsi(struct toy_compiler *tc, const struct tgsi_token *tokens,
              struct toy_tgsi *tgsi)
{
   const bool dump = (ilo_debug & ILO_DEBUG_VS) != 0;

   if (dump) {
      ilo_printf("dumping vertex shader\n");
      ilo_printf("\n");

      tgsi_dump(tokens, 0);
      ilo_printf("\n");
   }

   toy_compiler_translate_tgsi(tc, tokens, true, tgsi);
   if (tc->fail) {
      ilo_err("failed to translate VS TGSI tokens: %s\n", tc->reason);
      return false;
   }

   if (dump) {
      ilo_printf("TGSI translator:\n");
      toy_tgsi_dump(tgsi);
      ilo_printf("\n");
      toy_compiler_dump(tc);
      ilo_printf("\n");
   }

   return true;
}
 
/**
 * Set up VS compile context.  This includes translating the TGSI tokens.
 *
 * On success, vcc owns a freshly allocated vcc->shader and an initialized
 * toy compiler; on failure everything allocated here is released and false
 * is returned.
 */
static bool
vs_setup(struct vs_compile_context *vcc,
         const struct ilo_shader_state *state,
         const struct ilo_shader_variant *variant)
{
   int num_consts;

   memset(vcc, 0, sizeof(*vcc));

   vcc->shader = CALLOC_STRUCT(ilo_shader);
   if (!vcc->shader)
      return false;

   vcc->variant = variant;

   toy_compiler_init(&vcc->tc, state->info.dev);
   /* VS instructions operate on 4-component vectors, 8 channels wide */
   vcc->tc.templ.access_mode = BRW_ALIGN_16;
   vcc->tc.templ.exec_size = BRW_EXECUTE_8;
   vcc->tc.rect_linear_width = 4;

   /*
    * The classic driver uses the sampler cache (gen6) or the data cache
    * (gen7).  Why?
    */
   vcc->const_cache = GEN6_SFID_DATAPORT_CONSTANT_CACHE;

   if (!vs_setup_tgsi(&vcc->tc, state->info.tokens, &vcc->tgsi)) {
      toy_compiler_cleanup(&vcc->tc);
      FREE(vcc->shader);
      return false;
   }

   vs_setup_shader_in(vcc->shader, &vcc->tgsi);
   vs_setup_shader_out(vcc->shader, &vcc->tgsi,
         (vcc->variant->u.vs.num_ucps > 0), vcc->output_map);

   /* fit each pair of user clip planes into a register */
   num_consts = (vcc->variant->u.vs.num_ucps + 1) / 2;

   /* r0 is reserved for payload header */
   vcc->first_const_grf = 1;
   vcc->first_vue_grf = vcc->first_const_grf + num_consts;
   vcc->first_free_grf = vcc->first_vue_grf + vcc->shader->in.count;
   vcc->last_free_grf = 127;

   /* m0 is reserved for system routines */
   vcc->first_free_mrf = 1;
   vcc->last_free_mrf = 15;

   vcc->num_grf_per_vrf = 1;

   /*
    * On GEN7 the top GRFs stand in for the MRFs; carve 15 of them out of
    * the free-GRF range and address them as m1..m15.
    */
   if (vcc->tc.dev->gen >= ILO_GEN(7)) {
      vcc->last_free_grf -= 15;
      vcc->first_free_mrf = vcc->last_free_grf + 1;
      vcc->last_free_mrf = vcc->first_free_mrf + 14;
   }

   /* the push constants (UCPs) come right before the VUE in the payload */
   vcc->shader->in.start_grf = vcc->first_const_grf;
   vcc->shader->pcb.clip_state_size =
      vcc->variant->u.vs.num_ucps * (sizeof(float) * 4);

   return true;
}
 
/**
 * Compile the vertex shader.
 *
 * On GEN6, rasterizer discard and stream output require a pass-through GS,
 * which is compiled here as well and attached to the returned shader.
 *
 * \return the compiled shader, or NULL on failure
 */
struct ilo_shader *
ilo_shader_compile_vs(const struct ilo_shader_state *state,
                      const struct ilo_shader_variant *variant)
{
   struct vs_compile_context vcc;
   bool need_gs;

   if (!vs_setup(&vcc, state, variant))
      return NULL;

   /* GEN7+ handles these in the SOL/clipper stages without a GS */
   if (vcc.tc.dev->gen >= ILO_GEN(7)) {
      need_gs = false;
   }
   else {
      need_gs = variant->u.vs.rasterizer_discard ||
                state->info.stream_output.num_outputs;
   }

   vs_write_vue(&vcc);

   if (!vs_compile(&vcc)) {
      FREE(vcc.shader);
      vcc.shader = NULL;
   }

   toy_tgsi_cleanup(&vcc.tgsi);
   toy_compiler_cleanup(&vcc.tc);

   /*
    * Attach the pass-through GS.  Skip this when vs_compile() failed:
    * vcc.shader is NULL then, and the mapping loop below would
    * dereference it.
    */
   if (vcc.shader && need_gs) {
      int so_mapping[PIPE_MAX_SHADER_OUTPUTS];
      int i, j;

      /* map each TGSI output slot to its VUE slot for stream output */
      for (i = 0; i < vcc.tgsi.num_outputs; i++) {
         int attr = 0;

         for (j = 0; j < vcc.shader->out.count; j++) {
            if (vcc.tgsi.outputs[i].semantic_name ==
                  vcc.shader->out.semantic_names[j] &&
                vcc.tgsi.outputs[i].semantic_index ==
                  vcc.shader->out.semantic_indices[j]) {
               attr = j;
               break;
            }
         }

         so_mapping[i] = attr;
      }

      if (!ilo_shader_compile_gs_passthrough(state, variant,
               so_mapping, vcc.shader)) {
         ilo_shader_destroy_kernel(vcc.shader);
         vcc.shader = NULL;
      }
   }

   return vcc.shader;
}
/contrib/sdk/sources/Mesa/src/gallium/drivers/ilo/shader/toy_compiler.c
0,0 → 1,556
/*
* Mesa 3-D graphics library
*
* Copyright (C) 2012-2013 LunarG, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*
* Authors:
* Chia-I Wu <olv@lunarg.com>
*/
 
#include "toy_compiler.h"
 
/**
 * Dump an operand.
 *
 * \param file             register file of the operand
 * \param type             data type, used both for subregister scaling and
 *                         for the printed type suffix
 * \param rect             region description to print
 * \param indirect         true for register-indirect GRF addressing
 * \param indirect_subreg  subregister (in bytes) of the address register
 * \param val32            register origin in bytes, or the raw immediate
 * \param is_dst           true when printing a destination (affects the
 *                         region format)
 */
static void
tc_dump_operand(struct toy_compiler *tc,
                enum toy_file file, enum toy_type type, enum toy_rect rect,
                bool indirect, unsigned indirect_subreg, uint32_t val32,
                bool is_dst)
{
   static const char *toy_file_names[TOY_FILE_COUNT] = {
      [TOY_FILE_VRF] = "v",
      [TOY_FILE_ARF] = "NOT USED",
      [TOY_FILE_GRF] = "r",
      [TOY_FILE_MRF] = "m",
      [TOY_FILE_IMM] = "NOT USED",
   };
   const char *name = toy_file_names[file];
   int reg, subreg;

   /* reg/subreg are meaningful only for register files, not immediates */
   if (file != TOY_FILE_IMM) {
      reg = val32 / TOY_REG_WIDTH;
      subreg = (val32 % TOY_REG_WIDTH) / toy_type_size(type);
   }

   switch (file) {
   case TOY_FILE_GRF:
      if (indirect) {
         const int addr_subreg = indirect_subreg / toy_type_size(TOY_TYPE_UW);

         /* e.g. "r[a0.0+16]" */
         ilo_printf("%s[a0.%d", name, addr_subreg);
         if (val32)
            ilo_printf("%+d", (int) val32);
         ilo_printf("]");
         break;
      }
      /* fall through */
   case TOY_FILE_VRF:
   case TOY_FILE_MRF:
      /* e.g. "r1.4"; the subregister is omitted when zero */
      ilo_printf("%s%d", name, reg);
      if (subreg)
         ilo_printf(".%d", subreg);
      break;
   case TOY_FILE_ARF:
      /* architecture registers have individual names */
      switch (reg) {
      case BRW_ARF_NULL:
         ilo_printf("null");
         break;
      case BRW_ARF_ADDRESS:
         ilo_printf("a0.%d", subreg);
         break;
      case BRW_ARF_ACCUMULATOR:
      case BRW_ARF_ACCUMULATOR + 1:
         ilo_printf("acc%d.%d", (reg & 1), subreg);
         break;
      case BRW_ARF_FLAG:
         ilo_printf("f0.%d", subreg);
         break;
      case BRW_ARF_STATE:
         ilo_printf("sr0.%d", subreg);
         break;
      case BRW_ARF_CONTROL:
         ilo_printf("cr0.%d", subreg);
         break;
      case BRW_ARF_NOTIFICATION_COUNT:
      case BRW_ARF_NOTIFICATION_COUNT + 1:
         ilo_printf("n%d.%d", (reg & 1), subreg);
         break;
      case BRW_ARF_IP:
         ilo_printf("ip");
         break;
      }
      break;
   case TOY_FILE_IMM:
      /* print the immediate according to its type */
      switch (type) {
      case TOY_TYPE_F:
         {
            union fi fi = { .ui = val32 };
            ilo_printf("%f", fi.f);
         }
         break;
      case TOY_TYPE_D:
         ilo_printf("%d", (int32_t) val32);
         break;
      case TOY_TYPE_UD:
         ilo_printf("%u", val32);
         break;
      case TOY_TYPE_W:
         ilo_printf("%d", (int16_t) (val32 & 0xffff));
         break;
      case TOY_TYPE_UW:
         ilo_printf("%u", val32 & 0xffff);
         break;
      case TOY_TYPE_V:
         /* packed vector of 8 4-bit immediates; dump raw */
         ilo_printf("0x%08x", val32);
         break;
      default:
         assert(!"unknown imm type");
         break;
      }
      break;
   default:
      assert(!"unexpected file");
      break;
   }

   /* dump the region parameter */
   if (file != TOY_FILE_IMM) {
      int vert_stride, width, horz_stride;

      /* expand the symbolic rect into <VertStride;Width,HorzStride> */
      switch (rect) {
      case TOY_RECT_LINEAR:
         vert_stride = tc->rect_linear_width;
         width = tc->rect_linear_width;
         horz_stride = 1;
         break;
      case TOY_RECT_041:
         vert_stride = 0;
         width = 4;
         horz_stride = 1;
         break;
      case TOY_RECT_010:
         vert_stride = 0;
         width = 1;
         horz_stride = 0;
         break;
      case TOY_RECT_220:
         vert_stride = 2;
         width = 2;
         horz_stride = 0;
         break;
      case TOY_RECT_440:
         vert_stride = 4;
         width = 4;
         horz_stride = 0;
         break;
      case TOY_RECT_240:
         vert_stride = 2;
         width = 4;
         horz_stride = 0;
         break;
      default:
         assert(!"unknown rect parameter");
         vert_stride = 0;
         width = 0;
         horz_stride = 0;
         break;
      }

      /* destinations only have a horizontal stride */
      if (is_dst)
         ilo_printf("<%d>", horz_stride);
      else
         ilo_printf("<%d;%d,%d>", vert_stride, width, horz_stride);
   }

   /* type suffix */
   switch (type) {
   case TOY_TYPE_F:
      ilo_printf(":f");
      break;
   case TOY_TYPE_D:
      ilo_printf(":d");
      break;
   case TOY_TYPE_UD:
      ilo_printf(":ud");
      break;
   case TOY_TYPE_W:
      ilo_printf(":w");
      break;
   case TOY_TYPE_UW:
      ilo_printf(":uw");
      break;
   case TOY_TYPE_V:
      ilo_printf(":v");
      break;
   default:
      assert(!"unexpected type");
      break;
   }
}
 
/**
 * Dump a source operand, including its negate/abs modifiers and swizzle.
 */
static void
tc_dump_src(struct toy_compiler *tc, struct toy_src src)
{
   /* modifiers prefix the operand; abs also closes with a trailing bar */
   if (src.negate)
      ilo_printf("-");
   if (src.absolute)
      ilo_printf("|");

   tc_dump_operand(tc, src.file, src.type, src.rect,
         src.indirect, src.indirect_subreg, src.val32, false);

   if (tsrc_is_swizzled(src)) {
      static const char comps[] = "xyzw";

      ilo_printf(".%c%c%c%c",
            comps[src.swizzle_x], comps[src.swizzle_y],
            comps[src.swizzle_z], comps[src.swizzle_w]);
   }

   if (src.absolute)
      ilo_printf("|");
}
 
/**
 * Dump a destination operand; a partial writemask is appended as ".xyz"
 * style suffix.
 */
static void
tc_dump_dst(struct toy_compiler *tc, struct toy_dst dst)
{
   tc_dump_operand(tc, dst.file, dst.type, dst.rect,
         dst.indirect, dst.indirect_subreg, dst.val32, true);

   /* the full .xyzw mask is implicit and not printed */
   if (dst.writemask != TOY_WRITEMASK_XYZW) {
      static const unsigned masks[4] = {
         TOY_WRITEMASK_X, TOY_WRITEMASK_Y,
         TOY_WRITEMASK_Z, TOY_WRITEMASK_W,
      };
      static const char comps[4] = { 'x', 'y', 'z', 'w' };
      int i;

      ilo_printf(".");
      for (i = 0; i < 4; i++) {
         if (dst.writemask & masks[i])
            ilo_printf("%c", comps[i]);
      }
   }
}
 
/**
 * Return a printable mnemonic for a HW or toy opcode.  Never returns NULL;
 * unknown opcodes map to "unk".
 */
static const char *
get_opcode_name(unsigned opcode)
{
   switch (opcode) {
   /* hardware opcodes */
   case BRW_OPCODE_MOV: return "mov";
   case BRW_OPCODE_SEL: return "sel";
   case BRW_OPCODE_NOT: return "not";
   case BRW_OPCODE_AND: return "and";
   case BRW_OPCODE_OR: return "or";
   case BRW_OPCODE_XOR: return "xor";
   case BRW_OPCODE_SHR: return "shr";
   case BRW_OPCODE_SHL: return "shl";
   case BRW_OPCODE_RSR: return "rsr";
   case BRW_OPCODE_RSL: return "rsl";
   case BRW_OPCODE_ASR: return "asr";
   case BRW_OPCODE_CMP: return "cmp";
   case BRW_OPCODE_CMPN: return "cmpn";
   case BRW_OPCODE_JMPI: return "jmpi";
   case BRW_OPCODE_IF: return "if";
   case BRW_OPCODE_IFF: return "iff";
   case BRW_OPCODE_ELSE: return "else";
   case BRW_OPCODE_ENDIF: return "endif";
   case BRW_OPCODE_DO: return "do";
   case BRW_OPCODE_WHILE: return "while";
   case BRW_OPCODE_BREAK: return "break";
   case BRW_OPCODE_CONTINUE: return "continue";
   case BRW_OPCODE_HALT: return "halt";
   case BRW_OPCODE_MSAVE: return "msave";
   case BRW_OPCODE_MRESTORE: return "mrestore";
   case BRW_OPCODE_PUSH: return "push";
   case BRW_OPCODE_POP: return "pop";
   case BRW_OPCODE_WAIT: return "wait";
   case BRW_OPCODE_SEND: return "send";
   case BRW_OPCODE_SENDC: return "sendc";
   case BRW_OPCODE_MATH: return "math";
   case BRW_OPCODE_ADD: return "add";
   case BRW_OPCODE_MUL: return "mul";
   case BRW_OPCODE_AVG: return "avg";
   case BRW_OPCODE_FRC: return "frc";
   case BRW_OPCODE_RNDU: return "rndu";
   case BRW_OPCODE_RNDD: return "rndd";
   case BRW_OPCODE_RNDE: return "rnde";
   case BRW_OPCODE_RNDZ: return "rndz";
   case BRW_OPCODE_MAC: return "mac";
   case BRW_OPCODE_MACH: return "mach";
   case BRW_OPCODE_LZD: return "lzd";
   case BRW_OPCODE_SAD2: return "sad2";
   case BRW_OPCODE_SADA2: return "sada2";
   case BRW_OPCODE_DP4: return "dp4";
   case BRW_OPCODE_DPH: return "dph";
   case BRW_OPCODE_DP3: return "dp3";
   case BRW_OPCODE_DP2: return "dp2";
   case BRW_OPCODE_DPA2: return "dpa2";
   case BRW_OPCODE_LINE: return "line";
   case BRW_OPCODE_PLN: return "pln";
   case BRW_OPCODE_MAD: return "mad";
   case BRW_OPCODE_NOP: return "nop";
   /* TGSI */
   case TOY_OPCODE_TGSI_IN: return "tgsi.in";
   case TOY_OPCODE_TGSI_CONST: return "tgsi.const";
   case TOY_OPCODE_TGSI_SV: return "tgsi.sv";
   case TOY_OPCODE_TGSI_IMM: return "tgsi.imm";
   case TOY_OPCODE_TGSI_INDIRECT_FETCH: return "tgsi.indirect_fetch";
   case TOY_OPCODE_TGSI_INDIRECT_STORE: return "tgsi.indirect_store";
   case TOY_OPCODE_TGSI_TEX: return "tgsi.tex";
   case TOY_OPCODE_TGSI_TXB: return "tgsi.txb";
   case TOY_OPCODE_TGSI_TXD: return "tgsi.txd";
   case TOY_OPCODE_TGSI_TXL: return "tgsi.txl";
   case TOY_OPCODE_TGSI_TXP: return "tgsi.txp";
   case TOY_OPCODE_TGSI_TXF: return "tgsi.txf";
   case TOY_OPCODE_TGSI_TXQ: return "tgsi.txq";
   case TOY_OPCODE_TGSI_TXQ_LZ: return "tgsi.txq_lz";
   case TOY_OPCODE_TGSI_TEX2: return "tgsi.tex2";
   case TOY_OPCODE_TGSI_TXB2: return "tgsi.txb2";
   case TOY_OPCODE_TGSI_TXL2: return "tgsi.txl2";
   case TOY_OPCODE_TGSI_SAMPLE: return "tgsi.sample";
   case TOY_OPCODE_TGSI_SAMPLE_I: return "tgsi.sample_i";
   case TOY_OPCODE_TGSI_SAMPLE_I_MS: return "tgsi.sample_i_ms";
   case TOY_OPCODE_TGSI_SAMPLE_B: return "tgsi.sample_b";
   case TOY_OPCODE_TGSI_SAMPLE_C: return "tgsi.sample_c";
   case TOY_OPCODE_TGSI_SAMPLE_C_LZ: return "tgsi.sample_c_lz";
   case TOY_OPCODE_TGSI_SAMPLE_D: return "tgsi.sample_d";
   case TOY_OPCODE_TGSI_SAMPLE_L: return "tgsi.sample_l";
   case TOY_OPCODE_TGSI_GATHER4: return "tgsi.gather4";
   case TOY_OPCODE_TGSI_SVIEWINFO: return "tgsi.sviewinfo";
   case TOY_OPCODE_TGSI_SAMPLE_POS: return "tgsi.sample_pos";
   case TOY_OPCODE_TGSI_SAMPLE_INFO: return "tgsi.sample_info";
   /* math */
   case TOY_OPCODE_INV: return "math.inv";
   case TOY_OPCODE_LOG: return "math.log";
   case TOY_OPCODE_EXP: return "math.exp";
   case TOY_OPCODE_SQRT: return "math.sqrt";
   case TOY_OPCODE_RSQ: return "math.rsq";
   case TOY_OPCODE_SIN: return "math.sin";
   case TOY_OPCODE_COS: return "math.cos";
   case TOY_OPCODE_FDIV: return "math.fdiv";
   case TOY_OPCODE_POW: return "math.pow";
   case TOY_OPCODE_INT_DIV_QUOTIENT: return "math.int_div_quotient";
   case TOY_OPCODE_INT_DIV_REMAINDER: return "math.int_div_remainer";
   /* urb */
   case TOY_OPCODE_URB_WRITE: return "urb.urb_write";
   /* gs */
   case TOY_OPCODE_EMIT: return "gs.emit";
   case TOY_OPCODE_ENDPRIM: return "gs.endprim";
   /* fs */
   case TOY_OPCODE_DDX: return "fs.ddx";
   case TOY_OPCODE_DDY: return "fs.ddy";
   case TOY_OPCODE_FB_WRITE: return "fs.fb_write";
   case TOY_OPCODE_KIL: return "fs.kil";
   default: return "unk";
   }
}
 
/**
 * Return a printable name for the cond_modifier field of an instruction.
 *
 * The field is overloaded: for send/sendc it holds the SFID, for math it
 * holds the function code, and for everything else it is a conditional
 * modifier.  Returns NULL when there is nothing to print
 * (BRW_CONDITIONAL_NONE).
 */
static const char *
get_cond_modifier_name(unsigned opcode, unsigned cond_modifier)
{
   switch (opcode) {
   case BRW_OPCODE_SEND:
   case BRW_OPCODE_SENDC:
      /* SFID */
      switch (cond_modifier) {
      case BRW_SFID_NULL: return "Null";
      case BRW_SFID_SAMPLER: return "Sampling Engine";
      case BRW_SFID_MESSAGE_GATEWAY: return "Message Gateway";
      case GEN6_SFID_DATAPORT_SAMPLER_CACHE: return "Data Port Sampler Cache";
      case GEN6_SFID_DATAPORT_RENDER_CACHE: return "Data Port Render Cache";
      case BRW_SFID_URB: return "URB";
      case BRW_SFID_THREAD_SPAWNER: return "Thread Spawner";
      case GEN6_SFID_DATAPORT_CONSTANT_CACHE: return "Constant Cache";
      default: return "Unknown";
      }
      break;
   case BRW_OPCODE_MATH:
      /* FC */
      switch (cond_modifier) {
      case BRW_MATH_FUNCTION_INV: return "INV";
      case BRW_MATH_FUNCTION_LOG: return "LOG";
      case BRW_MATH_FUNCTION_EXP: return "EXP";
      case BRW_MATH_FUNCTION_SQRT: return "SQRT";
      case BRW_MATH_FUNCTION_RSQ: return "RSQ";
      case BRW_MATH_FUNCTION_SIN: return "SIN";
      case BRW_MATH_FUNCTION_COS: return "COS";
      case BRW_MATH_FUNCTION_FDIV: return "FDIV";
      case BRW_MATH_FUNCTION_POW: return "POW";
      case BRW_MATH_FUNCTION_INT_DIV_QUOTIENT: return "INT DIV (quotient)";
      case BRW_MATH_FUNCTION_INT_DIV_REMAINDER: return "INT DIV (remainder)";
      default: return "UNK";
      }
      break;
   default:
      /* ordinary conditional modifier */
      switch (cond_modifier) {
      case BRW_CONDITIONAL_NONE: return NULL;
      case BRW_CONDITIONAL_Z: return "z";
      case BRW_CONDITIONAL_NZ: return "nz";
      case BRW_CONDITIONAL_G: return "g";
      case BRW_CONDITIONAL_GE: return "ge";
      case BRW_CONDITIONAL_L: return "l";
      case BRW_CONDITIONAL_LE: return "le";
      default: return "unk";
      }
      break;
   }
}
 
/**
 * Dump a single instruction: opcode, modifiers, destination, then the
 * sources up to the first null operand.
 */
static void
tc_dump_inst(struct toy_compiler *tc, const struct toy_inst *inst)
{
   const char *str;
   int i;

   ilo_printf(" %s", get_opcode_name(inst->opcode));

   /* a nop has no modifiers or operands */
   if (inst->opcode == BRW_OPCODE_NOP) {
      ilo_printf("\n");
      return;
   }

   if (inst->saturate)
      ilo_printf(".sat");

   str = get_cond_modifier_name(inst->opcode, inst->cond_modifier);
   if (str)
      ilo_printf(".%s", str);

   ilo_printf(" ");
   tc_dump_dst(tc, inst->dst);

   for (i = 0; i < Elements(inst->src); i++) {
      if (tsrc_is_null(inst->src[i]))
         break;

      ilo_printf(", ");
      tc_dump_src(tc, inst->src[i]);
   }

   ilo_printf("\n");
}
 
/**
 * Dump the instructions added to the compiler.  Markers are printed but do
 * not advance the program counter.
 */
void
toy_compiler_dump(struct toy_compiler *tc)
{
   struct toy_inst *inst;
   int pc = 0;

   tc_head(tc);
   for (inst = tc_next_no_skip(tc); inst; inst = tc_next_no_skip(tc)) {
      /* we do not generate code for markers */
      if (inst->marker)
         ilo_printf("marker:");
      else
         ilo_printf("%6d:", pc++);

      tc_dump_inst(tc, inst);
   }
}
 
/**
* Clean up the toy compiler.
*/
void
toy_compiler_cleanup(struct toy_compiler *tc)
{
struct toy_inst *inst, *next;
 
LIST_FOR_EACH_ENTRY_SAFE(inst, next, &tc->instructions, list)
util_slab_free(&tc->mempool, inst);
 
util_slab_destroy(&tc->mempool);
}
 
/**
 * Initialize the instruction template, from which tc_add() initializes the
 * newly added instructions.
 */
static void
tc_init_inst_templ(struct toy_compiler *tc)
{
   struct toy_inst *templ = &tc->templ;
   int slot;

   /* the template is a plain, unpredicated, unconditional nop */
   templ->opcode = BRW_OPCODE_NOP;
   templ->access_mode = BRW_ALIGN_1;
   templ->mask_ctrl = BRW_MASK_ENABLE;
   templ->dep_ctrl = BRW_DEPENDENCY_NORMAL;
   templ->qtr_ctrl = GEN6_COMPRESSION_1Q;
   templ->thread_ctrl = BRW_THREAD_NORMAL;
   templ->pred_ctrl = BRW_PREDICATE_NONE;
   templ->pred_inv = false;
   templ->exec_size = BRW_EXECUTE_1;
   templ->cond_modifier = BRW_CONDITIONAL_NONE;
   templ->acc_wr_ctrl = false;
   templ->saturate = false;

   templ->marker = false;

   /* all operands start out null */
   templ->dst = tdst_null();
   for (slot = 0; slot < Elements(templ->src); slot++)
      templ->src[slot] = tsrc_null();

   for (slot = 0; slot < Elements(templ->tex.offsets); slot++)
      templ->tex.offsets[slot] = tsrc_null();

   list_inithead(&templ->list);
}
 
/**
 * Initialize the toy compiler for the given device.
 */
void
toy_compiler_init(struct toy_compiler *tc, const struct ilo_dev_info *dev)
{
   memset(tc, 0, sizeof(*tc));

   tc->dev = dev;

   tc_init_inst_templ(tc);

   /* instructions are slab-allocated, 64 per page */
   util_slab_create(&tc->mempool, sizeof(struct toy_inst),
         64, UTIL_SLAB_SINGLETHREADED);

   list_inithead(&tc->instructions);
   /* position the iterator at the tail so new instructions append there */
   tc_tail(tc);

   tc->rect_linear_width = 1;

   /* skip 0 so that util_hash_table_get() never returns NULL */
   tc->next_vrf = 1;
}
/contrib/sdk/sources/Mesa/src/gallium/drivers/ilo/shader/toy_compiler.h
0,0 → 1,473
/*
* Mesa 3-D graphics library
*
* Copyright (C) 2012-2013 LunarG, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*
* Authors:
* Chia-I Wu <olv@lunarg.com>
*/
 
#ifndef TOY_COMPILER_H
#define TOY_COMPILER_H
 
#include "util/u_slab.h"
#include "brw_defines.h"
 
#include "ilo_common.h"
#include "toy_compiler_reg.h"
 
/**
 * Toy opcodes.
 *
 * Values up to TOY_OPCODE_LAST_HW alias the hardware BRW_OPCODE_x values;
 * everything after is a virtual opcode that is lowered before assembly.
 */
enum toy_opcode {
   /* 0..127 are reserved for BRW_OPCODE_x */
   TOY_OPCODE_LAST_HW = 127,

   /* TGSI register functions */
   TOY_OPCODE_TGSI_IN,
   TOY_OPCODE_TGSI_CONST,
   TOY_OPCODE_TGSI_SV,
   TOY_OPCODE_TGSI_IMM,
   TOY_OPCODE_TGSI_INDIRECT_FETCH,
   TOY_OPCODE_TGSI_INDIRECT_STORE,

   /* TGSI sampling functions */
   TOY_OPCODE_TGSI_TEX,
   TOY_OPCODE_TGSI_TXB,
   TOY_OPCODE_TGSI_TXD,
   TOY_OPCODE_TGSI_TXL,
   TOY_OPCODE_TGSI_TXP,
   TOY_OPCODE_TGSI_TXF,
   TOY_OPCODE_TGSI_TXQ,
   TOY_OPCODE_TGSI_TXQ_LZ,
   TOY_OPCODE_TGSI_TEX2,
   TOY_OPCODE_TGSI_TXB2,
   TOY_OPCODE_TGSI_TXL2,
   TOY_OPCODE_TGSI_SAMPLE,
   TOY_OPCODE_TGSI_SAMPLE_I,
   TOY_OPCODE_TGSI_SAMPLE_I_MS,
   TOY_OPCODE_TGSI_SAMPLE_B,
   TOY_OPCODE_TGSI_SAMPLE_C,
   TOY_OPCODE_TGSI_SAMPLE_C_LZ,
   TOY_OPCODE_TGSI_SAMPLE_D,
   TOY_OPCODE_TGSI_SAMPLE_L,
   TOY_OPCODE_TGSI_GATHER4,
   TOY_OPCODE_TGSI_SVIEWINFO,
   TOY_OPCODE_TGSI_SAMPLE_POS,
   TOY_OPCODE_TGSI_SAMPLE_INFO,

   /* math functions */
   TOY_OPCODE_INV,
   TOY_OPCODE_LOG,
   TOY_OPCODE_EXP,
   TOY_OPCODE_SQRT,
   TOY_OPCODE_RSQ,
   TOY_OPCODE_SIN,
   TOY_OPCODE_COS,
   TOY_OPCODE_FDIV,
   TOY_OPCODE_POW,
   TOY_OPCODE_INT_DIV_QUOTIENT,
   TOY_OPCODE_INT_DIV_REMAINDER,

   /* URB functions */
   TOY_OPCODE_URB_WRITE,

   /* GS-specific functions */
   TOY_OPCODE_EMIT,
   TOY_OPCODE_ENDPRIM,

   /* FS-specific functions */
   TOY_OPCODE_DDX,
   TOY_OPCODE_DDY,
   TOY_OPCODE_FB_WRITE,
   TOY_OPCODE_KIL,
};
 
/**
 * Toy instruction.
 *
 * The bit-field widths and names mirror the fields of the hardware
 * instruction encoding (BRW_*/GEN6_* values).
 */
struct toy_inst {
   unsigned opcode:8;            /* enum toy_opcode */
   unsigned access_mode:1;       /* BRW_ALIGN_x */
   unsigned mask_ctrl:1;         /* BRW_MASK_x */
   unsigned dep_ctrl:2;          /* BRW_DEPENDENCY_x */
   unsigned qtr_ctrl:2;          /* GEN6_COMPRESSION_x */
   unsigned thread_ctrl:2;       /* BRW_THREAD_x */
   unsigned pred_ctrl:4;         /* BRW_PREDICATE_x */
   unsigned pred_inv:1;          /* true or false */
   unsigned exec_size:3;         /* BRW_EXECUTE_x */
   unsigned cond_modifier:4;     /* BRW_CONDITIONAL_x; also holds the SFID
                                    for send and the FC for math */
   unsigned acc_wr_ctrl:1;       /* true or false */
   unsigned saturate:1;          /* true or false */

   /* true if the instruction should be ignored for instruction iteration */
   unsigned marker:1;

   unsigned pad:1;

   struct toy_dst dst;
   struct toy_src src[5];        /* match TGSI_FULL_MAX_SRC_REGISTERS */

   /* extra state for sampling opcodes */
   struct {
      int target;                /* TGSI_TEXTURE_x */
      struct toy_src offsets[1]; /* need to be 4 when GATHER4 is supported */
   } tex;

   /* linkage in toy_compiler::instructions */
   struct list_head list;
};
 
/**
 * Toy compiler.
 *
 * Instructions live in a slab-backed doubly-linked list; iter/iter_next
 * implement a cursor over that list (see tc_head/tc_tail/tc_next).
 */
struct toy_compiler {
   const struct ilo_dev_info *dev;

   /* template copied into each instruction created by tc_add() */
   struct toy_inst templ;
   struct util_slab_mempool mempool;
   struct list_head instructions;
   struct list_head *iter, *iter_next;

   /* this is not set until toy_compiler_legalize_for_asm() */
   int num_instructions;

   /* width used for TOY_RECT_LINEAR regions */
   int rect_linear_width;
   /* next virtual register to hand out; starts at 1, see toy_compiler_init */
   int next_vrf;

   /* first failure wins; see tc_fail() */
   bool fail;
   const char *reason;
};
 
/**
 * Allocate the given number of VRF registers and return the first one.
 */
static inline int
tc_alloc_vrf(struct toy_compiler *tc, int count)
{
   const int first = tc->next_vrf;

   tc->next_vrf = first + count;

   return first;
}
 
/**
 * Allocate a single temporary (VRF) register.
 */
static inline struct toy_dst
tc_alloc_tmp(struct toy_compiler *tc)
{
   const int vrf = tc_alloc_vrf(tc, 1);

   return tdst(TOY_FILE_VRF, vrf, 0);
}
 
/**
 * Allocate four temporary registers into \p tmp.
 */
static inline void
tc_alloc_tmp4(struct toy_compiler *tc, struct toy_dst *tmp)
{
   int i;

   for (i = 0; i < 4; i++)
      tmp[i] = tc_alloc_tmp(tc);
}
 
/**
 * Duplicate an instruction at the current location.
 *
 * Returns NULL when the slab allocation fails.
 */
static inline struct toy_inst *
tc_duplicate_inst(struct toy_compiler *tc, const struct toy_inst *inst)
{
   struct toy_inst *copy = util_slab_alloc(&tc->mempool);

   if (copy) {
      *copy = *inst;
      /* insert before the iterator's next node */
      list_addtail(&copy->list, tc->iter_next);
   }

   return copy;
}
 
/**
 * Move an instruction to the current location.
 */
static inline void
tc_move_inst(struct toy_compiler *tc, struct toy_inst *inst)
{
   /* unlink from the old position, then re-insert before the cursor */
   list_del(&inst->list);
   list_addtail(&inst->list, tc->iter_next);
}
 
/**
 * Discard an instruction, returning its storage to the slab.
 */
static inline void
tc_discard_inst(struct toy_compiler *tc, struct toy_inst *inst)
{
   list_del(&inst->list);
   util_slab_free(&tc->mempool, inst);
}
 
/**
 * Add a new instruction at the current location, cloned from tc->templ.
 *
 * Returns NULL on allocation failure.
 */
static inline struct toy_inst *
tc_add(struct toy_compiler *tc)
{
   return tc_duplicate_inst(tc, &tc->templ);
}
 
/**
 * A convenient version of tc_add() for instructions with 3 source operands.
 */
static inline struct toy_inst *
tc_add3(struct toy_compiler *tc, unsigned opcode,
        struct toy_dst dst,
        struct toy_src src0,
        struct toy_src src1,
        struct toy_src src2)
{
   struct toy_inst *inst = tc_add(tc);

   if (inst) {
      inst->opcode = opcode;
      inst->dst = dst;
      inst->src[0] = src0;
      inst->src[1] = src1;
      inst->src[2] = src2;
   }

   return inst;
}
 
/**
 * A convenient version of tc_add() for instructions with 2 source operands.
 */
static inline struct toy_inst *
tc_add2(struct toy_compiler *tc, int opcode,
        struct toy_dst dst,
        struct toy_src src0,
        struct toy_src src1)
{
   /* the unused third source is null */
   return tc_add3(tc, opcode, dst, src0, src1, tsrc_null());
}
 
/**
 * A convenient version of tc_add() for instructions with 1 source operand.
 */
static inline struct toy_inst *
tc_add1(struct toy_compiler *tc, unsigned opcode,
        struct toy_dst dst,
        struct toy_src src0)
{
   /* the unused second and third sources are null */
   return tc_add3(tc, opcode, dst, src0, tsrc_null(), tsrc_null());
}
 
/**
 * A convenient version of tc_add() for instructions without source or
 * destination operands.
 */
static inline struct toy_inst *
tc_add0(struct toy_compiler *tc, unsigned opcode)
{
   return tc_add3(tc, opcode, tdst_null(),
         tsrc_null(), tsrc_null(), tsrc_null());
}
 
/* generator for a 0-operand instruction helper */
#define TC_ALU0(func, opcode)   \
static inline struct toy_inst * \
func(struct toy_compiler *tc)   \
{                               \
   return tc_add0(tc, opcode);  \
}

/* generator for a 1-source instruction helper */
#define TC_ALU1(func, opcode)        \
static inline struct toy_inst *      \
func(struct toy_compiler *tc,        \
     struct toy_dst dst,             \
     struct toy_src src)             \
{                                    \
   return tc_add1(tc, opcode, dst, src); \
}

/* generator for a 2-source instruction helper */
#define TC_ALU2(func, opcode)        \
static inline struct toy_inst *      \
func(struct toy_compiler *tc,        \
     struct toy_dst dst,             \
     struct toy_src src0,            \
     struct toy_src src1)            \
{                                    \
   return tc_add2(tc, opcode,        \
         dst, src0, src1);           \
}

/* generator for a 3-source instruction helper */
#define TC_ALU3(func, opcode)        \
static inline struct toy_inst *      \
func(struct toy_compiler *tc,        \
     struct toy_dst dst,             \
     struct toy_src src0,            \
     struct toy_src src1,            \
     struct toy_src src2)            \
{                                    \
   return tc_add3(tc, opcode,        \
         dst, src0, src1, src2);     \
}

/* generator for a 2-source helper that also takes a conditional modifier */
#define TC_CND2(func, opcode)             \
static inline struct toy_inst *           \
func(struct toy_compiler *tc,             \
     struct toy_dst dst,                  \
     struct toy_src src0,                 \
     struct toy_src src1,                 \
     unsigned cond_modifier)              \
{                                         \
   struct toy_inst *inst;                 \
   inst = tc_add2(tc, opcode,             \
         dst, src0, src1);                \
   inst->cond_modifier = cond_modifier;   \
   return inst;                           \
}

/* helpers for the commonly used opcodes, e.g. tc_MOV(), tc_ADD(), ... */
TC_ALU0(tc_NOP, BRW_OPCODE_NOP)
TC_ALU0(tc_ELSE, BRW_OPCODE_ELSE)
TC_ALU0(tc_ENDIF, BRW_OPCODE_ENDIF)
TC_ALU1(tc_MOV, BRW_OPCODE_MOV)
TC_ALU1(tc_RNDD, BRW_OPCODE_RNDD)
TC_ALU1(tc_INV, TOY_OPCODE_INV)
TC_ALU1(tc_FRC, BRW_OPCODE_FRC)
TC_ALU1(tc_EXP, TOY_OPCODE_EXP)
TC_ALU1(tc_LOG, TOY_OPCODE_LOG)
TC_ALU2(tc_ADD, BRW_OPCODE_ADD)
TC_ALU2(tc_MUL, BRW_OPCODE_MUL)
TC_ALU2(tc_AND, BRW_OPCODE_AND)
TC_ALU2(tc_OR, BRW_OPCODE_OR)
TC_ALU2(tc_DP2, BRW_OPCODE_DP2)
TC_ALU2(tc_DP3, BRW_OPCODE_DP3)
TC_ALU2(tc_DP4, BRW_OPCODE_DP4)
TC_ALU2(tc_SHL, BRW_OPCODE_SHL)
TC_ALU2(tc_SHR, BRW_OPCODE_SHR)
TC_ALU2(tc_POW, TOY_OPCODE_POW)
TC_ALU3(tc_MAC, BRW_OPCODE_MAC)
TC_CND2(tc_SEL, BRW_OPCODE_SEL)
TC_CND2(tc_CMP, BRW_OPCODE_CMP)
TC_CND2(tc_IF, BRW_OPCODE_IF)
TC_CND2(tc_SEND, BRW_OPCODE_SEND)
 
/**
 * Upcast a list_head to an instruction.
 *
 * The NULL cast is the container_of idiom for computing the offset of the
 * embedded list member; no NULL pointer is ever dereferenced.
 */
static inline struct toy_inst *
tc_list_to_inst(struct toy_compiler *tc, struct list_head *item)
{
   return container_of(item, (struct toy_inst *) NULL, list);
}
 
/**
 * Return the instruction at the current location, or NULL when the cursor
 * sits on the list sentinel.
 */
static inline struct toy_inst *
tc_current(struct toy_compiler *tc)
{
   if (tc->iter == &tc->instructions)
      return NULL;

   return tc_list_to_inst(tc, tc->iter);
}
 
/**
 * Set the current location to the head: the cursor sits on the sentinel and
 * the next step yields the first instruction.
 */
static inline void
tc_head(struct toy_compiler *tc)
{
   tc->iter = &tc->instructions;
   tc->iter_next = tc->instructions.next;
}
 
/**
 * Set the current location to the tail: both cursor pointers sit on the
 * sentinel, so new instructions are appended at the end.
 */
static inline void
tc_tail(struct toy_compiler *tc)
{
   tc->iter = &tc->instructions;
   tc->iter_next = &tc->instructions;
}
 
/**
* Advance the current location.
*/
static inline struct toy_inst *
tc_next_no_skip(struct toy_compiler *tc)
{
/* stay at the tail so that new instructions are added there */
if (tc->iter_next == &tc->instructions) {
tc_tail(tc);
return NULL;
}
 
tc->iter = tc->iter_next;
tc->iter_next = tc->iter_next->next;
 
return tc_list_to_inst(tc, tc->iter);
}
 
/**
 * Advance the current location, skipping markers.
 */
static inline struct toy_inst *
tc_next(struct toy_compiler *tc)
{
   struct toy_inst *inst = tc_next_no_skip(tc);

   while (inst && inst->marker)
      inst = tc_next_no_skip(tc);

   return inst;
}
 
/**
 * Mark the compilation as failed.  Only the first failure reason is kept.
 */
static inline void
tc_fail(struct toy_compiler *tc, const char *reason)
{
   if (tc->fail)
      return;

   tc->fail = true;
   tc->reason = reason;
}
 
/* initialize \p tc for the given device */
void
toy_compiler_init(struct toy_compiler *tc, const struct ilo_dev_info *dev);

/* free all instructions and the underlying slab */
void
toy_compiler_cleanup(struct toy_compiler *tc);

/* print all instructions via ilo_printf() */
void
toy_compiler_dump(struct toy_compiler *tc);

/* assemble into a malloc'ed kernel; *size receives its byte size */
void *
toy_compiler_assemble(struct toy_compiler *tc, int *size);

/* disassemble a previously assembled kernel */
void
toy_compiler_disassemble(struct toy_compiler *tc, const void *kernel, int size);
 
#endif /* TOY_COMPILER_H */
/contrib/sdk/sources/Mesa/src/gallium/drivers/ilo/shader/toy_compiler_asm.c
0,0 → 1,750
/*
* Mesa 3-D graphics library
*
* Copyright (C) 2012-2013 LunarG, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*
* Authors:
* Chia-I Wu <olv@lunarg.com>
*/
 
#include "toy_compiler.h"
 
/* an operand origin encodes (RegNum << 5 | SubRegNumInBytes): 32B per reg */
#define CG_REG_SHIFT 5
#define CG_REG_NUM(origin) ((origin) >> CG_REG_SHIFT)
 
/* per-instruction state: operands already translated to hardware encodings */
struct codegen {
   const struct toy_inst *inst;
   int pc;

   /* which flag subregister predication/conditional-mod uses */
   unsigned flag_sub_reg_num;

   struct codegen_dst {
      unsigned file;    /* BRW_*_REGISTER_FILE */
      unsigned type;    /* BRW_REGISTER_TYPE_* */
      bool indirect;
      unsigned indirect_subreg;
      unsigned origin; /* (RegNum << 5 | SubRegNumInBytes) */

      unsigned horz_stride;

      unsigned writemask;
   } dst;

   struct codegen_src {
      unsigned file;    /* BRW_*_REGISTER_FILE */
      unsigned type;    /* BRW_REGISTER_TYPE_* */
      bool indirect;
      unsigned indirect_subreg;
      unsigned origin; /* (RegNum << 5 | SubRegNumInBytes) */

      /* region description: <vert_stride; width, horz_stride> */
      unsigned vert_stride;
      unsigned width;
      unsigned horz_stride;

      unsigned swizzle[4];
      bool absolute;
      bool negate;
   } src[3];
};
 
/**
* Return true if the source operand is null.
*/
static bool
src_is_null(const struct codegen *cg, int idx)
{
const struct codegen_src *src = &cg->src[idx];
 
return (src->file == BRW_ARCHITECTURE_REGISTER_FILE &&
src->origin == BRW_ARF_NULL << CG_REG_SHIFT);
}
 
/**
* Translate a source operand to DW2 or DW3 of the 1-src/2-src format.
*/
static uint32_t
translate_src(const struct codegen *cg, int idx)
{
   const struct codegen_src *src = &cg->src[idx];
   uint32_t dw;

   /* special treatment may be needed if any of the operand is immediate */
   if (cg->src[0].file == BRW_IMMEDIATE_VALUE) {
      assert(!cg->src[0].absolute && !cg->src[0].negate);
      /* only the last src operand can be an immediate */
      assert(src_is_null(cg, 1));

      /* immediate in src0 moves into the src1 slot; src0 only carries the
       * flag subregister number */
      if (idx == 0)
         return cg->flag_sub_reg_num << 25;
      else
         return cg->src[0].origin;
   }
   else if (idx && cg->src[1].file == BRW_IMMEDIATE_VALUE) {
      assert(!cg->src[1].absolute && !cg->src[1].negate);
      return cg->src[1].origin;
   }

   assert(src->file != BRW_IMMEDIATE_VALUE);

   if (src->indirect) {
      /* origin is a signed address offset in the indirect case */
      const int offset = (int) src->origin;

      assert(src->file == BRW_GENERAL_REGISTER_FILE);
      assert(offset < 512 && offset >= -512);

      if (cg->inst->access_mode == BRW_ALIGN_16) {
         assert(src->width == BRW_WIDTH_4);
         assert(src->horz_stride == BRW_HORIZONTAL_STRIDE_1);

         /* the lower 4 bits are reserved for the swizzle_[xy] */
         assert(!(src->origin & 0xf));

         dw = src->vert_stride << 21 |
              src->swizzle[3] << 18 |
              src->swizzle[2] << 16 |
              BRW_ADDRESS_REGISTER_INDIRECT_REGISTER << 15 |
              src->negate << 14 |
              src->absolute << 13 |
              src->indirect_subreg << 10 |
              (src->origin & 0x3f0) |
              src->swizzle[1] << 2 |
              src->swizzle[0];
      }
      else {
         /* align1 has no swizzle field; only identity is expressible */
         assert(src->swizzle[0] == TOY_SWIZZLE_X &&
                src->swizzle[1] == TOY_SWIZZLE_Y &&
                src->swizzle[2] == TOY_SWIZZLE_Z &&
                src->swizzle[3] == TOY_SWIZZLE_W);

         dw = src->vert_stride << 21 |
              src->width << 18 |
              src->horz_stride << 16 |
              BRW_ADDRESS_REGISTER_INDIRECT_REGISTER << 15 |
              src->negate << 14 |
              src->absolute << 13 |
              src->indirect_subreg << 10 |
              (src->origin & 0x3ff);
      }
   }
   else {
      /* direct addressing: sanity-check register numbers per file */
      switch (src->file) {
      case BRW_ARCHITECTURE_REGISTER_FILE:
         break;
      case BRW_GENERAL_REGISTER_FILE:
         assert(CG_REG_NUM(src->origin) < 128);
         break;
      case BRW_MESSAGE_REGISTER_FILE:
         /* MRF sources are only legal for send-like instructions */
         assert(cg->inst->opcode == BRW_OPCODE_SEND ||
                cg->inst->opcode == BRW_OPCODE_SENDC);
         assert(CG_REG_NUM(src->origin) < 16);
         break;
      case BRW_IMMEDIATE_VALUE:
      default:
         assert(!"invalid src file");
         break;
      }

      if (cg->inst->access_mode == BRW_ALIGN_16) {
         assert(src->width == BRW_WIDTH_4);
         assert(src->horz_stride == BRW_HORIZONTAL_STRIDE_1);

         /* the lower 4 bits are reserved for the swizzle_[xy] */
         assert(!(src->origin & 0xf));

         dw = src->vert_stride << 21 |
              src->swizzle[3] << 18 |
              src->swizzle[2] << 16 |
              BRW_ADDRESS_DIRECT << 15 |
              src->negate << 14 |
              src->absolute << 13 |
              src->origin |
              src->swizzle[1] << 2 |
              src->swizzle[0];
      }
      else {
         assert(src->swizzle[0] == TOY_SWIZZLE_X &&
                src->swizzle[1] == TOY_SWIZZLE_Y &&
                src->swizzle[2] == TOY_SWIZZLE_Z &&
                src->swizzle[3] == TOY_SWIZZLE_W);

         dw = src->vert_stride << 21 |
              src->width << 18 |
              src->horz_stride << 16 |
              BRW_ADDRESS_DIRECT << 15 |
              src->negate << 14 |
              src->absolute << 13 |
              src->origin;
      }
   }

   /* DW2 (src0) additionally holds the flag subregister number */
   if (idx == 0)
      dw |= cg->flag_sub_reg_num << 25;

   return dw;
}
 
/**
* Translate the destination operand to the higher 16 bits of DW1 of the
* 1-src/2-src format.
*/
static uint16_t
translate_dst_region(const struct codegen *cg)
{
   const struct codegen_dst *dst = &cg->dst;
   uint16_t dw1_region;

   if (dst->file == BRW_IMMEDIATE_VALUE) {
      /* dst is immediate (JIP) when the opcode is a conditional branch */
      switch (cg->inst->opcode) {
      case BRW_OPCODE_IF:
      case BRW_OPCODE_ELSE:
      case BRW_OPCODE_ENDIF:
      case BRW_OPCODE_WHILE:
         assert(dst->type == BRW_REGISTER_TYPE_W);
         /* the jump offset occupies the whole 16-bit region field */
         dw1_region = (dst->origin & 0xffff);
         break;
      default:
         assert(!"dst cannot be immediate");
         dw1_region = 0;
         break;
      }

      return dw1_region;
   }

   if (dst->indirect) {
      /* origin is a signed address offset in the indirect case */
      const int offset = (int) dst->origin;

      assert(dst->file == BRW_GENERAL_REGISTER_FILE);
      assert(offset < 512 && offset >= -512);

      if (cg->inst->access_mode == BRW_ALIGN_16) {
         /*
          * From the Sandy Bridge PRM, volume 4 part 2, page 144:
          *
          *     "Allthough Dst.HorzStride is a don't care for Align16, HW
          *      needs this to be programmed as 01."
          */
         assert(dst->horz_stride == BRW_HORIZONTAL_STRIDE_1);
         /* the lower 4 bits are reserved for the writemask */
         assert(!(dst->origin & 0xf));

         dw1_region = BRW_ADDRESS_REGISTER_INDIRECT_REGISTER << 15 |
                      dst->horz_stride << 13 |
                      dst->indirect_subreg << 10 |
                      (dst->origin & 0x3f0) |
                      dst->writemask;
      }
      else {
         /* align1 has no writemask field; only full writes expressible */
         assert(dst->writemask == TOY_WRITEMASK_XYZW);

         dw1_region = BRW_ADDRESS_REGISTER_INDIRECT_REGISTER << 15 |
                      dst->horz_stride << 13 |
                      dst->indirect_subreg << 10 |
                      (dst->origin & 0x3ff);
      }
   }
   else {
      /* direct addressing: sanity-check register numbers per file */
      assert((dst->file == BRW_GENERAL_REGISTER_FILE &&
              CG_REG_NUM(dst->origin) < 128) ||
             (dst->file == BRW_MESSAGE_REGISTER_FILE &&
              CG_REG_NUM(dst->origin) < 16) ||
             (dst->file == BRW_ARCHITECTURE_REGISTER_FILE));

      if (cg->inst->access_mode == BRW_ALIGN_16) {
         /* similar to the indirect case */
         assert(dst->horz_stride == BRW_HORIZONTAL_STRIDE_1);
         assert(!(dst->origin & 0xf));

         dw1_region = BRW_ADDRESS_DIRECT << 15 |
                      dst->horz_stride << 13 |
                      dst->origin |
                      dst->writemask;
      }
      else {
         assert(dst->writemask == TOY_WRITEMASK_XYZW);

         dw1_region = BRW_ADDRESS_DIRECT << 15 |
                      dst->horz_stride << 13 |
                      dst->origin;
      }
   }

   return dw1_region;
}
 
/**
* Translate the destination operand to DW1 of the 1-src/2-src format.
*/
static uint32_t
translate_dst(const struct codegen *cg)
{
   uint32_t dw1;

   /* DW1 also carries the register files/types of dst, src0, and src1 */
   dw1  = cg->dst.file;
   dw1 |= cg->dst.type << 2;
   dw1 |= cg->src[0].file << 5;
   dw1 |= cg->src[0].type << 7;
   dw1 |= cg->src[1].file << 10;
   dw1 |= cg->src[1].type << 12;
   dw1 |= (uint32_t) translate_dst_region(cg) << 16;

   return dw1;
}
 
/**
* Translate the instruction to DW0 of the 1-src/2-src format.
*/
static uint32_t
translate_inst(const struct codegen *cg)
{
   /* neither breakpoints nor compaction are used */
   const bool debug_ctrl = false;
   const bool cmpt_ctrl = false;
   uint32_t dw0;

   assert(cg->inst->opcode < 128);

   dw0  = cg->inst->opcode;
   dw0 |= cg->inst->access_mode << 8;
   dw0 |= cg->inst->mask_ctrl << 9;
   dw0 |= cg->inst->dep_ctrl << 10;
   dw0 |= cg->inst->qtr_ctrl << 12;
   dw0 |= cg->inst->thread_ctrl << 14;
   dw0 |= cg->inst->pred_ctrl << 16;
   dw0 |= cg->inst->pred_inv << 20;
   dw0 |= cg->inst->exec_size << 21;
   dw0 |= cg->inst->cond_modifier << 24;
   dw0 |= cg->inst->acc_wr_ctrl << 28;
   dw0 |= cmpt_ctrl << 29;
   dw0 |= debug_ctrl << 30;
   dw0 |= cg->inst->saturate << 31;

   return dw0;
}
 
/**
* Codegen an instruction in 1-src/2-src format.
*/
static void
codegen_inst(const struct codegen *cg, uint32_t *code)
{
   uint32_t *dw = code;

   *dw++ = translate_inst(cg);
   *dw++ = translate_dst(cg);
   *dw++ = translate_src(cg, 0);
   *dw   = translate_src(cg, 1);

   /* the 1-src/2-src format has no room for a third source */
   assert(src_is_null(cg, 2));
}
 
/**
* Codegen an instruction in 3-src format.
*/
static void
codegen_inst_3src(const struct codegen *cg, uint32_t *code)
{
   const struct codegen_dst *dst = &cg->dst;
   uint32_t dw0, dw1, dw_src[3];
   int i;

   dw0 = translate_inst(cg);

   /*
    * 3-src instruction restrictions
    *
    *  - align16 with direct addressing
    *  - GRF or MRF dst
    *  - GRF src
    *  - sub_reg_num is DWORD aligned
    *  - no regioning except replication control
    *    (vert_stride == 0 && horz_stride == 0)
    */
   assert(cg->inst->access_mode == BRW_ALIGN_16);

   assert(!dst->indirect);
   assert((dst->file == BRW_GENERAL_REGISTER_FILE &&
           CG_REG_NUM(dst->origin) < 128) ||
          (dst->file == BRW_MESSAGE_REGISTER_FILE &&
           CG_REG_NUM(dst->origin) < 16));
   /* sub_reg_num must be DWORD aligned */
   assert(!(dst->origin & 0x3));
   assert(dst->horz_stride == BRW_HORIZONTAL_STRIDE_1);

   dw1 = dst->origin << 19 |
         dst->writemask << 17 |
         cg->src[2].negate << 9 |
         cg->src[2].absolute << 8 |
         cg->src[1].negate << 7 |
         cg->src[1].absolute << 6 |
         cg->src[0].negate << 5 |
         cg->src[0].absolute << 4 |
         cg->flag_sub_reg_num << 1 |
         (dst->file == BRW_MESSAGE_REGISTER_FILE);

   for (i = 0; i < 3; i++) {
      const struct codegen_src *src = &cg->src[i];

      assert(!src->indirect);
      assert(src->file == BRW_GENERAL_REGISTER_FILE &&
             CG_REG_NUM(src->origin) < 128);
      assert(!(src->origin & 0x3));

      /* full region or scalar replication only */
      assert((src->vert_stride == BRW_VERTICAL_STRIDE_4 &&
              src->horz_stride == BRW_HORIZONTAL_STRIDE_1) ||
             (src->vert_stride == BRW_VERTICAL_STRIDE_0 &&
              src->horz_stride == BRW_HORIZONTAL_STRIDE_0));
      assert(src->width == BRW_WIDTH_4);

      /* origin << 7 places reg_nr at bit 12 and the DWORD-aligned subreg at
       * bit 9; the 2-bit swizzles fill bits 1-8 and bit 0 is rep_ctrl */
      dw_src[i] = src->origin << 7 |
                  src->swizzle[3] << 7 |
                  src->swizzle[2] << 5 |
                  src->swizzle[1] << 3 |
                  src->swizzle[0] << 1 |
                  (src->vert_stride == BRW_VERTICAL_STRIDE_0 &&
                   src->horz_stride == BRW_HORIZONTAL_STRIDE_0);

      /* only the lower 20 bits are used */
      assert((dw_src[i] & 0xfffff) == dw_src[i]);
   }

   code[0] = dw0;
   code[1] = dw1;
   /* concatenate the bits of dw_src */
   code[2] = (dw_src[1] & 0x7ff ) << 21 | dw_src[0];
   code[3] = dw_src[2] << 10 | (dw_src[1] >> 11);
}
 
/**
* Sanity check the region parameters of the operands.
*/
static void
codegen_validate_region_restrictions(const struct codegen *cg)
{
   /* decode the hardware encodings back to real element counts */
   const int exec_size_map[] = {
      [BRW_EXECUTE_1] = 1,
      [BRW_EXECUTE_2] = 2,
      [BRW_EXECUTE_4] = 4,
      [BRW_EXECUTE_8] = 8,
      [BRW_EXECUTE_16] = 16,
      [BRW_EXECUTE_32] = 32,
   };
   const int width_map[] = {
      [BRW_WIDTH_1] = 1,
      [BRW_WIDTH_2] = 2,
      [BRW_WIDTH_4] = 4,
      [BRW_WIDTH_8] = 8,
      [BRW_WIDTH_16] = 16,
   };
   const int horz_stride_map[] = {
      [BRW_HORIZONTAL_STRIDE_0] = 0,
      [BRW_HORIZONTAL_STRIDE_1] = 1,
      [BRW_HORIZONTAL_STRIDE_2] = 2,
      [BRW_HORIZONTAL_STRIDE_4] = 4,
   };
   const int vert_stride_map[] = {
      [BRW_VERTICAL_STRIDE_0] = 0,
      [BRW_VERTICAL_STRIDE_1] = 1,
      [BRW_VERTICAL_STRIDE_2] = 2,
      [BRW_VERTICAL_STRIDE_4] = 4,
      [BRW_VERTICAL_STRIDE_8] = 8,
      [BRW_VERTICAL_STRIDE_16] = 16,
      [BRW_VERTICAL_STRIDE_32] = 32,
      [BRW_VERTICAL_STRIDE_64] = 64,
      [BRW_VERTICAL_STRIDE_128] = 128,
      [BRW_VERTICAL_STRIDE_256] = 256,
      [BRW_VERTICAL_STRIDE_ONE_DIMENSIONAL] = 0,
   };
   const int exec_size = exec_size_map[cg->inst->exec_size];
   int i;

   /* Sandy Bridge PRM, volume 4 part 2, page 94 */

   /* 1. (we don't do 32 anyway) */
   assert(exec_size <= 16);

   for (i = 0; i < Elements(cg->src); i++) {
      /* NOTE(review): the maps are indexed before the null check below;
       * presumably null sources always carry in-range region encodings */
      const int width = width_map[cg->src[i].width];
      const int horz_stride = horz_stride_map[cg->src[i].horz_stride];
      const int vert_stride = vert_stride_map[cg->src[i].vert_stride];

      /* a null source ends the operand list */
      if (src_is_null(cg, i))
         break;

      /* 3. */
      assert(exec_size >= width);

      if (exec_size == width) {
         /* 4. & 5. */
         if (horz_stride)
            assert(vert_stride == width * horz_stride);
      }

      if (width == 1) {
         /* 6. */
         assert(horz_stride == 0);

         /* 7. */
         if (exec_size == 1)
            assert(vert_stride == 0);
      }

      /* 8. */
      if (!vert_stride && !horz_stride)
         assert(width == 1);
   }

   /* derived from 10.1.2. & 10.2. */
   assert(cg->dst.horz_stride != BRW_HORIZONTAL_STRIDE_0);
}
 
/* map a toy register file onto the hardware file encoding */
static unsigned
translate_vfile(enum toy_file file)
{
   if (file == TOY_FILE_ARF)
      return BRW_ARCHITECTURE_REGISTER_FILE;
   if (file == TOY_FILE_GRF)
      return BRW_GENERAL_REGISTER_FILE;
   if (file == TOY_FILE_MRF)
      return BRW_MESSAGE_REGISTER_FILE;
   if (file == TOY_FILE_IMM)
      return BRW_IMMEDIATE_VALUE;

   assert(!"unhandled toy file");
   return BRW_GENERAL_REGISTER_FILE;
}
 
/* map a toy register type onto the hardware type encoding */
static unsigned
translate_vtype(enum toy_type type)
{
   if (type == TOY_TYPE_F)
      return BRW_REGISTER_TYPE_F;
   if (type == TOY_TYPE_D)
      return BRW_REGISTER_TYPE_D;
   if (type == TOY_TYPE_UD)
      return BRW_REGISTER_TYPE_UD;
   if (type == TOY_TYPE_W)
      return BRW_REGISTER_TYPE_W;
   if (type == TOY_TYPE_UW)
      return BRW_REGISTER_TYPE_UW;
   if (type == TOY_TYPE_V)
      return BRW_REGISTER_TYPE_V;

   assert(!"unhandled toy type");
   return BRW_REGISTER_TYPE_F;
}
 
static unsigned
translate_writemask(enum toy_writemask writemask)
{
   /* TOY_WRITEMASK_* are compatible with the hardware definitions */
   const unsigned hw_writemask = (unsigned) writemask;

   assert(hw_writemask <= 0xf);
   return hw_writemask;
}
 
static unsigned
translate_swizzle(enum toy_swizzle swizzle)
{
   /* TOY_SWIZZLE_* are compatible with the hardware definitions */
   const unsigned hw_swizzle = (unsigned) swizzle;

   assert(hw_swizzle <= 3);
   return hw_swizzle;
}
 
/**
* Prepare for generating an instruction.
*/
static void
codegen_prepare(struct codegen *cg, const struct toy_inst *inst,
int pc, int rect_linear_width)
{
int i;
 
cg->inst = inst;
cg->pc = pc;
 
cg->flag_sub_reg_num = 0;
 
cg->dst.file = translate_vfile(inst->dst.file);
cg->dst.type = translate_vtype(inst->dst.type);
cg->dst.indirect = inst->dst.indirect;
cg->dst.indirect_subreg = inst->dst.indirect_subreg;
cg->dst.origin = inst->dst.val32;
 
/*
* From the Sandy Bridge PRM, volume 4 part 2, page 81:
*
* "For a word or an unsigned word immediate data, software must
* replicate the same 16-bit immediate value to both the lower word
* and the high word of the 32-bit immediate field in an instruction."
*/
if (inst->dst.file == TOY_FILE_IMM) {
switch (inst->dst.type) {
case TOY_TYPE_W:
case TOY_TYPE_UW:
cg->dst.origin &= 0xffff;
cg->dst.origin |= cg->dst.origin << 16;
break;
default:
break;
}
}
 
cg->dst.writemask = translate_writemask(inst->dst.writemask);
 
switch (inst->dst.rect) {
case TOY_RECT_LINEAR:
cg->dst.horz_stride = BRW_HORIZONTAL_STRIDE_1;
break;
default:
assert(!"unsupported dst region");
cg->dst.horz_stride = BRW_HORIZONTAL_STRIDE_1;
break;
}
 
for (i = 0; i < Elements(cg->src); i++) {
struct codegen_src *src = &cg->src[i];
 
src->file = translate_vfile(inst->src[i].file);
src->type = translate_vtype(inst->src[i].type);
src->indirect = inst->src[i].indirect;
src->indirect_subreg = inst->src[i].indirect_subreg;
src->origin = inst->src[i].val32;
 
/* do the same for src */
if (inst->dst.file == TOY_FILE_IMM) {
switch (inst->src[i].type) {
case TOY_TYPE_W:
case TOY_TYPE_UW:
src->origin &= 0xffff;
src->origin |= src->origin << 16;
break;
default:
break;
}
}
 
src->swizzle[0] = translate_swizzle(inst->src[i].swizzle_x);
src->swizzle[1] = translate_swizzle(inst->src[i].swizzle_y);
src->swizzle[2] = translate_swizzle(inst->src[i].swizzle_z);
src->swizzle[3] = translate_swizzle(inst->src[i].swizzle_w);
src->absolute = inst->src[i].absolute;
src->negate = inst->src[i].negate;
 
switch (inst->src[i].rect) {
case TOY_RECT_LINEAR:
switch (rect_linear_width) {
case 1:
src->vert_stride = BRW_VERTICAL_STRIDE_1;
src->width = BRW_WIDTH_1;
break;
case 2:
src->vert_stride = BRW_VERTICAL_STRIDE_2;
src->width = BRW_WIDTH_2;
break;
case 4:
src->vert_stride = BRW_VERTICAL_STRIDE_4;
src->width = BRW_WIDTH_4;
break;
case 8:
src->vert_stride = BRW_VERTICAL_STRIDE_8;
src->width = BRW_WIDTH_8;
break;
case 16:
src->vert_stride = BRW_VERTICAL_STRIDE_16;
src->width = BRW_WIDTH_16;
break;
default:
assert(!"unsupported TOY_RECT_LINEAR width");
src->vert_stride = BRW_VERTICAL_STRIDE_1;
src->width = BRW_WIDTH_1;
break;
}
src->horz_stride = BRW_HORIZONTAL_STRIDE_1;
break;
case TOY_RECT_041:
src->vert_stride = BRW_VERTICAL_STRIDE_0;
src->width = BRW_WIDTH_4;
src->horz_stride = BRW_HORIZONTAL_STRIDE_1;
break;
case TOY_RECT_010:
src->vert_stride = BRW_VERTICAL_STRIDE_0;
src->width = BRW_WIDTH_1;
src->horz_stride = BRW_HORIZONTAL_STRIDE_0;
break;
case TOY_RECT_220:
src->vert_stride = BRW_VERTICAL_STRIDE_2;
src->width = BRW_WIDTH_2;
src->horz_stride = BRW_HORIZONTAL_STRIDE_0;
break;
case TOY_RECT_440:
src->vert_stride = BRW_VERTICAL_STRIDE_4;
src->width = BRW_WIDTH_4;
src->horz_stride = BRW_HORIZONTAL_STRIDE_0;
break;
case TOY_RECT_240:
src->vert_stride = BRW_VERTICAL_STRIDE_2;
src->width = BRW_WIDTH_4;
src->horz_stride = BRW_HORIZONTAL_STRIDE_0;
break;
default:
assert(!"unsupported src region");
src->vert_stride = BRW_VERTICAL_STRIDE_1;
src->width = BRW_WIDTH_1;
src->horz_stride = BRW_HORIZONTAL_STRIDE_1;
break;
}
}
}
 
/**
 * Generate HW shader code.  The instructions should have been legalized.
 *
 * Returns a MALLOC()ed kernel, or NULL on failure (the caller owns and must
 * FREE() it).  On success, *size (when non-NULL) receives the kernel size in
 * bytes.
 */
void *
toy_compiler_assemble(struct toy_compiler *tc, int *size)
{
   const struct toy_inst *inst;
   uint32_t *code;
   int pc;

   /* every instruction encodes to four DWORDs */
   code = MALLOC(tc->num_instructions * 4 * sizeof(uint32_t));
   if (!code)
      return NULL;

   pc = 0;
   tc_head(tc);
   while ((inst = tc_next(tc)) != NULL) {
      uint32_t *dw = &code[pc * 4];
      struct codegen cg;

      /* guard against tc->num_instructions being out of sync with the list */
      if (pc >= tc->num_instructions) {
         /* fixed typo in the error message ("instructoun") */
         tc_fail(tc, "wrong instruction count");
         break;
      }

      codegen_prepare(&cg, inst, pc, tc->rect_linear_width);
      codegen_validate_region_restrictions(&cg);

      switch (inst->opcode) {
      case BRW_OPCODE_MAD:
         /* MAD is the only 3-src instruction emitted */
         codegen_inst_3src(&cg, dw);
         break;
      default:
         codegen_inst(&cg, dw);
         break;
      }

      pc++;
   }

   /* never return an invalid kernel */
   if (tc->fail) {
      FREE(code);
      return NULL;
   }

   if (size)
      *size = pc * 4 * sizeof(uint32_t);

   return code;
}
/contrib/sdk/sources/Mesa/src/gallium/drivers/ilo/shader/toy_compiler_disasm.c
0,0 → 1,1385
/*
* Copyright © 2008 Keith Packard
*
* Permission to use, copy, modify, distribute, and sell this software and its
* documentation for any purpose is hereby granted without fee, provided that
* the above copyright notice appear in all copies and that both that copyright
* notice and this permission notice appear in supporting documentation, and
* that the name of the copyright holders not be used in advertising or
* publicity pertaining to distribution of the software without specific,
* written prior permission. The copyright holders make no representations
* about the suitability of this software for any purpose. It is provided "as
* is" without express or implied warranty.
*
* THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
* INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO
* EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY SPECIAL, INDIRECT OR
* CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE,
* DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
* TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
* OF THIS SOFTWARE.
*/
 
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <getopt.h>
#include <unistd.h>
#include <stdarg.h>
 
/* minimal GL type definitions so brw_structs.h can be used stand-alone */
typedef short GLshort;
typedef int GLint;
typedef unsigned char GLubyte;
typedef unsigned int GLuint;
typedef float GLfloat;
#include <stdint.h>
#include "brw_defines.h"
#include "brw_structs.h"
/* forward declaration: the disassembler below is adapted from brw_disasm.c */
static int brw_disasm (FILE *file, struct brw_instruction *inst, int gen);
 
#include "toy_compiler.h"
 
/**
 * Disassemble \p size bytes of kernel code to stderr (with an optional hex
 * dump via ilo_printf when dump_hex is enabled).
 */
void
toy_compiler_disassemble(struct toy_compiler *tc, const void *kernel, int size)
{
   /* set this to true to dump the hex */
   const bool dump_hex = false;
   const struct brw_instruction *instructions = kernel;
   /*
    * BUGFIX: compute the bound as a signed int.  The original condition
    * "i < size / sizeof(*instructions)" promoted i to an unsigned type, so
    * a negative size would have produced a huge trip count instead of zero.
    */
   const int count = size / (int) sizeof(*instructions);
   int i;

   for (i = 0; i < count; i++) {
      if (dump_hex) {
         const uint32_t *dwords = (const uint32_t *) &instructions[i];
         ilo_printf("0x%08x 0x%08x 0x%08x 0x%08x ",
               dwords[3], dwords[2], dwords[1], dwords[0]);
      }

      brw_disasm(stderr, (struct brw_instruction *) &instructions[i],
            ILO_GEN_GET_MAJOR(tc->dev->gen));
   }
}
 
/* per-opcode mnemonic and operand counts, indexed by hardware opcode */
static const struct opcode_desc {
    char    *name;
    int	    nsrc;
    int	    ndst;
} opcode_descs[128] = {
    [BRW_OPCODE_MOV] = { .name = "mov", .nsrc = 1, .ndst = 1 },
    [BRW_OPCODE_FRC] = { .name = "frc", .nsrc = 1, .ndst = 1 },
    [BRW_OPCODE_RNDU] = { .name = "rndu", .nsrc = 1, .ndst = 1 },
    [BRW_OPCODE_RNDD] = { .name = "rndd", .nsrc = 1, .ndst = 1 },
    [BRW_OPCODE_RNDE] = { .name = "rnde", .nsrc = 1, .ndst = 1 },
    [BRW_OPCODE_RNDZ] = { .name = "rndz", .nsrc = 1, .ndst = 1 },
    [BRW_OPCODE_NOT] = { .name = "not", .nsrc = 1, .ndst = 1 },
    [BRW_OPCODE_LZD] = { .name = "lzd", .nsrc = 1, .ndst = 1 },
    [BRW_OPCODE_F32TO16] = { .name = "f32to16", .nsrc = 1, .ndst = 1 },
    [BRW_OPCODE_F16TO32] = { .name = "f16to32", .nsrc = 1, .ndst = 1 },

    [BRW_OPCODE_MUL] = { .name = "mul", .nsrc = 2, .ndst = 1 },
    [BRW_OPCODE_MAC] = { .name = "mac", .nsrc = 2, .ndst = 1 },
    [BRW_OPCODE_MACH] = { .name = "mach", .nsrc = 2, .ndst = 1 },
    [BRW_OPCODE_LINE] = { .name = "line", .nsrc = 2, .ndst = 1 },
    [BRW_OPCODE_PLN] = { .name = "pln", .nsrc = 2, .ndst = 1 },
    [BRW_OPCODE_MAD] = { .name = "mad", .nsrc = 3, .ndst = 1 },
    [BRW_OPCODE_LRP] = { .name = "lrp", .nsrc = 3, .ndst = 1 },
    [BRW_OPCODE_SAD2] = { .name = "sad2", .nsrc = 2, .ndst = 1 },
    [BRW_OPCODE_SADA2] = { .name = "sada2", .nsrc = 2, .ndst = 1 },
    [BRW_OPCODE_DP4] = { .name = "dp4", .nsrc = 2, .ndst = 1 },
    [BRW_OPCODE_DPH] = { .name = "dph", .nsrc = 2, .ndst = 1 },
    [BRW_OPCODE_DP3] = { .name = "dp3", .nsrc = 2, .ndst = 1 },
    [BRW_OPCODE_DP2] = { .name = "dp2", .nsrc = 2, .ndst = 1 },
    [BRW_OPCODE_MATH] = { .name = "math", .nsrc = 2, .ndst = 1 },

    [BRW_OPCODE_AVG] = { .name = "avg", .nsrc = 2, .ndst = 1 },
    [BRW_OPCODE_ADD] = { .name = "add", .nsrc = 2, .ndst = 1 },
    [BRW_OPCODE_SEL] = { .name = "sel", .nsrc = 2, .ndst = 1 },
    [BRW_OPCODE_AND] = { .name = "and", .nsrc = 2, .ndst = 1 },
    [BRW_OPCODE_OR] = { .name = "or", .nsrc = 2, .ndst = 1 },
    [BRW_OPCODE_XOR] = { .name = "xor", .nsrc = 2, .ndst = 1 },
    [BRW_OPCODE_SHR] = { .name = "shr", .nsrc = 2, .ndst = 1 },
    [BRW_OPCODE_SHL] = { .name = "shl", .nsrc = 2, .ndst = 1 },
    [BRW_OPCODE_ASR] = { .name = "asr", .nsrc = 2, .ndst = 1 },
    [BRW_OPCODE_CMP] = { .name = "cmp", .nsrc = 2, .ndst = 1 },
    [BRW_OPCODE_CMPN] = { .name = "cmpn", .nsrc = 2, .ndst = 1 },

    [BRW_OPCODE_SEND] = { .name = "send", .nsrc = 1, .ndst = 1 },
    [BRW_OPCODE_SENDC] = { .name = "sendc", .nsrc = 1, .ndst = 1 },
    [BRW_OPCODE_NOP] = { .name = "nop", .nsrc = 0, .ndst = 0 },
    [BRW_OPCODE_JMPI] = { .name = "jmpi", .nsrc = 0, .ndst = 0 },
    [BRW_OPCODE_IF] = { .name = "if", .nsrc = 2, .ndst = 0 },
    [BRW_OPCODE_IFF] = { .name = "iff", .nsrc = 2, .ndst = 1 },
    [BRW_OPCODE_WHILE] = { .name = "while", .nsrc = 2, .ndst = 0 },
    [BRW_OPCODE_ELSE] = { .name = "else", .nsrc = 2, .ndst = 0 },
    [BRW_OPCODE_BREAK] = { .name = "break", .nsrc = 2, .ndst = 0 },
    [BRW_OPCODE_CONTINUE] = { .name = "cont", .nsrc = 1, .ndst = 0 },
    [BRW_OPCODE_HALT] = { .name = "halt", .nsrc = 1, .ndst = 0 },
    [BRW_OPCODE_MSAVE] = { .name = "msave", .nsrc = 1, .ndst = 1 },
    [BRW_OPCODE_PUSH] = { .name = "push", .nsrc = 1, .ndst = 1 },
    [BRW_OPCODE_MRESTORE] = { .name = "mrest", .nsrc = 1, .ndst = 1 },
    [BRW_OPCODE_POP] = { .name = "pop", .nsrc = 2, .ndst = 0 },
    [BRW_OPCODE_WAIT] = { .name = "wait", .nsrc = 1, .ndst = 0 },
    [BRW_OPCODE_DO] = { .name = "do", .nsrc = 0, .ndst = 0 },
    [BRW_OPCODE_ENDIF] = { .name = "endif", .nsrc = 2, .ndst = 0 },
};
/* alias used by the printing helpers below */
static const struct opcode_desc *opcode = opcode_descs;
 
/* instruction-field decode tables: each maps a hardware encoding to the
 * suffix/string printed by the disassembler; NULL entries are invalid */
static const char * const conditional_modifier[16] = {
    [BRW_CONDITIONAL_NONE] = "",
    [BRW_CONDITIONAL_Z] = ".e",
    [BRW_CONDITIONAL_NZ] = ".ne",
    [BRW_CONDITIONAL_G] = ".g",
    [BRW_CONDITIONAL_GE] = ".ge",
    [BRW_CONDITIONAL_L] = ".l",
    [BRW_CONDITIONAL_LE] = ".le",
    [BRW_CONDITIONAL_R] = ".r",
    [BRW_CONDITIONAL_O] = ".o",
    [BRW_CONDITIONAL_U] = ".u",
};

static const char * const negate[2] = {
    [0] = "",
    [1] = "-",
};

static const char * const _abs[2] = {
    [0] = "",
    [1] = "(abs)",
};

static const char * const vert_stride[16] = {
    [0] = "0",
    [1] = "1",
    [2] = "2",
    [3] = "4",
    [4] = "8",
    [5] = "16",
    [6] = "32",
    [15] = "VxH",
};

static const char * const width[8] = {
    [0] = "1",
    [1] = "2",
    [2] = "4",
    [3] = "8",
    [4] = "16",
};

static const char * const horiz_stride[4] = {
    [0] = "0",
    [1] = "1",
    [2] = "2",
    [3] = "4"
};

static const char * const chan_sel[4] = {
    [0] = "x",
    [1] = "y",
    [2] = "z",
    [3] = "w",
};

static const char * const debug_ctrl[2] = {
    [0] = "",
    [1] = ".breakpoint"
};

static const char * const saturate[2] = {
    [0] = "",
    [1] = ".sat"
};

static const char * const accwr[2] = {
    [0] = "",
    [1] = "AccWrEnable"
};

static const char * const wectrl[2] = {
    [0] = "WE_normal",
    [1] = "WE_all"
};

static const char * const exec_size[8] = {
    [0] = "1",
    [1] = "2",
    [2] = "4",
    [3] = "8",
    [4] = "16",
    [5] = "32"
};

static const char * const pred_inv[2] = {
    [0] = "+",
    [1] = "-"
};

/* predicate controls differ between align16 and align1 encodings */
static const char * const pred_ctrl_align16[16] = {
    [1] = "",
    [2] = ".x",
    [3] = ".y",
    [4] = ".z",
    [5] = ".w",
    [6] = ".any4h",
    [7] = ".all4h",
};

static const char * const pred_ctrl_align1[16] = {
    [1] = "",
    [2] = ".anyv",
    [3] = ".allv",
    [4] = ".any2h",
    [5] = ".all2h",
    [6] = ".any4h",
    [7] = ".all4h",
    [8] = ".any8h",
    [9] = ".all8h",
    [10] = ".any16h",
    [11] = ".all16h",
};

static const char * const thread_ctrl[4] = {
    [0] = "",
    [2] = "switch"
};

static const char * const compr_ctrl[4] = {
    [0] = "",
    [1] = "sechalf",
    [2] = "compr",
    [3] = "compr4",
};

static const char * const dep_ctrl[4] = {
    [0] = "",
    [1] = "NoDDClr",
    [2] = "NoDDChk",
    [3] = "NoDDClr,NoDDChk",
};

static const char * const mask_ctrl[4] = {
    [0] = "",
    [1] = "nomask",
};

static const char * const access_mode[2] = {
    [0] = "align1",
    [1] = "align16",
};

static const char * const reg_encoding[8] = {
    [0] = "UD",
    [1] = "D",
    [2] = "UW",
    [3] = "W",
    [4] = "UB",
    [5] = "B",
    [7] = "F"
};
 
/* byte size of each register type encoding; NOTE(review): not static, so it
 * has external linkage — presumably referenced elsewhere; confirm before
 * changing */
const int reg_type_size[8] = {
    [0] = 4,
    [1] = 4,
    [2] = 2,
    [3] = 2,
    [4] = 1,
    [5] = 1,
    [7] = 4
};

static const char * const reg_file[4] = {
    [0] = "A",
    [1] = "g",
    [2] = "m",
    [3] = "imm",
};

/* writemask suffix per 4-bit channel-enable value */
static const char * const writemask[16] = {
    [0x0] = ".",
    [0x1] = ".x",
    [0x2] = ".y",
    [0x3] = ".xy",
    [0x4] = ".z",
    [0x5] = ".xz",
    [0x6] = ".yz",
    [0x7] = ".xyz",
    [0x8] = ".w",
    [0x9] = ".xw",
    [0xa] = ".yw",
    [0xb] = ".xyw",
    [0xc] = ".zw",
    [0xd] = ".xzw",
    [0xe] = ".yzw",
    [0xf] = "",
};

static const char * const end_of_thread[2] = {
    [0] = "",
    [1] = "EOT"
};

/* send-message target units (pre-gen6 and gen6+ layouts) */
static const char * const target_function[16] = {
    [BRW_SFID_NULL] = "null",
    [BRW_SFID_MATH] = "math",
    [BRW_SFID_SAMPLER] = "sampler",
    [BRW_SFID_MESSAGE_GATEWAY] = "gateway",
    [BRW_SFID_DATAPORT_READ] = "read",
    [BRW_SFID_DATAPORT_WRITE] = "write",
    [BRW_SFID_URB] = "urb",
    [BRW_SFID_THREAD_SPAWNER] = "thread_spawner"
};

static const char * const target_function_gen6[16] = {
    [BRW_SFID_NULL] = "null",
    [BRW_SFID_MATH] = "math",
    [BRW_SFID_SAMPLER] = "sampler",
    [BRW_SFID_MESSAGE_GATEWAY] = "gateway",
    [BRW_SFID_URB] = "urb",
    [BRW_SFID_THREAD_SPAWNER] = "thread_spawner",
    [GEN6_SFID_DATAPORT_SAMPLER_CACHE] = "sampler",
    [GEN6_SFID_DATAPORT_RENDER_CACHE] = "render",
    [GEN6_SFID_DATAPORT_CONSTANT_CACHE] = "const",
    [GEN7_SFID_DATAPORT_DATA_CACHE] = "data"
};

static const char * const dp_rc_msg_type_gen6[16] = {
    [BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ] = "OWORD block read",
    [GEN6_DATAPORT_READ_MESSAGE_RENDER_UNORM_READ] = "RT UNORM read",
    [GEN6_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ] = "OWORD dual block read",
    [GEN6_DATAPORT_READ_MESSAGE_MEDIA_BLOCK_READ] = "media block read",
    [GEN6_DATAPORT_READ_MESSAGE_OWORD_UNALIGN_BLOCK_READ] = "OWORD unaligned block read",
    [GEN6_DATAPORT_READ_MESSAGE_DWORD_SCATTERED_READ] = "DWORD scattered read",
    [GEN6_DATAPORT_WRITE_MESSAGE_DWORD_ATOMIC_WRITE] = "DWORD atomic write",
    [GEN6_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE] = "OWORD block write",
    [GEN6_DATAPORT_WRITE_MESSAGE_OWORD_DUAL_BLOCK_WRITE] = "OWORD dual block write",
    [GEN6_DATAPORT_WRITE_MESSAGE_MEDIA_BLOCK_WRITE] = "media block write",
    [GEN6_DATAPORT_WRITE_MESSAGE_DWORD_SCATTERED_WRITE] = "DWORD scattered write",
    [GEN6_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE] = "RT write",
    [GEN6_DATAPORT_WRITE_MESSAGE_STREAMED_VB_WRITE] = "streamed VB write",
    [GEN6_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_UNORM_WRITE] = "RT UNORMc write",
};

static const char * const math_function[16] = {
    [BRW_MATH_FUNCTION_INV] = "inv",
    [BRW_MATH_FUNCTION_LOG] = "log",
    [BRW_MATH_FUNCTION_EXP] = "exp",
    [BRW_MATH_FUNCTION_SQRT] = "sqrt",
    [BRW_MATH_FUNCTION_RSQ] = "rsq",
    [BRW_MATH_FUNCTION_SIN] = "sin",
    [BRW_MATH_FUNCTION_COS] = "cos",
    [BRW_MATH_FUNCTION_SINCOS] = "sincos",
    [BRW_MATH_FUNCTION_FDIV] = "fdiv",
    [BRW_MATH_FUNCTION_POW] = "pow",
    [BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER] = "intdivmod",
    [BRW_MATH_FUNCTION_INT_DIV_QUOTIENT] = "intdiv",
    [BRW_MATH_FUNCTION_INT_DIV_REMAINDER] = "intmod",
};

static const char * const math_saturate[2] = {
    [0] = "",
    [1] = "sat"
};

static const char * const math_signed[2] = {
    [0] = "",
    [1] = "signed"
};

static const char * const math_scalar[2] = {
    [0] = "",
    [1] = "scalar"
};

static const char * const math_precision[2] = {
    [0] = "",
    [1] = "partial_precision"
};

static const char * const urb_opcode[2] = {
    [0] = "urb_write",
    [1] = "ff_sync",
};

static const char * const urb_swizzle[4] = {
    [BRW_URB_SWIZZLE_NONE] = "",
    [BRW_URB_SWIZZLE_INTERLEAVE] = "interleave",
    [BRW_URB_SWIZZLE_TRANSPOSE] = "transpose",
};

static const char * const urb_allocate[2] = {
    [0] = "",
    [1] = "allocate"
};

static const char * const urb_used[2] = {
    [0] = "",
    [1] = "used"
};

static const char * const urb_complete[2] = {
    [0] = "",
    [1] = "complete"
};

static const char * const sampler_target_format[4] = {
    [0] = "F",
    [2] = "UD",
    [3] = "D"
};
 
/* current output column, maintained by the printing helpers below */
static int column;

/* write a literal string and advance the column counter */
static int string (FILE *file, const char *str)
{
    column += strlen (str);
    fputs (str, file);
    return 0;
}
 
/* printf-style output through string(), so the column stays tracked */
static int format (FILE *f, const char *format, ...)
{
    char buf[1024];
    va_list ap;

    va_start (ap, format);
    vsnprintf (buf, sizeof (buf) - 1, format, ap);
    va_end (ap);

    return string (f, buf);
}
 
/* emit a newline and reset the column counter */
static int newline (FILE *f)
{
    fputc ('\n', f);
    column = 0;
    return 0;
}
 
/* emit at least one space, then pad with spaces up to column c */
static int pad (FILE *f, int c)
{
    string (f, " ");
    while (column < c)
	string (f, " ");
    return 0;
}
 
/* print the decode-table entry for a field value; returns 1 when invalid */
static int control (FILE *file, const char *name, const char * const ctrl[],
		    GLuint id, int *space)
{
    const char *value = ctrl[id];

    if (!value) {
	fprintf (file, "*** invalid %s value %d ",
		 name, id);
	return 1;
    }

    if (value[0]) {
	/* separate from the previous token when requested */
	if (space && *space)
	    string (file, " ");
	string (file, value);
	if (space)
	    *space = 1;
    }
    return 0;
}
 
static int print_opcode (FILE *file, int id)
{
if (!opcode[id].name) {
format (file, "*** invalid opcode value %d ", id);
return 1;
}
string (file, opcode[id].name);
return 0;
}
 
/* print a register name; returns -1 when nothing more (subreg/region)
 * should be printed for this operand, non-zero on decode error */
static int reg (FILE *file, GLuint _reg_file, GLuint _reg_nr)
{
    int	err = 0;

    /* Clear the Compr4 instruction compression bit. */
    if (_reg_file == BRW_MESSAGE_REGISTER_FILE)
	_reg_nr &= ~(1 << 7);

    if (_reg_file == BRW_ARCHITECTURE_REGISTER_FILE) {
	/* high nibble selects the ARF kind, low nibble the instance */
	switch (_reg_nr & 0xf0) {
	case BRW_ARF_NULL:
	    string (file, "null");
	    return -1;
	case BRW_ARF_ADDRESS:
	    format (file, "a%d", _reg_nr & 0x0f);
	    break;
	case BRW_ARF_ACCUMULATOR:
	    format (file, "acc%d", _reg_nr & 0x0f);
	    break;
	case BRW_ARF_FLAG:
	    format (file, "f%d", _reg_nr & 0x0f);
	    break;
	case BRW_ARF_MASK:
	    format (file, "mask%d", _reg_nr & 0x0f);
	    break;
	case BRW_ARF_MASK_STACK:
	    format (file, "msd%d", _reg_nr & 0x0f);
	    break;
	case BRW_ARF_STATE:
	    format (file, "sr%d", _reg_nr & 0x0f);
	    break;
	case BRW_ARF_CONTROL:
	    format (file, "cr%d", _reg_nr & 0x0f);
	    break;
	case BRW_ARF_NOTIFICATION_COUNT:
	    format (file, "n%d", _reg_nr & 0x0f);
	    break;
	case BRW_ARF_IP:
	    string (file, "ip");
	    return -1;
	    break;
	default:
	    format (file, "ARF%d", _reg_nr);
	    break;
	}
    } else {
	err  |= control (file, "src reg file", reg_file, _reg_file, NULL);
	format (file, "%d", _reg_nr);
    }
    return err;
}
 
/* print the destination operand of a 1-src/2-src instruction */
static int dest (FILE *file, struct brw_instruction *inst)
{
    int	err = 0;

    if (inst->header.access_mode == BRW_ALIGN_1)
    {
	if (inst->bits1.da1.dest_address_mode == BRW_ADDRESS_DIRECT)
	{
	    err |= reg (file, inst->bits1.da1.dest_reg_file, inst->bits1.da1.dest_reg_nr);
	    /* reg() returns -1 for null/ip: print no subreg/region */
	    if (err == -1)
		return 0;
	    /* subreg is stored in bytes; print it in elements */
	    if (inst->bits1.da1.dest_subreg_nr)
		format (file, ".%d", inst->bits1.da1.dest_subreg_nr /
			reg_type_size[inst->bits1.da1.dest_reg_type]);
	    string (file, "<");
	    err |= control (file, "horiz stride", horiz_stride, inst->bits1.da1.dest_horiz_stride, NULL);
	    string (file, ">");
	    err |= control (file, "dest reg encoding", reg_encoding, inst->bits1.da1.dest_reg_type, NULL);
	}
	else
	{
	    /* register-indirect destination: print base a0 + offset */
	    string (file, "g[a0");
	    if (inst->bits1.ia1.dest_subreg_nr)
		format (file, ".%d", inst->bits1.ia1.dest_subreg_nr /
			reg_type_size[inst->bits1.ia1.dest_reg_type]);
	    if (inst->bits1.ia1.dest_indirect_offset)
		format (file, " %d", inst->bits1.ia1.dest_indirect_offset);
	    string (file, "]<");
	    err |= control (file, "horiz stride", horiz_stride, inst->bits1.ia1.dest_horiz_stride, NULL);
	    string (file, ">");
	    err |= control (file, "dest reg encoding", reg_encoding, inst->bits1.ia1.dest_reg_type, NULL);
	}
    }
    else
    {
	if (inst->bits1.da16.dest_address_mode == BRW_ADDRESS_DIRECT)
	{
	    err |= reg (file, inst->bits1.da16.dest_reg_file, inst->bits1.da16.dest_reg_nr);
	    if (err == -1)
		return 0;
	    if (inst->bits1.da16.dest_subreg_nr)
		format (file, ".%d", inst->bits1.da16.dest_subreg_nr /
			reg_type_size[inst->bits1.da16.dest_reg_type]);
	    /* align16 dst always has horizontal stride 1 */
	    string (file, "<1>");
	    err |= control (file, "writemask", writemask, inst->bits1.da16.dest_writemask, NULL);
	    err |= control (file, "dest reg encoding", reg_encoding, inst->bits1.da16.dest_reg_type, NULL);
	}
	else
	{
	    err = 1;
	    string (file, "Indirect align16 address mode not supported");
	}
    }

    return 0;
}
 
/* Print the destination of a 3-source instruction: a single file bit
 * selects MRF vs GRF, and the type is always float.
 */
static int dest_3src (FILE *file, struct brw_instruction *inst)
{
   const uint32_t brw_file = inst->bits1.da3src.dest_reg_file ?
      BRW_MESSAGE_REGISTER_FILE : BRW_GENERAL_REGISTER_FILE;
   int err = 0;

   err |= reg (file, brw_file, inst->bits1.da3src.dest_reg_nr);
   if (err == -1)
      return 0;
   if (inst->bits1.da3src.dest_subreg_nr)
      format (file, ".%d", inst->bits1.da3src.dest_subreg_nr);
   string (file, "<1>");
   err |= control (file, "writemask", writemask,
                   inst->bits1.da3src.dest_writemask, NULL);
   err |= control (file, "dest reg encoding", reg_encoding,
                   BRW_REGISTER_TYPE_F, NULL);

   return 0;
}
 
/* Print an align1 source region as "<vstride,width,hstride>". */
static int src_align1_region (FILE *file,
                              GLuint _vert_stride, GLuint _width,
                              GLuint _horiz_stride)
{
   int err = 0;

   string (file, "<");
   err |= control (file, "vert stride", vert_stride, _vert_stride, NULL);
   string (file, ",");
   err |= control (file, "width", width, _width, NULL);
   string (file, ",");
   err |= control (file, "horiz_stride", horiz_stride, _horiz_stride, NULL);
   string (file, ">");

   return err;
}
 
/* Print a direct-addressed align1 source operand:
 * modifiers, register, optional subreg, region, type.
 */
static int src_da1 (FILE *file, GLuint type, GLuint _reg_file,
                    GLuint _vert_stride, GLuint _width, GLuint _horiz_stride,
                    GLuint reg_num, GLuint sub_reg_num, GLuint __abs,
                    GLuint _negate)
{
   int err = 0;

   err |= control (file, "negate", negate, _negate, NULL);
   err |= control (file, "abs", _abs, __abs, NULL);

   err |= reg (file, _reg_file, reg_num);
   if (err == -1)
      return 0;                  /* null register: no suffix */
   if (sub_reg_num) {
      /* print subreg in units of the type, matching the spec's style */
      format (file, ".%d", sub_reg_num / reg_type_size[type]);
   }
   src_align1_region (file, _vert_stride, _width, _horiz_stride);
   err |= control (file, "src reg encoding", reg_encoding, type, NULL);

   return err;
}
 
/* Print an indirect (a0-relative) align1 source operand as
 * "g[a0.sub off]<v,w,h>type".
 * NOTE(review): _reg_file and _addr_mode are accepted for symmetry with the
 * caller's field layout but are not used in the output.
 */
static int src_ia1 (FILE *file,
                    GLuint type,
                    GLuint _reg_file,
                    GLint _addr_imm,
                    GLuint _addr_subreg_nr,
                    GLuint _negate,
                    GLuint __abs,
                    GLuint _addr_mode,
                    GLuint _horiz_stride,
                    GLuint _width,
                    GLuint _vert_stride)
{
   int err = 0;
   err |= control (file, "negate", negate, _negate, NULL);
   err |= control (file, "abs", _abs, __abs, NULL);

   string (file, "g[a0");
   if (_addr_subreg_nr)
      format (file, ".%d", _addr_subreg_nr);
   if (_addr_imm)
      format (file, " %d", _addr_imm);
   string (file, "]");
   src_align1_region (file, _vert_stride, _width, _horiz_stride);
   err |= control (file, "src reg encoding", reg_encoding, type, NULL);
   return err;
}
 
/* Print a direct-addressed align16 source operand: modifiers, register,
 * optional subreg, the fixed <v,4,1> region, swizzle, and type.
 */
static int src_da16 (FILE *file,
                     GLuint _reg_type,
                     GLuint _reg_file,
                     GLuint _vert_stride,
                     GLuint _reg_nr,
                     GLuint _subreg_nr,
                     GLuint __abs,
                     GLuint _negate,
                     GLuint swz_x,
                     GLuint swz_y,
                     GLuint swz_z,
                     GLuint swz_w)
{
   int err = 0;
   err |= control (file, "negate", negate, _negate, NULL);
   err |= control (file, "abs", _abs, __abs, NULL);

   err |= reg (file, _reg_file, _reg_nr);
   if (err == -1)
      return 0;
   if (_subreg_nr)
      /* bit4 for subreg number byte addressing. Make this same meaning as
         in da1 case, so output looks consistent. */
      format (file, ".%d", 16 / reg_type_size[_reg_type]);
   string (file, "<");
   err |= control (file, "vert stride", vert_stride, _vert_stride, NULL);
   string (file, ",4,1>");
   /*
    * Three kinds of swizzle display:
    *  identity - nothing printed
    *  1->all   - print the single channel
    *  1->1     - print the mapping
    */
   if (swz_x == BRW_CHANNEL_X &&
       swz_y == BRW_CHANNEL_Y &&
       swz_z == BRW_CHANNEL_Z &&
       swz_w == BRW_CHANNEL_W)
   {
      ;
   }
   else if (swz_x == swz_y && swz_x == swz_z && swz_x == swz_w)
   {
      string (file, ".");
      err |= control (file, "channel select", chan_sel, swz_x, NULL);
   }
   else
   {
      string (file, ".");
      err |= control (file, "channel select", chan_sel, swz_x, NULL);
      err |= control (file, "channel select", chan_sel, swz_y, NULL);
      err |= control (file, "channel select", chan_sel, swz_z, NULL);
      err |= control (file, "channel select", chan_sel, swz_w, NULL);
   }
   err |= control (file, "src da16 reg type", reg_encoding, _reg_type, NULL);
   return err;
}
 
/* Print src0 of a 3-source instruction: always GRF, always float, with a
 * per-operand 8-bit swizzle (2 bits per channel).
 */
static int src0_3src (FILE *file, struct brw_instruction *inst)
{
   int err = 0;
   GLuint swz_x = (inst->bits2.da3src.src0_swizzle >> 0) & 0x3;
   GLuint swz_y = (inst->bits2.da3src.src0_swizzle >> 2) & 0x3;
   GLuint swz_z = (inst->bits2.da3src.src0_swizzle >> 4) & 0x3;
   GLuint swz_w = (inst->bits2.da3src.src0_swizzle >> 6) & 0x3;

   err |= control (file, "negate", negate, inst->bits1.da3src.src0_negate, NULL);
   err |= control (file, "abs", _abs, inst->bits1.da3src.src0_abs, NULL);

   err |= reg (file, BRW_GENERAL_REGISTER_FILE, inst->bits2.da3src.src0_reg_nr);
   if (err == -1)
      return 0;
   if (inst->bits2.da3src.src0_subreg_nr)
      format (file, ".%d", inst->bits2.da3src.src0_subreg_nr);
   string (file, "<4,1,1>");
   err |= control (file, "src da16 reg type", reg_encoding,
                   BRW_REGISTER_TYPE_F, NULL);
   /*
    * Three kinds of swizzle display:
    *  identity - nothing printed
    *  1->all   - print the single channel
    *  1->1     - print the mapping
    */
   if (swz_x == BRW_CHANNEL_X &&
       swz_y == BRW_CHANNEL_Y &&
       swz_z == BRW_CHANNEL_Z &&
       swz_w == BRW_CHANNEL_W)
   {
      ;
   }
   else if (swz_x == swz_y && swz_x == swz_z && swz_x == swz_w)
   {
      string (file, ".");
      err |= control (file, "channel select", chan_sel, swz_x, NULL);
   }
   else
   {
      string (file, ".");
      err |= control (file, "channel select", chan_sel, swz_x, NULL);
      err |= control (file, "channel select", chan_sel, swz_y, NULL);
      err |= control (file, "channel select", chan_sel, swz_z, NULL);
      err |= control (file, "channel select", chan_sel, swz_w, NULL);
   }
   return err;
}
 
/* Print src1 of a 3-source instruction.  Like src0_3src, but the subreg
 * number is split across two instruction words and must be reassembled.
 */
static int src1_3src (FILE *file, struct brw_instruction *inst)
{
   int err = 0;
   GLuint swz_x = (inst->bits2.da3src.src1_swizzle >> 0) & 0x3;
   GLuint swz_y = (inst->bits2.da3src.src1_swizzle >> 2) & 0x3;
   GLuint swz_z = (inst->bits2.da3src.src1_swizzle >> 4) & 0x3;
   GLuint swz_w = (inst->bits2.da3src.src1_swizzle >> 6) & 0x3;
   /* low 2 bits live in bits2, the rest in bits3 */
   GLuint src1_subreg_nr = (inst->bits2.da3src.src1_subreg_nr_low |
                            (inst->bits3.da3src.src1_subreg_nr_high << 2));

   err |= control (file, "negate", negate, inst->bits1.da3src.src1_negate,
                   NULL);
   err |= control (file, "abs", _abs, inst->bits1.da3src.src1_abs, NULL);

   err |= reg (file, BRW_GENERAL_REGISTER_FILE,
               inst->bits3.da3src.src1_reg_nr);
   if (err == -1)
      return 0;
   if (src1_subreg_nr)
      format (file, ".%d", src1_subreg_nr);
   string (file, "<4,1,1>");
   err |= control (file, "src da16 reg type", reg_encoding,
                   BRW_REGISTER_TYPE_F, NULL);
   /*
    * Three kinds of swizzle display:
    *  identity - nothing printed
    *  1->all   - print the single channel
    *  1->1     - print the mapping
    */
   if (swz_x == BRW_CHANNEL_X &&
       swz_y == BRW_CHANNEL_Y &&
       swz_z == BRW_CHANNEL_Z &&
       swz_w == BRW_CHANNEL_W)
   {
      ;
   }
   else if (swz_x == swz_y && swz_x == swz_z && swz_x == swz_w)
   {
      string (file, ".");
      err |= control (file, "channel select", chan_sel, swz_x, NULL);
   }
   else
   {
      string (file, ".");
      err |= control (file, "channel select", chan_sel, swz_x, NULL);
      err |= control (file, "channel select", chan_sel, swz_y, NULL);
      err |= control (file, "channel select", chan_sel, swz_z, NULL);
      err |= control (file, "channel select", chan_sel, swz_w, NULL);
   }
   return err;
}
 
 
/* Print src2 of a 3-source instruction; same format as src0_3src. */
static int src2_3src (FILE *file, struct brw_instruction *inst)
{
   int err = 0;
   GLuint swz_x = (inst->bits3.da3src.src2_swizzle >> 0) & 0x3;
   GLuint swz_y = (inst->bits3.da3src.src2_swizzle >> 2) & 0x3;
   GLuint swz_z = (inst->bits3.da3src.src2_swizzle >> 4) & 0x3;
   GLuint swz_w = (inst->bits3.da3src.src2_swizzle >> 6) & 0x3;

   err |= control (file, "negate", negate, inst->bits1.da3src.src2_negate,
                   NULL);
   err |= control (file, "abs", _abs, inst->bits1.da3src.src2_abs, NULL);

   err |= reg (file, BRW_GENERAL_REGISTER_FILE,
               inst->bits3.da3src.src2_reg_nr);
   if (err == -1)
      return 0;
   if (inst->bits3.da3src.src2_subreg_nr)
      format (file, ".%d", inst->bits3.da3src.src2_subreg_nr);
   string (file, "<4,1,1>");
   err |= control (file, "src da16 reg type", reg_encoding,
                   BRW_REGISTER_TYPE_F, NULL);
   /*
    * Three kinds of swizzle display:
    *  identity - nothing printed
    *  1->all   - print the single channel
    *  1->1     - print the mapping
    */
   if (swz_x == BRW_CHANNEL_X &&
       swz_y == BRW_CHANNEL_Y &&
       swz_z == BRW_CHANNEL_Z &&
       swz_w == BRW_CHANNEL_W)
   {
      ;
   }
   else if (swz_x == swz_y && swz_x == swz_z && swz_x == swz_w)
   {
      string (file, ".");
      err |= control (file, "channel select", chan_sel, swz_x, NULL);
   }
   else
   {
      string (file, ".");
      err |= control (file, "channel select", chan_sel, swz_x, NULL);
      err |= control (file, "channel select", chan_sel, swz_y, NULL);
      err |= control (file, "channel select", chan_sel, swz_z, NULL);
      err |= control (file, "channel select", chan_sel, swz_w, NULL);
   }
   return err;
}
 
/**
 * Print an immediate operand according to its register type.
 * Unknown type encodings print nothing.  Always returns 0.
 */
static int imm (FILE *file, GLuint type, struct brw_instruction *inst) {
   switch (type) {
   case BRW_REGISTER_TYPE_UD:
      format (file, "0x%08xUD", inst->bits3.ud);
      break;
   case BRW_REGISTER_TYPE_D:
      format (file, "%dD", inst->bits3.d);
      break;
   case BRW_REGISTER_TYPE_UW:
      format (file, "0x%04xUW", (uint16_t) inst->bits3.ud);
      break;
   case BRW_REGISTER_TYPE_W:
      format (file, "%dW", (int16_t) inst->bits3.d);
      break;
   case BRW_REGISTER_TYPE_UB:
      /* Cast to uint8_t, not int8_t: a signed byte would sign-extend
       * through integer promotion and make %02x print e.g. 0xffffff80.
       */
      format (file, "0x%02xUB", (uint8_t) inst->bits3.ud);
      break;
   case BRW_REGISTER_TYPE_VF:
      format (file, "Vector Float");
      break;
   case BRW_REGISTER_TYPE_V:
      format (file, "0x%08xV", inst->bits3.ud);
      break;
   case BRW_REGISTER_TYPE_F:
      format (file, "%-gF", inst->bits3.f);
      break;
   }
   return 0;
}
 
/* Print the first source operand, dispatching on immediate vs register,
 * align1 vs align16, and direct vs indirect addressing.
 */
static int src0 (FILE *file, struct brw_instruction *inst)
{
   if (inst->bits1.da1.src0_reg_file == BRW_IMMEDIATE_VALUE)
      return imm (file, inst->bits1.da1.src0_reg_type,
                  inst);
   else if (inst->header.access_mode == BRW_ALIGN_1)
   {
      if (inst->bits2.da1.src0_address_mode == BRW_ADDRESS_DIRECT)
      {
         return src_da1 (file,
                         inst->bits1.da1.src0_reg_type,
                         inst->bits1.da1.src0_reg_file,
                         inst->bits2.da1.src0_vert_stride,
                         inst->bits2.da1.src0_width,
                         inst->bits2.da1.src0_horiz_stride,
                         inst->bits2.da1.src0_reg_nr,
                         inst->bits2.da1.src0_subreg_nr,
                         inst->bits2.da1.src0_abs,
                         inst->bits2.da1.src0_negate);
      }
      else
      {
         return src_ia1 (file,
                         inst->bits1.ia1.src0_reg_type,
                         inst->bits1.ia1.src0_reg_file,
                         inst->bits2.ia1.src0_indirect_offset,
                         inst->bits2.ia1.src0_subreg_nr,
                         inst->bits2.ia1.src0_negate,
                         inst->bits2.ia1.src0_abs,
                         inst->bits2.ia1.src0_address_mode,
                         inst->bits2.ia1.src0_horiz_stride,
                         inst->bits2.ia1.src0_width,
                         inst->bits2.ia1.src0_vert_stride);
      }
   }
   else
   {
      if (inst->bits2.da16.src0_address_mode == BRW_ADDRESS_DIRECT)
      {
         return src_da16 (file,
                          inst->bits1.da16.src0_reg_type,
                          inst->bits1.da16.src0_reg_file,
                          inst->bits2.da16.src0_vert_stride,
                          inst->bits2.da16.src0_reg_nr,
                          inst->bits2.da16.src0_subreg_nr,
                          inst->bits2.da16.src0_abs,
                          inst->bits2.da16.src0_negate,
                          inst->bits2.da16.src0_swz_x,
                          inst->bits2.da16.src0_swz_y,
                          inst->bits2.da16.src0_swz_z,
                          inst->bits2.da16.src0_swz_w);
      }
      else
      {
         string (file, "Indirect align16 address mode not supported");
         return 1;
      }
   }
}
 
/* Print the second source operand; mirror image of src0() using the
 * src1 bit fields (mostly in bits3).
 */
static int src1 (FILE *file, struct brw_instruction *inst)
{
   if (inst->bits1.da1.src1_reg_file == BRW_IMMEDIATE_VALUE)
      return imm (file, inst->bits1.da1.src1_reg_type,
                  inst);
   else if (inst->header.access_mode == BRW_ALIGN_1)
   {
      if (inst->bits3.da1.src1_address_mode == BRW_ADDRESS_DIRECT)
      {
         return src_da1 (file,
                         inst->bits1.da1.src1_reg_type,
                         inst->bits1.da1.src1_reg_file,
                         inst->bits3.da1.src1_vert_stride,
                         inst->bits3.da1.src1_width,
                         inst->bits3.da1.src1_horiz_stride,
                         inst->bits3.da1.src1_reg_nr,
                         inst->bits3.da1.src1_subreg_nr,
                         inst->bits3.da1.src1_abs,
                         inst->bits3.da1.src1_negate);
      }
      else
      {
         return src_ia1 (file,
                         inst->bits1.ia1.src1_reg_type,
                         inst->bits1.ia1.src1_reg_file,
                         inst->bits3.ia1.src1_indirect_offset,
                         inst->bits3.ia1.src1_subreg_nr,
                         inst->bits3.ia1.src1_negate,
                         inst->bits3.ia1.src1_abs,
                         inst->bits3.ia1.src1_address_mode,
                         inst->bits3.ia1.src1_horiz_stride,
                         inst->bits3.ia1.src1_width,
                         inst->bits3.ia1.src1_vert_stride);
      }
   }
   else
   {
      if (inst->bits3.da16.src1_address_mode == BRW_ADDRESS_DIRECT)
      {
         return src_da16 (file,
                          inst->bits1.da16.src1_reg_type,
                          inst->bits1.da16.src1_reg_file,
                          inst->bits3.da16.src1_vert_stride,
                          inst->bits3.da16.src1_reg_nr,
                          inst->bits3.da16.src1_subreg_nr,
                          inst->bits3.da16.src1_abs,
                          inst->bits3.da16.src1_negate,
                          inst->bits3.da16.src1_swz_x,
                          inst->bits3.da16.src1_swz_y,
                          inst->bits3.da16.src1_swz_z,
                          inst->bits3.da16.src1_swz_w);
      }
      else
      {
         string (file, "Indirect align16 address mode not supported");
         return 1;
      }
   }
}
 
/* Map the 3-bit execution_size encoding to the number of channels. */
int esize[6] = {
   [0] = 1,
   [1] = 2,
   [2] = 4,
   [3] = 8,
   [4] = 16,
   [5] = 32,
};
 
/* Print the quarter-control suffix (" 1Q".." 4Q" for SIMD8, " 1H"/" 2H"
 * for SIMD16) that says which channel group this instruction covers.
 */
static int qtr_ctrl(FILE *file, struct brw_instruction *inst)
{
   static const char * const quarters[4] = { " 1Q", " 2Q", " 3Q", " 4Q" };
   const int qtr_ctl = inst->header.compression_control;
   const int exec_size = esize[inst->header.execution_size];

   if (exec_size == 8) {
      if (qtr_ctl >= 0 && qtr_ctl < 4)
         string (file, quarters[qtr_ctl]);
   } else if (exec_size == 16) {
      string (file, qtr_ctl < 2 ? " 1H" : " 2H");
   }
   return 0;
}
 
/**
 * Disassemble one Gen EU instruction to "file".
 *
 * "gen" is the hardware generation (4..7) and selects the bit layouts and
 * control-name tables used.  Output format, roughly:
 *   (pred) opcode.mod (exec_size)  dest  src0  src1 [send descriptor]  {flags};
 * Returns non-zero if any field held an invalid encoding.
 */
int brw_disasm (FILE *file, struct brw_instruction *inst, int gen)
{
   int err = 0;
   int space = 0;

   /* predication prefix, e.g. "(+f0.1) " */
   if (inst->header.predicate_control) {
      string (file, "(");
      err |= control (file, "predicate inverse", pred_inv, inst->header.predicate_inverse, NULL);
      format (file, "f%d", gen >= 7 ? inst->bits2.da1.flag_reg_nr : 0);
      if (inst->bits2.da1.flag_subreg_nr)
         format (file, ".%d", inst->bits2.da1.flag_subreg_nr);
      if (inst->header.access_mode == BRW_ALIGN_1)
         err |= control (file, "predicate control align1", pred_ctrl_align1,
                         inst->header.predicate_control, NULL);
      else
         err |= control (file, "predicate control align16", pred_ctrl_align16,
                         inst->header.predicate_control, NULL);
      string (file, ") ");
   }

   err |= print_opcode (file, inst->header.opcode);
   err |= control (file, "saturate", saturate, inst->header.saturate, NULL);
   err |= control (file, "debug control", debug_ctrl, inst->header.debug_control, NULL);

   /* the destreg__conditionalmod field is overloaded: math function for
    * MATH, message target/descriptor bits for SEND, conditional modifier
    * otherwise
    */
   if (inst->header.opcode == BRW_OPCODE_MATH) {
      string (file, " ");
      err |= control (file, "function", math_function,
                      inst->header.destreg__conditionalmod, NULL);
   } else if (inst->header.opcode != BRW_OPCODE_SEND &&
              inst->header.opcode != BRW_OPCODE_SENDC) {
      err |= control (file, "conditional modifier", conditional_modifier,
                      inst->header.destreg__conditionalmod, NULL);

      /* If we're using the conditional modifier, print which flags reg is
       * used for it.  Note that on gen6+, the embedded-condition SEL and
       * control flow doesn't update flags.
       */
      if (inst->header.destreg__conditionalmod &&
          (gen < 6 || (inst->header.opcode != BRW_OPCODE_SEL &&
                       inst->header.opcode != BRW_OPCODE_IF &&
                       inst->header.opcode != BRW_OPCODE_WHILE))) {
         format (file, ".f%d", gen >= 7 ? inst->bits2.da1.flag_reg_nr : 0);
         if (inst->bits2.da1.flag_subreg_nr)
            format (file, ".%d", inst->bits2.da1.flag_subreg_nr);
      }
   }

   if (inst->header.opcode != BRW_OPCODE_NOP) {
      string (file, "(");
      err |= control (file, "execution size", exec_size, inst->header.execution_size, NULL);
      string (file, ")");
   }

   if (inst->header.opcode == BRW_OPCODE_SEND && gen < 6)
      format (file, " %d", inst->header.destreg__conditionalmod);

   /* operands, padded into fixed columns */
   if (opcode[inst->header.opcode].nsrc == 3) {
      pad (file, 16);
      err |= dest_3src (file, inst);

      pad (file, 32);
      err |= src0_3src (file, inst);

      pad (file, 48);
      err |= src1_3src (file, inst);

      pad (file, 64);
      err |= src2_3src (file, inst);
   } else {
      if (opcode[inst->header.opcode].ndst > 0) {
         pad (file, 16);
         err |= dest (file, inst);
      } else if (gen == 7 && (inst->header.opcode == BRW_OPCODE_ELSE ||
                              inst->header.opcode == BRW_OPCODE_ENDIF ||
                              inst->header.opcode == BRW_OPCODE_WHILE)) {
         format (file, " %d", inst->bits3.break_cont.jip);
      } else if (gen == 6 && (inst->header.opcode == BRW_OPCODE_IF ||
                              inst->header.opcode == BRW_OPCODE_ELSE ||
                              inst->header.opcode == BRW_OPCODE_ENDIF ||
                              inst->header.opcode == BRW_OPCODE_WHILE)) {
         format (file, " %d", inst->bits1.branch_gen6.jump_count);
      } else if ((gen >= 6 && (inst->header.opcode == BRW_OPCODE_BREAK ||
                               inst->header.opcode == BRW_OPCODE_CONTINUE ||
                               inst->header.opcode == BRW_OPCODE_HALT)) ||
                 (gen == 7 && inst->header.opcode == BRW_OPCODE_IF)) {
         format (file, " %d %d", inst->bits3.break_cont.uip, inst->bits3.break_cont.jip);
      } else if (inst->header.opcode == BRW_OPCODE_JMPI) {
         format (file, " %d", inst->bits3.d);
      }

      if (opcode[inst->header.opcode].nsrc > 0) {
         pad (file, 32);
         err |= src0 (file, inst);
      }
      if (opcode[inst->header.opcode].nsrc > 1) {
         pad (file, 48);
         err |= src1 (file, inst);
      }
   }

   /* SEND/SENDC: decode the message descriptor on a second line */
   if (inst->header.opcode == BRW_OPCODE_SEND ||
       inst->header.opcode == BRW_OPCODE_SENDC) {
      enum brw_message_target target;

      if (gen >= 6)
         target = inst->header.destreg__conditionalmod;
      else if (gen == 5)
         target = inst->bits2.send_gen5.sfid;
      else
         target = inst->bits3.generic.msg_target;

      newline (file);
      pad (file, 16);
      space = 0;

      if (gen >= 6) {
         err |= control (file, "target function", target_function_gen6,
                         target, &space);
      } else {
         err |= control (file, "target function", target_function,
                         target, &space);
      }

      switch (target) {
      case BRW_SFID_MATH:
         err |= control (file, "math function", math_function,
                         inst->bits3.math.function, &space);
         err |= control (file, "math saturate", math_saturate,
                         inst->bits3.math.saturate, &space);
         err |= control (file, "math signed", math_signed,
                         inst->bits3.math.int_type, &space);
         err |= control (file, "math scalar", math_scalar,
                         inst->bits3.math.data_type, &space);
         err |= control (file, "math precision", math_precision,
                         inst->bits3.math.precision, &space);
         break;
      case BRW_SFID_SAMPLER:
         if (gen >= 7) {
            format (file, " (%d, %d, %d, %d)",
                    inst->bits3.sampler_gen7.binding_table_index,
                    inst->bits3.sampler_gen7.sampler,
                    inst->bits3.sampler_gen7.msg_type,
                    inst->bits3.sampler_gen7.simd_mode);
         } else if (gen >= 5) {
            format (file, " (%d, %d, %d, %d)",
                    inst->bits3.sampler_gen5.binding_table_index,
                    inst->bits3.sampler_gen5.sampler,
                    inst->bits3.sampler_gen5.msg_type,
                    inst->bits3.sampler_gen5.simd_mode);
         } else if (0 /* FINISHME: is_g4x */) {
            format (file, " (%d, %d)",
                    inst->bits3.sampler_g4x.binding_table_index,
                    inst->bits3.sampler_g4x.sampler);
         } else {
            format (file, " (%d, %d, ",
                    inst->bits3.sampler.binding_table_index,
                    inst->bits3.sampler.sampler);
            err |= control (file, "sampler target format",
                            sampler_target_format,
                            inst->bits3.sampler.return_format, NULL);
            string (file, ")");
         }
         break;
      case BRW_SFID_DATAPORT_READ:
         if (gen >= 6) {
            format (file, " (%d, %d, %d, %d)",
                    inst->bits3.gen6_dp.binding_table_index,
                    inst->bits3.gen6_dp.msg_control,
                    inst->bits3.gen6_dp.msg_type,
                    inst->bits3.gen6_dp.send_commit_msg);
         } else if (gen >= 5 /* FINISHME: || is_g4x */) {
            format (file, " (%d, %d, %d)",
                    inst->bits3.dp_read_gen5.binding_table_index,
                    inst->bits3.dp_read_gen5.msg_control,
                    inst->bits3.dp_read_gen5.msg_type);
         } else {
            format (file, " (%d, %d, %d)",
                    inst->bits3.dp_read.binding_table_index,
                    inst->bits3.dp_read.msg_control,
                    inst->bits3.dp_read.msg_type);
         }
         break;

      case BRW_SFID_DATAPORT_WRITE:
         if (gen >= 7) {
            format (file, " (");

            err |= control (file, "DP rc message type",
                            dp_rc_msg_type_gen6,
                            inst->bits3.gen7_dp.msg_type, &space);

            format (file, ", %d, %d, %d)",
                    inst->bits3.gen7_dp.binding_table_index,
                    inst->bits3.gen7_dp.msg_control,
                    inst->bits3.gen7_dp.msg_type);
         } else if (gen == 6) {
            format (file, " (");

            err |= control (file, "DP rc message type",
                            dp_rc_msg_type_gen6,
                            inst->bits3.gen6_dp.msg_type, &space);

            format (file, ", %d, %d, %d, %d)",
                    inst->bits3.gen6_dp.binding_table_index,
                    inst->bits3.gen6_dp.msg_control,
                    inst->bits3.gen6_dp.msg_type,
                    inst->bits3.gen6_dp.send_commit_msg);
         } else {
            format (file, " (%d, %d, %d, %d)",
                    inst->bits3.dp_write.binding_table_index,
                    (inst->bits3.dp_write.last_render_target << 3) |
                    inst->bits3.dp_write.msg_control,
                    inst->bits3.dp_write.msg_type,
                    inst->bits3.dp_write.send_commit_msg);
         }
         break;

      case BRW_SFID_URB:
         if (gen >= 5) {
            format (file, " %d", inst->bits3.urb_gen5.offset);
         } else {
            format (file, " %d", inst->bits3.urb.offset);
         }

         space = 1;
         if (gen >= 5) {
            err |= control (file, "urb opcode", urb_opcode,
                            inst->bits3.urb_gen5.opcode, &space);
         }
         err |= control (file, "urb swizzle", urb_swizzle,
                         inst->bits3.urb.swizzle_control, &space);
         err |= control (file, "urb allocate", urb_allocate,
                         inst->bits3.urb.allocate, &space);
         err |= control (file, "urb used", urb_used,
                         inst->bits3.urb.used, &space);
         err |= control (file, "urb complete", urb_complete,
                         inst->bits3.urb.complete, &space);
         break;
      case BRW_SFID_THREAD_SPAWNER:
         break;
      case GEN7_SFID_DATAPORT_DATA_CACHE:
         format (file, " (%d, %d, %d)",
                 inst->bits3.gen7_dp.binding_table_index,
                 inst->bits3.gen7_dp.msg_control,
                 inst->bits3.gen7_dp.msg_type);
         break;


      default:
         format (file, "unsupported target %d", target);
         break;
      }
      if (space)
         string (file, " ");
      if (gen >= 5) {
         format (file, "mlen %d",
                 inst->bits3.generic_gen5.msg_length);
         format (file, " rlen %d",
                 inst->bits3.generic_gen5.response_length);
      } else {
         format (file, "mlen %d",
                 inst->bits3.generic.msg_length);
         format (file, " rlen %d",
                 inst->bits3.generic.response_length);
      }
   }
   /* trailing "{...}" group of instruction-option flags */
   pad (file, 64);
   if (inst->header.opcode != BRW_OPCODE_NOP) {
      string (file, "{");
      space = 1;
      err |= control(file, "access mode", access_mode, inst->header.access_mode, &space);
      if (gen >= 6)
         err |= control (file, "write enable control", wectrl, inst->header.mask_control, &space);
      else
         err |= control (file, "mask control", mask_ctrl, inst->header.mask_control, &space);
      err |= control (file, "dependency control", dep_ctrl, inst->header.dependency_control, &space);

      if (gen >= 6)
         err |= qtr_ctrl (file, inst);
      else {
         if (inst->header.compression_control == BRW_COMPRESSION_COMPRESSED &&
             opcode[inst->header.opcode].ndst > 0 &&
             inst->bits1.da1.dest_reg_file == BRW_MESSAGE_REGISTER_FILE &&
             inst->bits1.da1.dest_reg_nr & (1 << 7)) {
            format (file, " compr4");
         } else {
            err |= control (file, "compression control", compr_ctrl,
                            inst->header.compression_control, &space);
         }
      }

      err |= control (file, "thread control", thread_ctrl, inst->header.thread_control, &space);
      if (gen >= 6)
         err |= control (file, "acc write control", accwr, inst->header.acc_wr_control, &space);
      if (inst->header.opcode == BRW_OPCODE_SEND ||
          inst->header.opcode == BRW_OPCODE_SENDC)
         err |= control (file, "end of thread", end_of_thread,
                         inst->bits3.generic.end_of_thread, &space);
      if (space)
         string (file, " ");
      string (file, "}");
   }
   string (file, ";");
   newline (file);
   return err;
}
/contrib/sdk/sources/Mesa/src/gallium/drivers/ilo/shader/toy_compiler_reg.h
0,0 → 1,800
/*
* Mesa 3-D graphics library
*
* Copyright (C) 2012-2013 LunarG, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*
* Authors:
* Chia-I Wu <olv@lunarg.com>
*/
 
#ifndef TOY_REG_H
#define TOY_REG_H
 
#include "pipe/p_compiler.h"
#include "util/u_debug.h" /* for assert() */
#include "util/u_math.h" /* for union fi */
 
/* a toy reg is 256-bit wide */
#define TOY_REG_WIDTH 32
 
/**
* Register files.
*/
enum toy_file {
   /* virtual register file */
   TOY_FILE_VRF,

   TOY_FILE_ARF,   /* architecture registers */
   TOY_FILE_GRF,   /* general registers */
   TOY_FILE_MRF,   /* message registers (absent on GEN7+, see toy_file_size) */
   TOY_FILE_IMM,   /* immediate operands */

   TOY_FILE_COUNT,
};
 
/**
* Register types.
*/
enum toy_type {
   TOY_TYPE_F,    /* 32-bit float */
   TOY_TYPE_D,    /* 32-bit signed integer */
   TOY_TYPE_UD,   /* 32-bit unsigned integer */
   TOY_TYPE_W,    /* 16-bit signed integer */
   TOY_TYPE_UW,   /* 16-bit unsigned integer */
   TOY_TYPE_V,    /* only valid for immediates */

   TOY_TYPE_COUNT,
};
 
/**
* Register rectangles. The three numbers stand for vertical stride, width,
* and horizontal stride respectively.
*/
enum toy_rect {
   /* the default: a linearly increasing region */
   TOY_RECT_LINEAR,
   TOY_RECT_041,   /* <0,4,1> */
   TOY_RECT_010,   /* <0,1,0> */
   TOY_RECT_220,   /* <2,2,0> */
   TOY_RECT_440,   /* <4,4,0> */
   TOY_RECT_240,   /* <2,4,0> */

   TOY_RECT_COUNT,
};
 
/**
* Source swizzles. They are compatible with TGSI_SWIZZLE_x and hardware
* values.
*/
enum toy_swizzle {
   /* numeric values match TGSI_SWIZZLE_x and the hardware encoding */
   TOY_SWIZZLE_X = 0,
   TOY_SWIZZLE_Y = 1,
   TOY_SWIZZLE_Z = 2,
   TOY_SWIZZLE_W = 3,
};
 
/**
* Destination writemasks. They are compatible with TGSI_WRITEMASK_x and
* hardware values.
*/
enum toy_writemask {
   /* one bit per channel, indexed by the corresponding swizzle value */
   TOY_WRITEMASK_X = (1 << TOY_SWIZZLE_X),
   TOY_WRITEMASK_Y = (1 << TOY_SWIZZLE_Y),
   TOY_WRITEMASK_Z = (1 << TOY_SWIZZLE_Z),
   TOY_WRITEMASK_W = (1 << TOY_SWIZZLE_W),
   /* all two-, three-, and four-channel combinations */
   TOY_WRITEMASK_XY = (TOY_WRITEMASK_X | TOY_WRITEMASK_Y),
   TOY_WRITEMASK_XZ = (TOY_WRITEMASK_X | TOY_WRITEMASK_Z),
   TOY_WRITEMASK_XW = (TOY_WRITEMASK_X | TOY_WRITEMASK_W),
   TOY_WRITEMASK_YZ = (TOY_WRITEMASK_Y | TOY_WRITEMASK_Z),
   TOY_WRITEMASK_YW = (TOY_WRITEMASK_Y | TOY_WRITEMASK_W),
   TOY_WRITEMASK_ZW = (TOY_WRITEMASK_Z | TOY_WRITEMASK_W),
   TOY_WRITEMASK_XYZ = (TOY_WRITEMASK_X | TOY_WRITEMASK_Y | TOY_WRITEMASK_Z),
   TOY_WRITEMASK_XYW = (TOY_WRITEMASK_X | TOY_WRITEMASK_Y | TOY_WRITEMASK_W),
   TOY_WRITEMASK_XZW = (TOY_WRITEMASK_X | TOY_WRITEMASK_Z | TOY_WRITEMASK_W),
   TOY_WRITEMASK_YZW = (TOY_WRITEMASK_Y | TOY_WRITEMASK_Z | TOY_WRITEMASK_W),
   TOY_WRITEMASK_XYZW = (TOY_WRITEMASK_X | TOY_WRITEMASK_Y |
                         TOY_WRITEMASK_Z | TOY_WRITEMASK_W),
};
 
/**
* Destination operand.
*/
struct toy_dst {
   unsigned file:3;            /* TOY_FILE_x */
   unsigned type:3;            /* TOY_TYPE_x */
   unsigned rect:3;            /* TOY_RECT_x */
   unsigned indirect:1;        /* true or false */
   unsigned indirect_subreg:6; /* which subreg of a0? */

   unsigned writemask:4;       /* TOY_WRITEMASK_x */
   unsigned pad:12;

   /* byte offset into the file (reg * TOY_REG_WIDTH + subreg bytes, see
    * tdst_offset()), or the raw immediate bits for TOY_FILE_IMM
    */
   uint32_t val32;
};
 
/**
* Source operand.
*/
struct toy_src {
   unsigned file:3;            /* TOY_FILE_x */
   unsigned type:3;            /* TOY_TYPE_x */
   unsigned rect:3;            /* TOY_RECT_x */
   unsigned indirect:1;        /* true or false */
   unsigned indirect_subreg:6; /* which subreg of a0? */

   unsigned swizzle_x:2;       /* TOY_SWIZZLE_x */
   unsigned swizzle_y:2;       /* TOY_SWIZZLE_x */
   unsigned swizzle_z:2;       /* TOY_SWIZZLE_x */
   unsigned swizzle_w:2;       /* TOY_SWIZZLE_x */
   unsigned absolute:1;        /* true or false */
   unsigned negate:1;          /* true or false */
   unsigned pad:6;

   /* byte offset into the file, or the raw immediate bits for TOY_FILE_IMM */
   uint32_t val32;
};
 
/**
* Return true if the file is virtual.
*/
/* Return true if the file is virtual (only the VRF is). */
static inline bool
toy_file_is_virtual(enum toy_file file)
{
   return file == TOY_FILE_VRF;
}
 
/**
* Return true if the file is a hardware one.
*/
static inline bool
toy_file_is_hw(enum toy_file file)
{
return !toy_file_is_virtual(file);
}
 
/**
* Return the size of the file.
*/
/* Return the addressable size of the file in bytes.  Only GRF and MRF
 * have a defined size; any other file asserts and yields 0.
 */
static inline uint32_t
toy_file_size(enum toy_file file)
{
   if (file == TOY_FILE_GRF)
      return 256 * TOY_REG_WIDTH;
   if (file == TOY_FILE_MRF) {
      /* there is no MRF on GEN7+ */
      return 256 * TOY_REG_WIDTH;
   }

   assert(!"invalid toy file");
   return 0;
}
 
/**
* Return the size of the type.
*/
/* Return the element size of the type in bytes.  TOY_TYPE_V (immediates
 * only) has no element size and asserts, yielding 0.
 */
static inline int
toy_type_size(enum toy_type type)
{
   switch (type) {
   case TOY_TYPE_F:
   case TOY_TYPE_D:
   case TOY_TYPE_UD:
      return 4;
   case TOY_TYPE_W:
   case TOY_TYPE_UW:
      return 2;
   default:
      assert(!"invalid toy type");
      return 0;
   }
}
 
/**
* Return true if the destination operand is null.
*/
/* Return true if the destination is the null register.
 * BRW_ARF_NULL happens to be 0, so ARF with a zero offset is null.
 */
static inline bool
tdst_is_null(struct toy_dst dst)
{
   return dst.file == TOY_FILE_ARF && dst.val32 == 0;
}
 
/**
* Validate the destination operand.
*/
/**
 * Validate the destination operand: assert (debug builds) that the
 * file/type/rect/offset combination is representable, then return dst
 * unchanged so the call can be used inline in expressions.
 */
static inline struct toy_dst
tdst_validate(struct toy_dst dst)
{
   switch (dst.file) {
   case TOY_FILE_VRF:
   case TOY_FILE_ARF:
   case TOY_FILE_MRF:
      /* of the register files, only the GRF may be addressed indirectly */
      assert(!dst.indirect);
      if (dst.file == TOY_FILE_MRF)
         assert(dst.val32 < toy_file_size(dst.file));
      break;
   case TOY_FILE_GRF:
      if (!dst.indirect)
         assert(dst.val32 < toy_file_size(dst.file));
      break;
   case TOY_FILE_IMM:
      /* yes, dst can be IMM of type W (for IF/ELSE/ENDIF/WHILE) */
      assert(!dst.indirect);
      assert(dst.type == TOY_TYPE_W);
      break;
   default:
      assert(!"invalid dst file");
      break;
   }

   switch (dst.type) {
   case TOY_TYPE_V:
      /* V is an immediate-only source type; never valid as a dst type */
      assert(!"invalid dst type");
      break;
   default:
      break;
   }

   /* destinations are always linear and type-aligned */
   assert(dst.rect == TOY_RECT_LINEAR);
   if (dst.file != TOY_FILE_IMM)
      assert(dst.val32 % toy_type_size(dst.type) == 0);

   assert(dst.writemask <= TOY_WRITEMASK_XYZW);

   return dst;
}
 
/**
* Change the type of the destination operand.
*/
static inline struct toy_dst
tdst_type(struct toy_dst dst, enum toy_type type)
{
dst.type = type;
return tdst_validate(dst);
}
 
/**
* Change the type of the destination operand to TOY_TYPE_D.
*/
/* Shorthand: reinterpret dst as TOY_TYPE_D. */
static inline struct toy_dst
tdst_d(struct toy_dst dst)
{
   dst.type = TOY_TYPE_D;
   return tdst_validate(dst);
}
 
/**
* Change the type of the destination operand to TOY_TYPE_UD.
*/
/* Shorthand: reinterpret dst as TOY_TYPE_UD. */
static inline struct toy_dst
tdst_ud(struct toy_dst dst)
{
   dst.type = TOY_TYPE_UD;
   return tdst_validate(dst);
}
 
/**
* Change the type of the destination operand to TOY_TYPE_W.
*/
/* Shorthand: reinterpret dst as TOY_TYPE_W. */
static inline struct toy_dst
tdst_w(struct toy_dst dst)
{
   dst.type = TOY_TYPE_W;
   return tdst_validate(dst);
}
 
/**
* Change the type of the destination operand to TOY_TYPE_UW.
*/
/* Shorthand: reinterpret dst as TOY_TYPE_UW. */
static inline struct toy_dst
tdst_uw(struct toy_dst dst)
{
   dst.type = TOY_TYPE_UW;
   return tdst_validate(dst);
}
 
/**
* Change the rectangle of the destination operand.
*/
static inline struct toy_dst
tdst_rect(struct toy_dst dst, enum toy_rect rect)
{
dst.rect = rect;
return tdst_validate(dst);
}
 
/**
* Apply writemask to the destination operand. Note that the current
* writemask is honored.
*/
/* Narrow dst's writemask: only channels enabled in BOTH the current mask
 * and "writemask" remain enabled.
 */
static inline struct toy_dst
tdst_writemask(struct toy_dst dst, enum toy_writemask writemask)
{
   dst.writemask = dst.writemask & writemask;
   return tdst_validate(dst);
}
 
/**
* Offset the destination operand.
*/
/* Advance dst by "reg" whole registers plus "subreg" elements of its type. */
static inline struct toy_dst
tdst_offset(struct toy_dst dst, int reg, int subreg)
{
   const int bytes = reg * TOY_REG_WIDTH + subreg * toy_type_size(dst.type);

   dst.val32 += bytes;
   return tdst_validate(dst);
}
 
/**
* Construct a destination operand.
*/
static inline struct toy_dst
tdst_full(enum toy_file file, enum toy_type type, enum toy_rect rect,
bool indirect, unsigned indirect_subreg,
enum toy_writemask writemask, uint32_t val32)
{
struct toy_dst dst;
 
dst.file = file;
dst.type = type;
dst.rect = rect;
dst.indirect = indirect;
dst.indirect_subreg = indirect_subreg;
dst.writemask = writemask;
dst.pad = 0;
 
dst.val32 = val32;
 
return tdst_validate(dst);
}
 
/**
* Construct a null destination operand.
*/
/* Construct the null destination operand (ARF null, float, full mask). */
static inline struct toy_dst
tdst_null(void)
{
   const struct toy_dst null_dst = {
      .file = TOY_FILE_ARF,
      .type = TOY_TYPE_F,
      .rect = TOY_RECT_LINEAR,
      .indirect = false,
      .indirect_subreg = 0,
      .writemask = TOY_WRITEMASK_XYZW,
      .pad = 0,
      .val32 = 0,
   };

   return null_dst;
}
 
/**
* Construct a destination operand from a source operand.
*/
static inline struct toy_dst
tdst_from(struct toy_src src)
{
const enum toy_writemask writemask =
(1 << src.swizzle_x) |
(1 << src.swizzle_y) |
(1 << src.swizzle_z) |
(1 << src.swizzle_w);
 
return tdst_full(src.file, src.type, src.rect,
src.indirect, src.indirect_subreg, writemask, src.val32);
}
 
/**
 * Construct a destination operand, assuming the type is TOY_TYPE_F, the
 * rectangle is TOY_RECT_LINEAR, and the writemask is TOY_WRITEMASK_XYZW.
 */
static inline struct toy_dst
tdst(enum toy_file file, unsigned reg, unsigned subreg_in_bytes)
{
   const enum toy_type type = TOY_TYPE_F;
   const enum toy_rect rect = TOY_RECT_LINEAR;
   /* registers are TOY_REG_WIDTH bytes; val32 is a byte offset */
   const uint32_t val32 = reg * TOY_REG_WIDTH + subreg_in_bytes;

   return tdst_full(file, type, rect,
         false, 0, TOY_WRITEMASK_XYZW, val32);
}

/**
 * Construct an immediate destination operand of type TOY_TYPE_W.
 * The 16-bit value is sign-extended into the 32-bit immediate slot.
 */
static inline struct toy_dst
tdst_imm_w(int16_t w)
{
   const union fi fi = { .i = w };

   return tdst_full(TOY_FILE_IMM, TOY_TYPE_W, TOY_RECT_LINEAR,
         false, 0, TOY_WRITEMASK_XYZW, fi.ui);
}
 
/**
 * Return true if the source operand is null.
 */
static inline bool
tsrc_is_null(struct toy_src src)
{
   /* BRW_ARF_NULL happens to be 0 */
   return (src.file == TOY_FILE_ARF && src.val32 == 0);
}

/**
 * Return true if the source operand is swizzled, i.e. any channel reads
 * from a channel other than itself.
 */
static inline bool
tsrc_is_swizzled(struct toy_src src)
{
   return (src.swizzle_x != TOY_SWIZZLE_X ||
           src.swizzle_y != TOY_SWIZZLE_Y ||
           src.swizzle_z != TOY_SWIZZLE_Z ||
           src.swizzle_w != TOY_SWIZZLE_W);
}

/**
 * Return true if the source operand is swizzled to the same channel
 * (a broadcast such as .xxxx or .wwww).
 */
static inline bool
tsrc_is_swizzle1(struct toy_src src)
{
   return (src.swizzle_x == src.swizzle_y &&
           src.swizzle_x == src.swizzle_z &&
           src.swizzle_x == src.swizzle_w);
}
 
/**
 * Validate the source operand.  This is assert-only sanity checking: the
 * operand is returned unchanged, and in release builds the checks
 * disappear entirely.
 */
static inline struct toy_src
tsrc_validate(struct toy_src src)
{
   switch (src.file) {
   case TOY_FILE_VRF:
   case TOY_FILE_ARF:
   case TOY_FILE_MRF:
      /* these files never support indirect addressing */
      assert(!src.indirect);
      if (src.file == TOY_FILE_MRF)
         assert(src.val32 < toy_file_size(src.file));
      break;
   case TOY_FILE_GRF:
      /* a direct GRF access must stay within the file */
      if (!src.indirect)
         assert(src.val32 < toy_file_size(src.file));
      break;
   case TOY_FILE_IMM:
      assert(!src.indirect);
      break;
   default:
      assert(!"invalid src file");
      break;
   }

   switch (src.type) {
   case TOY_TYPE_V:
      /* packed vector type only exists as an immediate */
      assert(src.file == TOY_FILE_IMM);
      break;
   default:
      break;
   }

   /* register offsets must be aligned to the element size */
   if (src.file != TOY_FILE_IMM)
      assert(src.val32 % toy_type_size(src.type) == 0);

   /* swizzles are 2-bit channel selectors */
   assert(src.swizzle_x < 4 && src.swizzle_y < 4 &&
          src.swizzle_z < 4 && src.swizzle_w < 4);

   return src;
}
 
/**
 * Change the type of the source operand.
 */
static inline struct toy_src
tsrc_type(struct toy_src src, enum toy_type type)
{
   src.type = type;
   return tsrc_validate(src);
}

/**
 * Change the type of the source operand to TOY_TYPE_D.
 */
static inline struct toy_src
tsrc_d(struct toy_src src)
{
   return tsrc_type(src, TOY_TYPE_D);
}

/**
 * Change the type of the source operand to TOY_TYPE_UD.
 */
static inline struct toy_src
tsrc_ud(struct toy_src src)
{
   return tsrc_type(src, TOY_TYPE_UD);
}

/**
 * Change the type of the source operand to TOY_TYPE_W.
 */
static inline struct toy_src
tsrc_w(struct toy_src src)
{
   return tsrc_type(src, TOY_TYPE_W);
}

/**
 * Change the type of the source operand to TOY_TYPE_UW.
 */
static inline struct toy_src
tsrc_uw(struct toy_src src)
{
   return tsrc_type(src, TOY_TYPE_UW);
}
 
/**
 * Change the rectangle of the source operand.
 */
static inline struct toy_src
tsrc_rect(struct toy_src src, enum toy_rect rect)
{
   src.rect = rect;
   return tsrc_validate(src);
}

/**
 * Swizzle the source operand.  Note that the current swizzles are
 * honored: the new selectors index into the existing ones, so the result
 * is the composition of both swizzles.
 */
static inline struct toy_src
tsrc_swizzle(struct toy_src src,
             enum toy_swizzle swizzle_x, enum toy_swizzle swizzle_y,
             enum toy_swizzle swizzle_z, enum toy_swizzle swizzle_w)
{
   const enum toy_swizzle current[4] = {
      src.swizzle_x, src.swizzle_y,
      src.swizzle_z, src.swizzle_w,
   };

   src.swizzle_x = current[swizzle_x];
   src.swizzle_y = current[swizzle_y];
   src.swizzle_z = current[swizzle_z];
   src.swizzle_w = current[swizzle_w];

   return tsrc_validate(src);
}

/**
 * Swizzle the source operand to the same channel (broadcast).  Note that
 * the current swizzles are honored.
 */
static inline struct toy_src
tsrc_swizzle1(struct toy_src src, enum toy_swizzle swizzle)
{
   return tsrc_swizzle(src, swizzle, swizzle, swizzle, swizzle);
}

/**
 * Set absolute and unset negate of the source operand.
 */
static inline struct toy_src
tsrc_absolute(struct toy_src src)
{
   src.absolute = true;
   src.negate = false;
   return tsrc_validate(src);
}

/**
 * Negate the source operand.  Toggles rather than sets, so negating
 * twice restores the original sign.
 */
static inline struct toy_src
tsrc_negate(struct toy_src src)
{
   src.negate = !src.negate;
   return tsrc_validate(src);
}

/**
 * Offset the source operand.  \p reg is in whole registers and \p subreg
 * is in elements of the operand's current type.
 */
static inline struct toy_src
tsrc_offset(struct toy_src src, int reg, int subreg)
{
   src.val32 += reg * TOY_REG_WIDTH + subreg * toy_type_size(src.type);
   return tsrc_validate(src);
}
 
/**
 * Construct a source operand from its individual fields.
 * The result is validated before being returned.
 */
static inline struct toy_src
tsrc_full(enum toy_file file, enum toy_type type,
          enum toy_rect rect, bool indirect, unsigned indirect_subreg,
          enum toy_swizzle swizzle_x, enum toy_swizzle swizzle_y,
          enum toy_swizzle swizzle_z, enum toy_swizzle swizzle_w,
          bool absolute, bool negate,
          uint32_t val32)
{
   /* build the operand in one shot with designated initializers */
   struct toy_src src = {
      .file = file,
      .type = type,
      .rect = rect,
      .indirect = indirect,
      .indirect_subreg = indirect_subreg,
      .swizzle_x = swizzle_x,
      .swizzle_y = swizzle_y,
      .swizzle_z = swizzle_z,
      .swizzle_w = swizzle_w,
      .absolute = absolute,
      .negate = negate,
      .pad = 0,
      .val32 = val32,
   };

   return tsrc_validate(src);
}
 
/**
 * Construct a null source operand: ARF file at offset 0 (BRW_ARF_NULL),
 * identity swizzles, no source modifiers.
 */
static inline struct toy_src
tsrc_null(void)
{
   const struct toy_src null_src = {
      .file = TOY_FILE_ARF,
      .type = TOY_TYPE_F,
      .rect = TOY_RECT_LINEAR,
      .indirect = false,
      .indirect_subreg = 0,
      .swizzle_x = TOY_SWIZZLE_X,
      .swizzle_y = TOY_SWIZZLE_Y,
      .swizzle_z = TOY_SWIZZLE_Z,
      .swizzle_w = TOY_SWIZZLE_W,
      .absolute = false,
      .negate = false,
      .pad = 0,
      .val32 = 0,
   };

   return null_src;
}
 
/**
* Construct a source operand from a destination operand.
*/
static inline struct toy_src
tsrc_from(struct toy_dst dst)
{
enum toy_swizzle swizzle[4];
 
if (dst.writemask == TOY_WRITEMASK_XYZW) {
swizzle[0] = TOY_SWIZZLE_X;
swizzle[1] = TOY_SWIZZLE_Y;
swizzle[2] = TOY_SWIZZLE_Z;
swizzle[3] = TOY_SWIZZLE_W;
}
else {
const enum toy_swizzle first =
(dst.writemask & TOY_WRITEMASK_X) ? TOY_SWIZZLE_X :
(dst.writemask & TOY_WRITEMASK_Y) ? TOY_SWIZZLE_Y :
(dst.writemask & TOY_WRITEMASK_Z) ? TOY_SWIZZLE_Z :
(dst.writemask & TOY_WRITEMASK_W) ? TOY_SWIZZLE_W :
TOY_SWIZZLE_X;
 
swizzle[0] = (dst.writemask & TOY_WRITEMASK_X) ? TOY_SWIZZLE_X : first;
swizzle[1] = (dst.writemask & TOY_WRITEMASK_Y) ? TOY_SWIZZLE_Y : first;
swizzle[2] = (dst.writemask & TOY_WRITEMASK_Z) ? TOY_SWIZZLE_Z : first;
swizzle[3] = (dst.writemask & TOY_WRITEMASK_W) ? TOY_SWIZZLE_W : first;
}
 
return tsrc_full(dst.file, dst.type, dst.rect,
dst.indirect, dst.indirect_subreg,
swizzle[0], swizzle[1], swizzle[2], swizzle[3],
false, false, dst.val32);
}
 
/**
 * Construct a source operand, assuming the type is TOY_TYPE_F, the
 * rectangle is TOY_RECT_LINEAR, and no swizzles/absolute/negate.
 */
static inline struct toy_src
tsrc(enum toy_file file, unsigned reg, unsigned subreg_in_bytes)
{
   const enum toy_type type = TOY_TYPE_F;
   const enum toy_rect rect = TOY_RECT_LINEAR;
   /* registers are TOY_REG_WIDTH bytes; val32 is a byte offset */
   const uint32_t val32 = reg * TOY_REG_WIDTH + subreg_in_bytes;

   return tsrc_full(file, type, rect, false, 0,
         TOY_SWIZZLE_X, TOY_SWIZZLE_Y,
         TOY_SWIZZLE_Z, TOY_SWIZZLE_W,
         false, false, val32);
}

/**
 * Construct an immediate source operand.  \p val32 holds the raw bit
 * pattern of the immediate, interpreted according to \p type.
 */
static inline struct toy_src
tsrc_imm(enum toy_type type, uint32_t val32)
{
   return tsrc_full(TOY_FILE_IMM, type, TOY_RECT_LINEAR, false, 0,
         TOY_SWIZZLE_X, TOY_SWIZZLE_Y,
         TOY_SWIZZLE_Z, TOY_SWIZZLE_W,
         false, false, val32);
}
 
/**
 * Construct an immediate source operand of type TOY_TYPE_F.  The union
 * reinterprets the float's bit pattern as a uint32_t without violating
 * strict aliasing.
 */
static inline struct toy_src
tsrc_imm_f(float f)
{
   const union fi fi = { .f = f };
   return tsrc_imm(TOY_TYPE_F, fi.ui);
}

/**
 * Construct an immediate source operand of type TOY_TYPE_D.
 */
static inline struct toy_src
tsrc_imm_d(int32_t d)
{
   const union fi fi = { .i = d };
   return tsrc_imm(TOY_TYPE_D, fi.ui);
}

/**
 * Construct an immediate source operand of type TOY_TYPE_UD.
 */
static inline struct toy_src
tsrc_imm_ud(uint32_t ud)
{
   const union fi fi = { .ui = ud };
   return tsrc_imm(TOY_TYPE_UD, fi.ui);
}

/**
 * Construct an immediate source operand of type TOY_TYPE_W.  The 16-bit
 * value is sign-extended into the 32-bit immediate slot.
 */
static inline struct toy_src
tsrc_imm_w(int16_t w)
{
   const union fi fi = { .i = w };
   return tsrc_imm(TOY_TYPE_W, fi.ui);
}

/**
 * Construct an immediate source operand of type TOY_TYPE_UW.
 */
static inline struct toy_src
tsrc_imm_uw(uint16_t uw)
{
   const union fi fi = { .ui = uw };
   return tsrc_imm(TOY_TYPE_UW, fi.ui);
}

/**
 * Construct an immediate source operand of type TOY_TYPE_V (packed
 * vector of 8 signed 4-bit values, passed as its raw 32-bit pattern).
 */
static inline struct toy_src
tsrc_imm_v(uint32_t v)
{
   return tsrc_imm(TOY_TYPE_V, v);
}
 
#endif /* TOY_REG_H */
/contrib/sdk/sources/Mesa/src/gallium/drivers/ilo/shader/toy_helpers.h
0,0 → 1,289
/*
* Mesa 3-D graphics library
*
* Copyright (C) 2012-2013 LunarG, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*
* Authors:
* Chia-I Wu <olv@lunarg.com>
*/
 
#ifndef TOY_HELPERS_H
#define TOY_HELPERS_H
 
#include "toy_compiler.h"
 
/**
 * Transpose a dst operand.
 *
 * Instead of processing a single vertex with each of its attributes in one
 * register, such as
 *
 *   r0 = [x0, y0, z0, w0]
 *
 * we want to process four vertices at a time
 *
 *   r0 = [x0, y0, z0, w0]
 *   r1 = [x1, y1, z1, w1]
 *   r2 = [x2, y2, z2, w2]
 *   r3 = [x3, y3, z3, w3]
 *
 * but with the attribute data "transposed"
 *
 *   r0 = [x0, x1, x2, x3]
 *   r1 = [y0, y1, y2, y3]
 *   r2 = [z0, z1, z2, z3]
 *   r3 = [w0, w1, w2, w3]
 *
 * This is also known as the SoA form.
 *
 * \param dst    the AoS-style destination to transpose
 * \param trans  out array of 4 operands, one per channel; channels
 *               masked out in \p dst become null operands
 */
static inline void
tdst_transpose(struct toy_dst dst, struct toy_dst *trans)
{
   int i;

   switch (dst.file) {
   case TOY_FILE_VRF:
      assert(!dst.indirect);
      for (i = 0; i < 4; i++) {
         if (dst.writemask & (1 << i)) {
            /* channel i maps to the i-th consecutive register */
            trans[i] = tdst_offset(dst, i, 0);
            trans[i].writemask = TOY_WRITEMASK_XYZW;
         }
         else {
            trans[i] = tdst_null();
         }
      }
      break;
   case TOY_FILE_ARF:
      /* only the null dst is expected here; replicate it */
      assert(tdst_is_null(dst));
      for (i = 0; i < 4; i++)
         trans[i] = dst;
      break;
   case TOY_FILE_GRF:
   case TOY_FILE_MRF:
   case TOY_FILE_IMM:
   default:
      assert(!"unexpected file in dst transposition");
      for (i = 0; i < 4; i++)
         trans[i] = tdst_null();
      break;
   }
}
 
/**
 * Transpose a src operand into its SoA form (see tdst_transpose()).
 *
 * \param src    the AoS-style source to transpose
 * \param trans  out array of 4 operands; trans[i] reads the register
 *               selected by channel i's swizzle, with the swizzle reset
 *               to identity
 */
static inline void
tsrc_transpose(struct toy_src src, struct toy_src *trans)
{
   const enum toy_swizzle swizzle[4] = {
      src.swizzle_x, src.swizzle_y,
      src.swizzle_z, src.swizzle_w,
   };
   int i;

   switch (src.file) {
   case TOY_FILE_VRF:
      assert(!src.indirect);
      for (i = 0; i < 4; i++) {
         /* the swizzle selects which consecutive register to read */
         trans[i] = tsrc_offset(src, swizzle[i], 0);
         trans[i].swizzle_x = TOY_SWIZZLE_X;
         trans[i].swizzle_y = TOY_SWIZZLE_Y;
         trans[i].swizzle_z = TOY_SWIZZLE_Z;
         trans[i].swizzle_w = TOY_SWIZZLE_W;
      }
      break;
   case TOY_FILE_ARF:
      assert(tsrc_is_null(src));
      /* fall through */
   case TOY_FILE_IMM:
      /* null and immediate operands are the same in every channel */
      for (i = 0; i < 4; i++)
         trans[i] = src;
      break;
   case TOY_FILE_GRF:
   case TOY_FILE_MRF:
   default:
      assert(!"unexpected file in src transposition");
      for (i = 0; i < 4; i++)
         trans[i] = tsrc_null();
      break;
   }
}
 
/**
 * Construct an immediate UD operand holding a SEND message descriptor.
 *
 * The bit layout (EOT at 31, lengths at 25/20, header-present at 19,
 * function control in the low 19 bits) follows the *2 encoding used by
 * the callers below; \p tc is unused here but kept for API symmetry with
 * the other mdesc helpers.
 */
static inline struct toy_src
tsrc_imm_mdesc(const struct toy_compiler *tc,
               bool eot,
               unsigned message_length,
               unsigned response_length,
               bool header_present,
               uint32_t function_control)
{
   uint32_t desc;

   assert(message_length >= 1 && message_length <= 15);
   /* response_length is unsigned, so only the upper bound is meaningful */
   assert(response_length <= 16);
   assert(function_control < 1 << 19);

   /*
    * cast to uint32_t before shifting: a bare "eot << 31" promotes the
    * bool to (signed) int and shifting into the sign bit is undefined
    * behavior
    */
   desc = (uint32_t) eot << 31 |
          message_length << 25 |
          response_length << 20 |
          (uint32_t) header_present << 19 |
          function_control;

   return tsrc_imm_ud(desc);
}
 
/**
 * Construct an immediate message descriptor for the sampler shared
 * function.  The SIMD-mode field moved from bit 16 to bit 17 on GEN7,
 * hence the generation check; the other fields are laid out identically.
 */
static inline struct toy_src
tsrc_imm_mdesc_sampler(const struct toy_compiler *tc,
                       unsigned message_length,
                       unsigned response_length,
                       bool header_present,
                       unsigned simd_mode,
                       unsigned message_type,
                       unsigned sampler_index,
                       unsigned binding_table_index)
{
   /* sampler messages never terminate the thread */
   const bool eot = false;
   uint32_t ctrl;

   assert(simd_mode < 4);
   assert(sampler_index < 16);
   assert(binding_table_index < 256);

   if (tc->dev->gen >= ILO_GEN(7)) {
      ctrl = simd_mode << 17 |
             message_type << 12 |
             sampler_index << 8 |
             binding_table_index;
   }
   else {
      ctrl = simd_mode << 16 |
             message_type << 12 |
             sampler_index << 8 |
             binding_table_index;
   }

   return tsrc_imm_mdesc(tc, eot, message_length,
         response_length, header_present, ctrl);
}
 
/**
 * Construct an immediate message descriptor for the data port.  GEN7
 * dropped the write-commit bit and widened the message-type field, so
 * the two layouts differ in both field positions and valid masks.
 */
static inline struct toy_src
tsrc_imm_mdesc_data_port(const struct toy_compiler *tc,
                         bool eot,
                         unsigned message_length,
                         unsigned response_length,
                         bool header_present,
                         bool send_write_commit_message,
                         unsigned message_type,
                         unsigned message_specific_control,
                         unsigned binding_table_index)
{
   uint32_t ctrl;

   if (tc->dev->gen >= ILO_GEN(7)) {
      /* no write-commit on GEN7+; control bits live in [13:8] */
      assert(!send_write_commit_message);
      assert((message_specific_control & 0x3f00) == message_specific_control);

      ctrl = message_type << 14 |
             (message_specific_control & 0x3f00) |
             binding_table_index;
   }
   else {
      /* write-commit is only valid for streamed VB writes on GEN6 */
      assert(!send_write_commit_message ||
             message_type == GEN6_DATAPORT_WRITE_MESSAGE_STREAMED_VB_WRITE);
      assert((message_specific_control & 0x1f00) == message_specific_control);

      ctrl = send_write_commit_message << 17 |
             message_type << 13 |
             (message_specific_control & 0x1f00) |
             binding_table_index;
   }

   return tsrc_imm_mdesc(tc, eot, message_length,
         response_length, header_present, ctrl);
}
 
/**
 * Construct an immediate message descriptor for GEN7+ scratch-space
 * data-port access (register spill/fill).  Scratch messages always carry
 * a header and never terminate the thread.
 */
static inline struct toy_src
tsrc_imm_mdesc_data_port_scratch(const struct toy_compiler *tc,
                                 unsigned message_length,
                                 unsigned response_length,
                                 bool write_type,
                                 bool dword_mode,
                                 bool invalidate_after_read,
                                 int num_registers,
                                 int hword_offset)
{
   const bool eot = false;
   const bool header_present = true;
   uint32_t ctrl;

   /* this message form only exists on GEN7+ */
   assert(tc->dev->gen >= ILO_GEN(7));
   /* block size is encoded as log2, so only 1/2/4 registers are valid */
   assert(num_registers == 1 || num_registers == 2 || num_registers == 4);

   /* bit 18 marks the message as a scratch-block access */
   ctrl = 1 << 18 |
          write_type << 17 |
          dword_mode << 16 |
          invalidate_after_read << 15 |
          (num_registers - 1) << 12 |
          hword_offset;

   return tsrc_imm_mdesc(tc, eot, message_length,
         response_length, header_present, ctrl);
}
 
/**
 * Construct an immediate message descriptor for URB messages.  URB
 * messages always carry a header.  GEN7 replaced the used/allocate bits
 * with a per-slot-offset bit and repositioned the remaining fields.
 */
static inline struct toy_src
tsrc_imm_mdesc_urb(const struct toy_compiler *tc,
                   bool eot,
                   unsigned message_length,
                   unsigned response_length,
                   bool complete,
                   bool used,
                   bool allocate,
                   unsigned swizzle_control,
                   unsigned global_offset,
                   unsigned urb_opcode)
{
   const bool header_present = true;
   uint32_t ctrl;

   if (tc->dev->gen >= ILO_GEN(7)) {
      /* per-slot offsets are not used by this compiler */
      const bool per_slot_offset = false;

      ctrl = per_slot_offset << 16 |
             complete << 15 |
             swizzle_control << 14 |
             global_offset << 3 |
             urb_opcode;
   }
   else {
      ctrl = complete << 15 |
             used << 14 |
             allocate << 13 |
             swizzle_control << 10 |
             global_offset << 4 |
             urb_opcode;
   }

   return tsrc_imm_mdesc(tc, eot, message_length,
         response_length, header_present, ctrl);
}
 
#endif /* TOY_HELPERS_H */
/contrib/sdk/sources/Mesa/src/gallium/drivers/ilo/shader/toy_legalize.c
0,0 → 1,632
/*
* Mesa 3-D graphics library
*
* Copyright (C) 2012-2013 LunarG, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*
* Authors:
* Chia-I Wu <olv@lunarg.com>
*/
 
#include "pipe/p_shader_tokens.h"
#include "toy_compiler.h"
#include "toy_tgsi.h"
#include "toy_helpers.h"
#include "toy_legalize.h"
 
/**
 * Lower an instruction to BRW_OPCODE_SEND(C).
 *
 * \param inst   a virtual (internal) instruction; its opcode is replaced
 * \param sendc  use SENDC (with dependency check) instead of SEND
 * \param sfid   shared function ID, stored in the cond_modifier bits
 *               which SEND repurposes as the SFID field
 */
void
toy_compiler_lower_to_send(struct toy_compiler *tc, struct toy_inst *inst,
                           bool sendc, unsigned sfid)
{
   /* only virtual opcodes (>= 128) may be lowered to SEND */
   assert(inst->opcode >= 128);

   inst->opcode = (sendc) ? BRW_OPCODE_SENDC : BRW_OPCODE_SEND;

   /* thread control is reserved */
   assert(inst->thread_ctrl == 0);

   /* SEND overloads the cond_modifier field to hold the SFID */
   assert(inst->cond_modifier == BRW_CONDITIONAL_NONE);
   inst->cond_modifier = sfid;
}
 
/**
 * Map a virtual math opcode to the hardware BRW_MATH_FUNCTION_* code
 * used in the MATH instruction's function-control field.  Returns -1
 * (after asserting) for opcodes that are not math functions.
 */
static int
math_op_to_func(unsigned opcode)
{
   switch (opcode) {
   case TOY_OPCODE_INV:              return BRW_MATH_FUNCTION_INV;
   case TOY_OPCODE_LOG:              return BRW_MATH_FUNCTION_LOG;
   case TOY_OPCODE_EXP:              return BRW_MATH_FUNCTION_EXP;
   case TOY_OPCODE_SQRT:             return BRW_MATH_FUNCTION_SQRT;
   case TOY_OPCODE_RSQ:              return BRW_MATH_FUNCTION_RSQ;
   case TOY_OPCODE_SIN:              return BRW_MATH_FUNCTION_SIN;
   case TOY_OPCODE_COS:              return BRW_MATH_FUNCTION_COS;
   case TOY_OPCODE_FDIV:             return BRW_MATH_FUNCTION_FDIV;
   case TOY_OPCODE_POW:              return BRW_MATH_FUNCTION_POW;
   case TOY_OPCODE_INT_DIV_QUOTIENT: return BRW_MATH_FUNCTION_INT_DIV_QUOTIENT;
   case TOY_OPCODE_INT_DIV_REMAINDER: return BRW_MATH_FUNCTION_INT_DIV_REMAINDER;
   default:
      assert(!"unknown math opcode");
      return -1;
   }
}
 
/**
 * Lower virtual math opcodes to BRW_OPCODE_MATH.
 *
 * MATH executes in align1 mode, which supports neither swizzled sources,
 * source modifiers, nor destination writemasks, so any of those are
 * worked around with extra MOVs through temporaries.
 */
void
toy_compiler_lower_math(struct toy_compiler *tc, struct toy_inst *inst)
{
   struct toy_dst tmp;
   int i;

   /* see commit 250770b74d33bb8625c780a74a89477af033d13a */
   for (i = 0; i < Elements(inst->src); i++) {
      if (tsrc_is_null(inst->src[i]))
         break;

      /* no swizzling in align1 */
      /* XXX how about source modifiers? */
      if (toy_file_is_virtual(inst->src[i].file) &&
          !tsrc_is_swizzled(inst->src[i]) &&
          !inst->src[i].absolute &&
          !inst->src[i].negate)
         continue;

      /* copy the problematic operand into a clean temporary */
      tmp = tdst_type(tc_alloc_tmp(tc), inst->src[i].type);
      tc_MOV(tc, tmp, inst->src[i]);
      inst->src[i] = tsrc_from(tmp);
   }

   /* FC[0:3] */
   assert(inst->cond_modifier == BRW_CONDITIONAL_NONE);
   inst->cond_modifier = math_op_to_func(inst->opcode);
   /* FC[4:5] */
   assert(inst->thread_ctrl == 0);
   inst->thread_ctrl = 0;

   inst->opcode = BRW_OPCODE_MATH;
   /* re-append after the MOVs emitted above */
   tc_move_inst(tc, inst);

   /* no writemask in align1 */
   if (inst->dst.writemask != TOY_WRITEMASK_XYZW) {
      struct toy_dst dst = inst->dst;
      struct toy_inst *inst2;

      /* write to a full temporary, then MOV the masked channels out */
      tmp = tc_alloc_tmp(tc);
      tmp.type = inst->dst.type;
      inst->dst = tmp;

      inst2 = tc_MOV(tc, dst, tsrc_from(tmp));
      /* the final MOV must be predicated the same way as the MATH */
      inst2->pred_ctrl = inst->pred_ctrl;
   }
}
 
/**
 * Return the bit pattern of the absolute value of an immediate.
 *
 * \param imm32  raw 32-bit immediate
 * \param type   how the bits are to be interpreted
 */
static uint32_t
absolute_imm(uint32_t imm32, enum toy_type type)
{
   union fi val = { .ui = imm32 };

   switch (type) {
   case TOY_TYPE_F:
      val.f = fabs(val.f);
      break;
   case TOY_TYPE_D:
      /*
       * negate in unsigned arithmetic: same bit pattern as -val.i, but
       * defined even when val.i == INT_MIN (signed negation of INT_MIN
       * is undefined behavior)
       */
      if (val.i < 0)
         val.ui = 0 - val.ui;
      break;
   case TOY_TYPE_W:
      /* only the low 16 bits are meaningful for W immediates */
      if ((int16_t) (val.ui & 0xffff) < 0)
         val.i = -((int16_t) (val.ui & 0xffff));
      break;
   case TOY_TYPE_V:
      assert(!"cannot take absolute of immediates of type V");
      break;
   default:
      /* unsigned types are their own absolute value */
      break;
   }

   return val.ui;
}
 
/**
 * Return the bit pattern of the negation of an immediate.
 *
 * \param imm32  raw 32-bit immediate
 * \param type   how the bits are to be interpreted
 */
static uint32_t
negate_imm(uint32_t imm32, enum toy_type type)
{
   union fi val = { .ui = imm32 };

   switch (type) {
   case TOY_TYPE_F:
      val.f = -val.f;
      break;
   case TOY_TYPE_D:
   case TOY_TYPE_UD:
      /*
       * two's-complement negation in unsigned arithmetic: same bit
       * pattern as -val.i, but defined even when val.i == INT_MIN
       * (signed negation of INT_MIN is undefined behavior)
       */
      val.ui = 0 - val.ui;
      break;
   case TOY_TYPE_W:
   case TOY_TYPE_UW:
      /* only the low 16 bits are meaningful for W/UW immediates */
      val.i = -((int16_t) (val.ui & 0xffff));
      break;
   default:
      assert(!"negate immediate of unknown type");
      break;
   }

   return val.ui;
}
 
/**
 * Make the use of immediates in an instruction legal: fold source
 * modifiers into the immediate value, and move an immediate into a
 * temporary register when it is not the last operand (the hardware only
 * allows an immediate in the final source slot).
 */
static void
validate_imm(struct toy_compiler *tc, struct toy_inst *inst)
{
   bool move_inst = false;
   int i;

   for (i = 0; i < Elements(inst->src); i++) {
      struct toy_dst tmp;

      if (tsrc_is_null(inst->src[i]))
         break;

      if (inst->src[i].file != TOY_FILE_IMM)
         continue;

      /* fold |x| into the value itself; modifiers are meaningless on imms */
      if (inst->src[i].absolute) {
         inst->src[i].val32 =
            absolute_imm(inst->src[i].val32, inst->src[i].type);
         inst->src[i].absolute = false;
      }

      /* fold -x into the value itself */
      if (inst->src[i].negate) {
         inst->src[i].val32 =
            negate_imm(inst->src[i].val32, inst->src[i].type);
         inst->src[i].negate = false;
      }

      /* this is the last operand */
      if (i + 1 == Elements(inst->src) || tsrc_is_null(inst->src[i + 1]))
         break;

      /* need to use a temp if this imm is not the last operand */
      /* TODO we should simply swap the operands if the op is commutative */
      tmp = tc_alloc_tmp(tc);
      tmp = tdst_type(tmp, inst->src[i].type);
      tc_MOV(tc, tmp, inst->src[i]);
      inst->src[i] = tsrc_from(tmp);

      move_inst = true;
   }

   /* re-append so the new MOVs precede the instruction */
   if (move_inst)
      tc_move_inst(tc, inst);
}
 
/**
 * Lower an integer MUL into the MUL/MACH/MOV accumulator sequence.
 * Float multiplications are left untouched; 32-bit integer multiplies
 * need the accumulator to produce a full 32-bit result.
 */
static void
lower_opcode_mul(struct toy_compiler *tc, struct toy_inst *inst)
{
   const enum toy_type inst_type = inst->dst.type;
   const struct toy_dst acc0 =
      tdst_type(tdst(TOY_FILE_ARF, BRW_ARF_ACCUMULATOR, 0), inst_type);
   struct toy_inst *inst2;

   /* only need to take care of integer multiplications */
   if (inst_type != TOY_TYPE_UD && inst_type != TOY_TYPE_D)
      return;

   /* acc0 = (src0 & 0x0000ffff) * src1 */
   tc_MUL(tc, acc0, inst->src[0], inst->src[1]);

   /* acc0 = (src0 & 0xffff0000) * src1 + acc0 */
   inst2 = tc_add2(tc, BRW_OPCODE_MACH, tdst_type(tdst_null(), inst_type),
         inst->src[0], inst->src[1]);
   inst2->acc_wr_ctrl = true;

   /* dst = acc0 & 0xffffffff */
   tc_MOV(tc, inst->dst, tsrc_from(acc0));

   /* the original MUL has been fully replaced by the sequence above */
   tc_discard_inst(tc, inst);
}
 
/**
 * Lower MAC (multiply-accumulate).  The float form keeps MAC but loads
 * the accumulator explicitly from src2; the integer form cannot use MAC
 * at all and is expanded into a lowered MUL followed by an ADD.
 */
static void
lower_opcode_mac(struct toy_compiler *tc, struct toy_inst *inst)
{
   const enum toy_type inst_type = inst->dst.type;

   if (inst_type != TOY_TYPE_UD && inst_type != TOY_TYPE_D) {
      const struct toy_dst acc0 = tdst(TOY_FILE_ARF, BRW_ARF_ACCUMULATOR, 0);

      /* MAC implicitly adds the accumulator; preload it with src2 */
      tc_MOV(tc, acc0, inst->src[2]);
      inst->src[2] = tsrc_null();
      tc_move_inst(tc, inst);
   }
   else {
      struct toy_dst tmp = tdst_type(tc_alloc_tmp(tc), inst_type);
      struct toy_inst *inst2;

      /* tmp = src0 * src1, via the integer-MUL lowering */
      inst2 = tc_MUL(tc, tmp, inst->src[0], inst->src[1]);
      lower_opcode_mul(tc, inst2);

      /* dst = tmp + src2 */
      tc_ADD(tc, inst->dst, tsrc_from(tmp), inst->src[2]);

      tc_discard_inst(tc, inst);
   }
}
 
/**
 * Legalize the instructions for register allocation: lower the remaining
 * non-native opcodes (MAC/MUL), then fix up immediate operands.  Two
 * passes are needed because the first pass inserts new instructions.
 */
void
toy_compiler_legalize_for_ra(struct toy_compiler *tc)
{
   struct toy_inst *inst;

   tc_head(tc);
   while ((inst = tc_next(tc)) != NULL) {
      switch (inst->opcode) {
      case BRW_OPCODE_MAC:
         lower_opcode_mac(tc, inst);
         break;
      case BRW_OPCODE_MAD:
         /* TODO operands must be floats */
         break;
      case BRW_OPCODE_MUL:
         lower_opcode_mul(tc, inst);
         break;
      default:
         /* everything above the last HW opcode should be gone by now */
         if (inst->opcode > TOY_OPCODE_LAST_HW)
            tc_fail(tc, "internal opcodes not lowered");
      }
   }

   /* loop again as the previous pass may add new instructions */
   tc_head(tc);
   while ((inst = tc_next(tc)) != NULL) {
      validate_imm(tc, inst);
   }
}
 
/**
 * Compute and store the jump distance (JIP) of a WHILE instruction: the
 * negative distance back to the instruction following the matching DO
 * marker.  Distances are in units of 2, matching the encoding the other
 * patch_* helpers use.
 */
static void
patch_while_jip(struct toy_compiler *tc, struct toy_inst *inst)
{
   struct toy_inst *inst2;
   int nest_level, dist;

   nest_level = 0;
   dist = -1;

   /* search backward */
   LIST_FOR_EACH_ENTRY_FROM_REV(inst2, inst->list.prev,
         &tc->instructions, list) {
      if (inst2->marker) {
         /* DO is a marker, not a real instruction; it adds no distance */
         if (inst2->opcode == BRW_OPCODE_DO) {
            if (nest_level) {
               /* this DO closes an inner WHILE we passed on the way */
               nest_level--;
            }
            else {
               /* the following instruction */
               dist++;
               break;
            }
         }

         continue;
      }

      /* an inner WHILE means one more DO to skip */
      if (inst2->opcode == BRW_OPCODE_WHILE)
         nest_level++;

      dist--;
   }

   /* GEN7 stores JIP in src1; GEN6 (ab)uses the dst slot */
   if (tc->dev->gen >= ILO_GEN(7))
      inst->src[1] = tsrc_imm_w(dist * 2);
   else
      inst->dst = tdst_imm_w(dist * 2);
}
 
/**
 * Compute and store the jump distances of an IF or ELSE instruction:
 * JIP points past the matching ELSE (or to the ENDIF when there is
 * none), UIP to the matching ENDIF.  Distances are in units of 2.
 */
static void
patch_if_else_jip(struct toy_compiler *tc, struct toy_inst *inst)
{
   struct toy_inst *inst2;
   int nest_level, dist;
   int jip, uip;

   nest_level = 0;
   dist = 1;
   jip = 0;
   uip = 0;

   /* search forward */
   LIST_FOR_EACH_ENTRY_FROM(inst2, inst->list.next, &tc->instructions, list) {
      if (inst2->marker)
         continue;

      if (inst2->opcode == BRW_OPCODE_ENDIF) {
         if (nest_level) {
            /* this ENDIF closes a nested IF */
            nest_level--;
         }
         else {
            uip = dist * 2;
            /* an IF without an ELSE jumps straight to its ENDIF */
            if (!jip)
               jip = uip;
            break;
         }
      }
      else if (inst2->opcode == BRW_OPCODE_ELSE &&
               inst->opcode == BRW_OPCODE_IF) {
         if (!nest_level) {
            /* the following instruction */
            jip = (dist + 1) * 2;

            /* GEN6 IF needs no UIP; reuse JIP and stop searching */
            if (tc->dev->gen == ILO_GEN(6)) {
               uip = jip;
               break;
            }
         }
      }
      else if (inst2->opcode == BRW_OPCODE_IF) {
         nest_level++;
      }

      dist++;
   }

   if (tc->dev->gen >= ILO_GEN(7)) {
      /* what should the type be? */
      inst->dst.type = TOY_TYPE_D;
      inst->src[0].type = TOY_TYPE_D;
      /* GEN7 packs UIP in the high and JIP in the low 16 bits of src1 */
      inst->src[1] = tsrc_imm_d(uip << 16 | jip);
   }
   else {
      inst->dst = tdst_imm_w(jip);
   }

   inst->thread_ctrl = BRW_THREAD_SWITCH;
}
 
/**
 * Compute and store the jump distance (JIP) of an ENDIF instruction:
 * the distance to the next instruction that may re-enable channels
 * (ENDIF, ELSE, or WHILE), or 1 when none is found.
 */
static void
patch_endif_jip(struct toy_compiler *tc, struct toy_inst *inst)
{
   struct toy_inst *inst2;
   bool found = false;
   int dist = 1;

   /* search forward for instructions that may enable channels */
   LIST_FOR_EACH_ENTRY_FROM(inst2, inst->list.next, &tc->instructions, list) {
      if (inst2->marker)
         continue;

      switch (inst2->opcode) {
      case BRW_OPCODE_ENDIF:
      case BRW_OPCODE_ELSE:
      case BRW_OPCODE_WHILE:
         found = true;
         break;
      default:
         break;
      }

      if (found)
         break;

      dist++;
   }

   /* should we set dist to (dist - 1) or 1? */
   if (!found)
      dist = 1;

   /* GEN7 stores JIP in src1; GEN6 uses the dst slot */
   if (tc->dev->gen >= ILO_GEN(7))
      inst->src[1] = tsrc_imm_w(dist * 2);
   else
      inst->dst = tdst_imm_w(dist * 2);

   inst->thread_ctrl = BRW_THREAD_SWITCH;
}
 
/**
 * Compute and store the jump distances of a BREAK or CONTINUE
 * instruction: JIP points to the innermost enclosing ELSE/ENDIF/WHILE,
 * UIP past the matching WHILE of the loop being broken/continued.
 */
static void
patch_break_continue_jip(struct toy_compiler *tc, struct toy_inst *inst)
{
   struct toy_inst *inst2, *inst3;
   int nest_level, dist, jip, uip;

   nest_level = 0;
   dist = 1;
   jip = 1 * 2;
   uip = 1 * 2;

   /* search forward */
   LIST_FOR_EACH_ENTRY_FROM(inst2, inst->list.next, &tc->instructions, list) {
      if (inst2->marker) {
         /* entering a nested loop */
         if (inst2->opcode == BRW_OPCODE_DO)
            nest_level++;
         continue;
      }

      if (inst2->opcode == BRW_OPCODE_ELSE ||
          inst2->opcode == BRW_OPCODE_ENDIF ||
          inst2->opcode == BRW_OPCODE_WHILE) {
         jip = dist * 2;
         break;
      }

      dist++;
   }

   /* go on to determine uip; resume from where the JIP search stopped */
   inst3 = inst2;
   LIST_FOR_EACH_ENTRY_FROM(inst2, &inst3->list, &tc->instructions, list) {
      if (inst2->marker) {
         if (inst2->opcode == BRW_OPCODE_DO)
            nest_level++;
         continue;
      }

      if (inst2->opcode == BRW_OPCODE_WHILE) {
         if (nest_level) {
            /* this WHILE closes a nested loop */
            nest_level--;
         }
         else {
            /* the following instruction */
            if (tc->dev->gen == ILO_GEN(6) && inst->opcode == BRW_OPCODE_BREAK)
               dist++;

            uip = dist * 2;
            break;
         }
      }

      dist++;
   }

   /* should the type be D or W? */
   inst->dst.type = TOY_TYPE_D;
   inst->src[0].type = TOY_TYPE_D;
   /* UIP in the high 16 bits, JIP in the low 16 bits */
   inst->src[1] = tsrc_imm_d(uip << 16 | jip);
}
 
/**
 * Legalize the instructions for assembling: normalize null src1 types,
 * split SIMD16 MATH where unsupported, lower predicated IF on GEN7+,
 * retarget MRF operands to GRF on GEN7+, and finally patch all control
 * flow jump distances.
 */
void
toy_compiler_legalize_for_asm(struct toy_compiler *tc)
{
   struct toy_inst *inst;
   int pc = 0;

   tc_head(tc);
   while ((inst = tc_next(tc)) != NULL) {
      int i;

      pc++;

      /*
       * From the Sandy Bridge PRM, volume 4 part 2, page 112:
       *
       *   "Specifically, for instructions with a single source, it only
       *    uses the first source operand <src0>. In this case, the second
       *    source operand <src1> must be set to null and also with the same
       *    type as the first source operand <src0>. It is a special case
       *    when <src0> is an immediate, as an immediate <src0> uses DW3 of
       *    the instruction word, which is normally used by <src1>. In this
       *    case, <src1> must be programmed with register file ARF and the
       *    same data type as <src0>."
       *
       * Since we already fill unused operands with null, we only need to take
       * care of the type.
       */
      if (tsrc_is_null(inst->src[1]))
         inst->src[1].type = inst->src[0].type;

      switch (inst->opcode) {
      case BRW_OPCODE_MATH:
         /* math does not support align16 nor exec_size > 8 */
         inst->access_mode = BRW_ALIGN_1;

         if (inst->exec_size == BRW_EXECUTE_16) {
            /*
             * From the Ivy Bridge PRM, volume 4 part 3, page 192:
             *
             *     "INT DIV function does not support SIMD16."
             */
            if (tc->dev->gen < ILO_GEN(7) ||
                inst->cond_modifier == BRW_MATH_FUNCTION_INT_DIV_QUOTIENT ||
                inst->cond_modifier == BRW_MATH_FUNCTION_INT_DIV_REMAINDER) {
               struct toy_inst *inst2;

               /* split into two SIMD8 halves, one per quarter control */
               inst->exec_size = BRW_EXECUTE_8;
               inst->qtr_ctrl = GEN6_COMPRESSION_1Q;

               inst2 = tc_duplicate_inst(tc, inst);
               inst2->qtr_ctrl = GEN6_COMPRESSION_2Q;
               /* the second half operates on the next register */
               inst2->dst = tdst_offset(inst2->dst, 1, 0);
               inst2->src[0] = tsrc_offset(inst2->src[0], 1, 0);
               if (!tsrc_is_null(inst2->src[1]))
                  inst2->src[1] = tsrc_offset(inst2->src[1], 1, 0);

               pc++;
            }
         }
         break;
      case BRW_OPCODE_IF:
         if (tc->dev->gen >= ILO_GEN(7) &&
             inst->cond_modifier != BRW_CONDITIONAL_NONE) {
            struct toy_inst *inst2;

            inst2 = tc_duplicate_inst(tc, inst);

            /* replace the original IF by CMP */
            inst->opcode = BRW_OPCODE_CMP;

            /* predicate control instead of condition modifier */
            inst2->dst = tdst_null();
            inst2->src[0] = tsrc_null();
            inst2->src[1] = tsrc_null();
            inst2->cond_modifier = BRW_CONDITIONAL_NONE;
            inst2->pred_ctrl = BRW_PREDICATE_NORMAL;

            pc++;
         }
         break;
      default:
         break;
      }

      /* MRF to GRF */
      if (tc->dev->gen >= ILO_GEN(7)) {
         for (i = 0; i < Elements(inst->src); i++) {
            if (inst->src[i].file != TOY_FILE_MRF)
               continue;
            else if (tsrc_is_null(inst->src[i]))
               break;

            inst->src[i].file = TOY_FILE_GRF;
         }

         if (inst->dst.file == TOY_FILE_MRF)
            inst->dst.file = TOY_FILE_GRF;
      }
   }

   tc->num_instructions = pc;

   /* set JIP/UIP */
   tc_head(tc);
   while ((inst = tc_next(tc)) != NULL) {
      switch (inst->opcode) {
      case BRW_OPCODE_IF:
      case BRW_OPCODE_ELSE:
         patch_if_else_jip(tc, inst);
         break;
      case BRW_OPCODE_ENDIF:
         patch_endif_jip(tc, inst);
         break;
      case BRW_OPCODE_WHILE:
         patch_while_jip(tc, inst);
         break;
      case BRW_OPCODE_BREAK:
      case BRW_OPCODE_CONTINUE:
         patch_break_continue_jip(tc, inst);
         break;
      default:
         break;
      }
   }
}
/contrib/sdk/sources/Mesa/src/gallium/drivers/ilo/shader/toy_legalize.h
0,0 → 1,52
/*
* Mesa 3-D graphics library
*
* Copyright (C) 2012-2013 LunarG, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*
* Authors:
* Chia-I Wu <olv@lunarg.com>
*/
 
#ifndef TOY_LEGALIZE_H
#define TOY_LEGALIZE_H
 
#include "toy_compiler.h"
#include "toy_tgsi.h"
 
void
toy_compiler_lower_to_send(struct toy_compiler *tc, struct toy_inst *inst,
bool sendc, unsigned sfid);
 
void
toy_compiler_lower_math(struct toy_compiler *tc, struct toy_inst *inst);
 
void
toy_compiler_allocate_registers(struct toy_compiler *tc,
int start_grf, int end_grf,
int num_grf_per_vrf);
 
void
toy_compiler_legalize_for_ra(struct toy_compiler *tc);
 
void
toy_compiler_legalize_for_asm(struct toy_compiler *tc);
 
#endif /* TOY_LEGALIZE_H */
/contrib/sdk/sources/Mesa/src/gallium/drivers/ilo/shader/toy_legalize_ra.c
0,0 → 1,628
/*
* Mesa 3-D graphics library
*
* Copyright (C) 2012-2013 LunarG, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*
* Authors:
* Chia-I Wu <olv@lunarg.com>
*/
 
#include <stdlib.h> /* for qsort() */
#include "toy_compiler.h"
#include "toy_legalize.h"
 
/**
 * Live interval of a VRF register.
 */
struct linear_scan_live_interval {
   int vrf;          /* VRF register number; 0 marks an unused slot */
   int startpoint;   /* pc of the first use */
   int endpoint;     /* pc of the last use */

   /*
    * should this be assigned a consecutive register of the previous
    * interval's?
    */
   bool consecutive;

   int reg;          /* register assigned by linear_scan_run() */

   struct list_head list;   /* link in linear_scan::active_list */
};
 
/**
 * Linear scan.
 */
struct linear_scan {
   struct linear_scan_live_interval *intervals;   /* one slot per VRF */
   int max_vrf, num_vrfs;   /* highest VRF seen / count of used slots */

   int num_regs;            /* total registers available for allocation */

   /* intervals currently live, sorted by endpoint */
   struct list_head active_list;
   /* stack of free register numbers; allocation pops from the tail */
   int *free_regs;
   int num_free_regs;

   int *vrf_mapping;        /* final VRF -> register mapping */
};
 
/**
 * Return a chunk of registers to the free register pool.
 *
 * The highest register of the chunk is pushed first so that the lowest one
 * ends up on top of the free stack.
 */
static void
linear_scan_free_regs(struct linear_scan *ls, int reg, int count)
{
   int r;

   for (r = reg + count - 1; r >= reg; r--)
      ls->free_regs[ls->num_free_regs++] = r;
}
 
/**
 * qsort() callback ordering register numbers in descending order, so that
 * the smallest numbers sit at the tail of the free stack.
 */
static int
linear_scan_compare_regs(const void *elem1, const void *elem2)
{
   const int a = *(const int *) elem1;
   const int b = *(const int *) elem2;

   /* descending */
   return b - a;
}
 
/**
 * Allocate a chunk of registers from the free register pool.
 *
 * Returns the lowest register number of the chunk, or -1 on failure.  The
 * pool is treated as a stack; for multi-register chunks the pool is scanned
 * (and, if needed, sorted into descending order once) for \p count entries
 * with consecutive register numbers.
 */
static int
linear_scan_allocate_regs(struct linear_scan *ls, int count)
{
   bool sorted = false;
   int reg;

   /* simple cases */
   if (count > ls->num_free_regs)
      return -1;
   else if (count == 1)
      return ls->free_regs[--ls->num_free_regs];

   /* TODO a free register pool */
   /* TODO reserve some regs for spilling */
   while (true) {
      bool found = false;
      int start;

      /*
       * find a chunk of registers that have consecutive register
       * numbers
       */
      for (start = ls->num_free_regs - 1; start >= count - 1; start--) {
         int i;

         /* entries below \p start must hold increasingly higher numbers */
         for (i = 1; i < count; i++) {
            if (ls->free_regs[start - i] != ls->free_regs[start] + i)
               break;
         }

         if (i >= count) {
            found = true;
            break;
         }
      }

      if (found) {
         /* free_regs[start] is the lowest number of the chunk */
         reg = ls->free_regs[start];

         /* close the gap left by the chunk unless it sat at the tail */
         if (start != ls->num_free_regs - 1) {
            start++;
            memmove(&ls->free_regs[start - count],
                    &ls->free_regs[start],
                    sizeof(*ls->free_regs) * (ls->num_free_regs - start));
         }
         ls->num_free_regs -= count;
         break;
      }
      else if (!sorted) {
         /* sort and retry */
         qsort(ls->free_regs, ls->num_free_regs, sizeof(*ls->free_regs),
               linear_scan_compare_regs);
         sorted = true;
      }
      else {
         /* failed */
         reg = -1;
         break;
      }
   }

   return reg;
}
 
/**
 * Add an interval to the active list.
 */
static void
linear_scan_add_active(struct linear_scan *ls,
                       struct linear_scan_live_interval *interval)
{
   struct linear_scan_live_interval *pos;

   /* keep the active list sorted by endpoints */
   LIST_FOR_EACH_ENTRY(pos, &ls->active_list, list) {
      if (pos->endpoint >= interval->endpoint)
         break;
   }

   /*
    * insert before the first interval ending no earlier; if the loop runs
    * to completion, pos refers to the list head and this appends at the
    * tail
    */
   list_addtail(&interval->list, &pos->list);
}
 
/**
 * Remove an interval from the active list.  The interval's register is NOT
 * returned to the pool; callers do that separately.
 */
static void
linear_scan_remove_active(struct linear_scan *ls,
                          struct linear_scan_live_interval *interval)
{
   list_del(&interval->list);
}
 
/**
 * Remove intervals that are no longer active from the active list.
 *
 * Intervals ending before \p pc are expired and their registers are
 * recycled into the free pool.
 */
static void
linear_scan_expire_active(struct linear_scan *ls, int pc)
{
   struct linear_scan_live_interval *interval, *next;

   LIST_FOR_EACH_ENTRY_SAFE(interval, next, &ls->active_list, list) {
      /*
       * since we sort intervals on the active list by their endpoints, we
       * know that this and the rest of the intervals are still active.
       */
      if (interval->endpoint >= pc)
         break;

      linear_scan_remove_active(ls, interval);

      /* recycle the reg */
      linear_scan_free_regs(ls, interval->reg, 1);
   }
}
 
/**
 * Spill an interval.
 *
 * Not implemented: this aborts in debug builds and is a no-op otherwise.
 * \p is_active tells whether the interval is on the active list.
 */
static void
linear_scan_spill(struct linear_scan *ls,
                  struct linear_scan_live_interval *interval,
                  bool is_active)
{
   assert(!"no spilling support");
}
 
/**
 * Spill a range of intervals.
 *
 * The intervals spilled here have not been added to the active list, hence
 * is_active is false for each of them.
 */
static void
linear_scan_spill_range(struct linear_scan *ls, int first, int count)
{
   int idx;

   for (idx = first; idx < first + count; idx++)
      linear_scan_spill(ls, &ls->intervals[idx], false);
}
 
/**
 * Perform linear scan to allocate registers for the intervals.
 *
 * Intervals are walked in startpoint order (they were sorted beforehand).
 * Intervals marked consecutive and sharing a startpoint are allocated as
 * one chunk of consecutive registers.  Returns false when allocation fails.
 */
static bool
linear_scan_run(struct linear_scan *ls)
{
   int i;

   i = 0;
   while (i < ls->num_vrfs) {
      struct linear_scan_live_interval *first = &ls->intervals[i];
      int reg, count;

      /*
       * BRW_OPCODE_SEND may write to multiple consecutive registers and we need to
       * support that
       */
      for (count = 1; i + count < ls->num_vrfs; count++) {
         const struct linear_scan_live_interval *interval =
            &ls->intervals[i + count];

         if (interval->startpoint != first->startpoint ||
             !interval->consecutive)
            break;
      }

      reg = linear_scan_allocate_regs(ls, count);

      /* expire intervals that are no longer active and try again */
      if (reg < 0) {
         linear_scan_expire_active(ls, first->startpoint);
         reg = linear_scan_allocate_regs(ls, count);
      }

      /* have to spill some intervals */
      if (reg < 0) {
         struct linear_scan_live_interval *last_active =
            container_of(ls->active_list.prev,
                  (struct linear_scan_live_interval *) NULL, list);

         /* heuristically spill the interval that ends last */
         if (count > 1 || last_active->endpoint < first->endpoint) {
            linear_scan_spill_range(ls, i, count);
            i += count;
            continue;
         }

         /* make some room for the new interval */
         linear_scan_spill(ls, last_active, true);
         reg = linear_scan_allocate_regs(ls, count);
         if (reg < 0) {
            assert(!"failed to spill any register");
            return false;
         }
      }

      /* commit the chunk: one register per interval, in order */
      while (count--) {
         struct linear_scan_live_interval *interval = &ls->intervals[i++];

         interval->reg = reg++;
         linear_scan_add_active(ls, interval);

         ls->vrf_mapping[interval->vrf] = interval->reg;

         /*
          * this should and must be the case because of how we initialized the
          * intervals
          */
         assert(interval->vrf - first->vrf == interval->reg - first->reg);
      }
   }

   return true;
}
 
/**
* Add a new interval.
*/
static void
linear_scan_add_live_interval(struct linear_scan *ls, int vrf, int pc)
{
if (ls->intervals[vrf].vrf)
return;
 
ls->intervals[vrf].vrf = vrf;
ls->intervals[vrf].startpoint = pc;
 
ls->num_vrfs++;
if (vrf > ls->max_vrf)
ls->max_vrf = vrf;
}
 
/**
 * Perform (oversimplified?) live variable analysis.
 *
 * Walks the instruction list once.  Registers accessed inside the
 * outermost DO/WHILE loop are treated as live over the whole loop:
 * their intervals are widened to [do_pc, while_pc].
 */
static void
linear_scan_init_live_intervals(struct linear_scan *ls,
                                struct toy_compiler *tc)
{
   const struct toy_inst *inst;
   int pc, do_pc, while_pc;

   pc = 0;
   do_pc = -1;
   while_pc = -1;

   tc_head(tc);
   while ((inst = tc_next_no_skip(tc)) != NULL) {
      /* inside the outermost loop, pretend every access spans the loop */
      const int startpoint = (pc <= while_pc) ? do_pc : pc;
      const int endpoint = (pc <= while_pc) ? while_pc : pc;
      int vrf, i;

      /*
       * assume all registers used in this outermost loop are live through out
       * the whole loop
       */
      if (inst->marker) {
         if (pc > while_pc) {
            struct toy_inst *inst2;
            int loop_level = 1;

            assert(inst->opcode == BRW_OPCODE_DO);
            do_pc = pc;
            while_pc = pc + 1;

            /* find the matching BRW_OPCODE_WHILE */
            LIST_FOR_EACH_ENTRY_FROM(inst2, tc->iter_next,
                  &tc->instructions, list) {
               if (inst2->marker) {
                  /*
                   * a nested loop: markers can only be DOs.  The original
                   * code asserted on the outer "inst" here, which had
                   * already been verified above and made the check vacuous.
                   */
                  assert(inst2->opcode == BRW_OPCODE_DO);
                  loop_level++;
                  continue;
               }

               if (inst2->opcode == BRW_OPCODE_WHILE) {
                  loop_level--;
                  if (!loop_level)
                     break;
               }
               while_pc++;
            }
         }

         /* markers do not consume a pc */
         continue;
      }

      if (inst->dst.file == TOY_FILE_VRF) {
         int num_dst;

         /* TODO this is a hack */
         if (inst->opcode == BRW_OPCODE_SEND ||
             inst->opcode == BRW_OPCODE_SENDC) {
            /* SENDs may write several consecutive registers; derive the
             * count from the response length field of the message
             * descriptor (bits 24:20) */
            const uint32_t mdesc = inst->src[1].val32;
            int response_length = (mdesc >> 20) & 0x1f;

            num_dst = response_length;
            if (num_dst > 1 && inst->exec_size == BRW_EXECUTE_16)
               num_dst /= 2;
         }
         else {
            num_dst = 1;
         }

         vrf = inst->dst.val32 / TOY_REG_WIDTH;

         for (i = 0; i < num_dst; i++) {
            /* first use */
            if (!ls->intervals[vrf].vrf)
               linear_scan_add_live_interval(ls, vrf, startpoint);

            ls->intervals[vrf].endpoint = endpoint;
            ls->intervals[vrf].consecutive = (i > 0);

            vrf++;
         }
      }

      for (i = 0; i < Elements(inst->src); i++) {
         if (inst->src[i].file != TOY_FILE_VRF)
            continue;

         vrf = inst->src[i].val32 / TOY_REG_WIDTH;

         /* first use */
         if (!ls->intervals[vrf].vrf)
            linear_scan_add_live_interval(ls, vrf, startpoint);

         ls->intervals[vrf].endpoint = endpoint;
      }

      pc++;
   }
}
 
/**
 * Clean up after performing linear scan: release everything allocated by
 * linear_scan_init().
 */
static void
linear_scan_cleanup(struct linear_scan *ls)
{
   FREE(ls->free_regs);
   FREE(ls->intervals);
   FREE(ls->vrf_mapping);
}
 
/**
 * qsort() callback for live intervals: order by startpoint, break ties by
 * VRF number, and push unused slots (vrf == 0) to the end.
 */
static int
linear_scan_compare_live_intervals(const void *elem1, const void *elem2)
{
   const struct linear_scan_live_interval *a = elem1;
   const struct linear_scan_live_interval *b = elem2;

   /* make unused elements appear at the end */
   if (!a->vrf)
      return 1;
   if (!b->vrf)
      return -1;

   /* sort by startpoints first, and then by vrf */
   return (a->startpoint != b->startpoint) ?
      a->startpoint - b->startpoint : a->vrf - b->vrf;
}
 
/**
 * Prepare for linear scan.
 *
 * Builds the sorted live interval array, the free register stack, and the
 * (zeroed) VRF mapping.  Returns false on allocation failure, in which case
 * nothing is left allocated.
 */
static bool
linear_scan_init(struct linear_scan *ls, int num_regs,
                 struct toy_compiler *tc)
{
   int num_intervals, i;

   memset(ls, 0, sizeof(*ls));

   /* this may be much larger than ls->num_vrfs... */
   num_intervals = tc->next_vrf;
   ls->intervals = CALLOC(num_intervals, sizeof(ls->intervals[0]));
   if (!ls->intervals)
      return false;

   /* also computes ls->max_vrf, needed for vrf_mapping below */
   linear_scan_init_live_intervals(ls, tc);
   /* sort intervals by startpoints */
   qsort(ls->intervals, num_intervals, sizeof(*ls->intervals),
         linear_scan_compare_live_intervals);

   ls->num_regs = num_regs;
   ls->num_free_regs = num_regs;

   ls->free_regs = MALLOC(ls->num_regs * sizeof(*ls->free_regs));
   if (!ls->free_regs) {
      FREE(ls->intervals);
      return false;
   }

   /* add in reverse order as we will allocate from the tail */
   for (i = 0; i < ls->num_regs; i++)
      ls->free_regs[i] = num_regs - i - 1;

   list_inithead(&ls->active_list);

   ls->vrf_mapping = CALLOC(ls->max_vrf + 1, sizeof(*ls->vrf_mapping));
   if (!ls->vrf_mapping) {
      FREE(ls->intervals);
      FREE(ls->free_regs);
      return false;
   }

   return true;
}
 
/**
 * Allocate registers with linear scan, then rewrite every VRF operand in
 * the instruction list to its assigned GRF.
 *
 * On failure, tc_fail() is called and the operands are left untouched.
 */
static void
linear_scan_allocation(struct toy_compiler *tc,
                       int start_grf, int end_grf,
                       int num_grf_per_vrf)
{
   const int num_grfs = end_grf - start_grf + 1;
   struct linear_scan ls;
   struct toy_inst *inst;

   if (!linear_scan_init(&ls, num_grfs / num_grf_per_vrf, tc))
      return;

   if (!linear_scan_run(&ls)) {
      tc_fail(tc, "failed to allocate registers");
      /* the early return used to skip this, leaking ls's allocations */
      linear_scan_cleanup(&ls);
      return;
   }

   tc_head(tc);
   while ((inst = tc_next(tc)) != NULL) {
      int i;

      if (inst->dst.file == TOY_FILE_VRF) {
         const uint32_t val32 = inst->dst.val32;
         int reg = val32 / TOY_REG_WIDTH;
         int subreg = val32 % TOY_REG_WIDTH;

         /* map to GRF */
         reg = ls.vrf_mapping[reg] * num_grf_per_vrf + start_grf;

         inst->dst.file = TOY_FILE_GRF;
         inst->dst.val32 = reg * TOY_REG_WIDTH + subreg;
      }

      for (i = 0; i < Elements(inst->src); i++) {
         const uint32_t val32 = inst->src[i].val32;
         int reg, subreg;

         if (inst->src[i].file != TOY_FILE_VRF)
            continue;

         reg = val32 / TOY_REG_WIDTH;
         subreg = val32 % TOY_REG_WIDTH;

         /* map to GRF */
         reg = ls.vrf_mapping[reg] * num_grf_per_vrf + start_grf;

         inst->src[i].file = TOY_FILE_GRF;
         inst->src[i].val32 = reg * TOY_REG_WIDTH + subreg;
      }
   }

   linear_scan_cleanup(&ls);
}
 
/**
 * Trivially allocate registers: map each VRF directly to a fixed GRF with
 * no liveness analysis or reuse.  Fails (via tc_fail) when the shader needs
 * more GRFs than [start_grf, end_grf] provides.
 */
static void
trivial_allocation(struct toy_compiler *tc,
                   int start_grf, int end_grf,
                   int num_grf_per_vrf)
{
   struct toy_inst *inst;
   int max_grf = -1;

   tc_head(tc);
   while ((inst = tc_next(tc)) != NULL) {
      int i;

      if (inst->dst.file == TOY_FILE_VRF) {
         const uint32_t val32 = inst->dst.val32;
         int reg = val32 / TOY_REG_WIDTH;
         int subreg = val32 % TOY_REG_WIDTH;

         /* NOTE(review): the "- 1" suggests VRF numbering starts at 1, so
          * that VRF 1 lands on start_grf — confirm against tc_alloc_vrf() */
         reg = reg * num_grf_per_vrf + start_grf - 1;

         inst->dst.file = TOY_FILE_GRF;
         inst->dst.val32 = reg * TOY_REG_WIDTH + subreg;

         if (reg > max_grf)
            max_grf = reg;
      }

      for (i = 0; i < Elements(inst->src); i++) {
         const uint32_t val32 = inst->src[i].val32;
         int reg, subreg;

         if (inst->src[i].file != TOY_FILE_VRF)
            continue;

         reg = val32 / TOY_REG_WIDTH;
         subreg = val32 % TOY_REG_WIDTH;

         reg = reg * num_grf_per_vrf + start_grf - 1;

         inst->src[i].file = TOY_FILE_GRF;
         inst->src[i].val32 = reg * TOY_REG_WIDTH + subreg;

         if (reg > max_grf)
            max_grf = reg;
      }
   }

   /* the highest VRF still needs num_grf_per_vrf GRFs from its base */
   if (max_grf + num_grf_per_vrf - 1 > end_grf)
      tc_fail(tc, "failed to allocate registers");
}
 
/**
 * Allocate GRF registers to VRF registers.
 *
 * GRFs start_grf through end_grf (inclusive) are available; each VRF
 * occupies num_grf_per_vrf consecutive GRFs.
 */
void
toy_compiler_allocate_registers(struct toy_compiler *tc,
                                int start_grf, int end_grf,
                                int num_grf_per_vrf)
{
   /* compile-time switch; trivial_allocation() is presumably kept around
    * as a debugging fallback */
   if (true)
      linear_scan_allocation(tc, start_grf, end_grf, num_grf_per_vrf);
   else
      trivial_allocation(tc, start_grf, end_grf, num_grf_per_vrf);
}
/contrib/sdk/sources/Mesa/src/gallium/drivers/ilo/shader/toy_optimize.c
0,0 → 1,71
/*
* Mesa 3-D graphics library
*
* Copyright (C) 2012-2013 LunarG, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*
* Authors:
* Chia-I Wu <olv@lunarg.com>
*/
 
#include "toy_compiler.h"
#include "toy_tgsi.h"
#include "toy_optimize.h"
 
/**
 * This just eliminates instructions with null dst so far.
 *
 * An instruction is discarded only when it has no observable effect: it
 * writes nothing (null dst or empty writemask), does not update the flag
 * register (no conditional modifier), and does not write the accumulator.
 */
static void
eliminate_dead_code(struct toy_compiler *tc)
{
   struct toy_inst *inst;

   tc_head(tc);
   while ((inst = tc_next(tc)) != NULL) {
      switch (inst->opcode) {
      case BRW_OPCODE_IF:
      case BRW_OPCODE_ELSE:
      case BRW_OPCODE_ENDIF:
      case BRW_OPCODE_WHILE:
      case BRW_OPCODE_BREAK:
      case BRW_OPCODE_CONTINUE:
      case BRW_OPCODE_SEND:
      case BRW_OPCODE_SENDC:
      case BRW_OPCODE_NOP:
         /* never eliminated: control flow and message sends have side
          * effects regardless of dst */
         break;
      default:
         if (tdst_is_null(inst->dst) || !inst->dst.writemask) {
            /* math is always BRW_CONDITIONAL_NONE */
            if ((inst->opcode == BRW_OPCODE_MATH ||
                 inst->cond_modifier == BRW_CONDITIONAL_NONE) &&
                !inst->acc_wr_ctrl)
               tc_discard_inst(tc, inst);
         }
         break;
      }
   }
}
 
/**
 * Optimize the instruction list.  Dead code elimination is the only pass
 * so far.
 */
void
toy_compiler_optimize(struct toy_compiler *tc)
{
   eliminate_dead_code(tc);
}
/contrib/sdk/sources/Mesa/src/gallium/drivers/ilo/shader/toy_optimize.h
0,0 → 1,36
/*
* Mesa 3-D graphics library
*
* Copyright (C) 2012-2013 LunarG, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*
* Authors:
* Chia-I Wu <olv@lunarg.com>
*/
 
#ifndef TOY_OPTIMIZE_H
#define TOY_OPTIMIZE_H

#include "toy_compiler.h"

/* run the optimization passes (currently dead code elimination) on the
 * compiler's instruction list */
void
toy_compiler_optimize(struct toy_compiler *tc);

#endif /* TOY_OPTIMIZE_H */
/contrib/sdk/sources/Mesa/src/gallium/drivers/ilo/shader/toy_tgsi.c
0,0 → 1,2677
/*
* Mesa 3-D graphics library
*
* Copyright (C) 2012-2013 LunarG, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*
* Authors:
* Chia-I Wu <olv@lunarg.com>
*/
 
#include "tgsi/tgsi_parse.h"
#include "tgsi/tgsi_info.h"
#include "tgsi/tgsi_strings.h"
#include "util/u_hash_table.h"
#include "toy_helpers.h"
#include "toy_tgsi.h"
 
/*
 * map TGSI opcode to GEN opcode 1-to-1
 *
 * Entries record the GEN (or virtual TOY_OPCODE_*) opcode plus the expected
 * dst/src counts; unlisted TGSI opcodes map to opcode 0 and are rejected by
 * aos_simple().  Opcodes that need extra fixups (MIN/MAX conditional
 * modifiers, SUB negation, etc.) get them in aos_simple()'s switch.
 */
static const struct {
   int opcode;
   int num_dst;
   int num_src;
} aos_simple_opcode_map[TGSI_OPCODE_LAST] = {
   [TGSI_OPCODE_ARL]          = { BRW_OPCODE_RNDD,                1, 1 },
   [TGSI_OPCODE_MOV]          = { BRW_OPCODE_MOV,                 1, 1 },
   [TGSI_OPCODE_RCP]          = { TOY_OPCODE_INV,                 1, 1 },
   [TGSI_OPCODE_RSQ]          = { TOY_OPCODE_RSQ,                 1, 1 },
   [TGSI_OPCODE_MUL]          = { BRW_OPCODE_MUL,                 1, 2 },
   [TGSI_OPCODE_ADD]          = { BRW_OPCODE_ADD,                 1, 2 },
   [TGSI_OPCODE_DP3]          = { BRW_OPCODE_DP3,                 1, 2 },
   [TGSI_OPCODE_DP4]          = { BRW_OPCODE_DP4,                 1, 2 },
   [TGSI_OPCODE_MIN]          = { BRW_OPCODE_SEL,                 1, 2 },
   [TGSI_OPCODE_MAX]          = { BRW_OPCODE_SEL,                 1, 2 },
   /* a later pass will move src[2] to accumulator */
   [TGSI_OPCODE_MAD]          = { BRW_OPCODE_MAC,                 1, 3 },
   [TGSI_OPCODE_SUB]          = { BRW_OPCODE_ADD,                 1, 2 },
   [TGSI_OPCODE_SQRT]         = { TOY_OPCODE_SQRT,                1, 1 },
   [TGSI_OPCODE_FRC]          = { BRW_OPCODE_FRC,                 1, 1 },
   [TGSI_OPCODE_FLR]          = { BRW_OPCODE_RNDD,                1, 1 },
   [TGSI_OPCODE_ROUND]        = { BRW_OPCODE_RNDE,                1, 1 },
   [TGSI_OPCODE_EX2]          = { TOY_OPCODE_EXP,                 1, 1 },
   [TGSI_OPCODE_LG2]          = { TOY_OPCODE_LOG,                 1, 1 },
   [TGSI_OPCODE_POW]          = { TOY_OPCODE_POW,                 1, 2 },
   [TGSI_OPCODE_ABS]          = { BRW_OPCODE_MOV,                 1, 1 },
   [TGSI_OPCODE_DPH]          = { BRW_OPCODE_DPH,                 1, 2 },
   [TGSI_OPCODE_COS]          = { TOY_OPCODE_COS,                 1, 1 },
   [TGSI_OPCODE_KILL]         = { TOY_OPCODE_KIL,                 0, 0 },
   [TGSI_OPCODE_SIN]          = { TOY_OPCODE_SIN,                 1, 1 },
   [TGSI_OPCODE_ARR]          = { BRW_OPCODE_RNDZ,                1, 1 },
   [TGSI_OPCODE_DP2]          = { BRW_OPCODE_DP2,                 1, 2 },
   [TGSI_OPCODE_IF]           = { BRW_OPCODE_IF,                  0, 1 },
   [TGSI_OPCODE_UIF]          = { BRW_OPCODE_IF,                  0, 1 },
   [TGSI_OPCODE_ELSE]         = { BRW_OPCODE_ELSE,                0, 0 },
   [TGSI_OPCODE_ENDIF]        = { BRW_OPCODE_ENDIF,               0, 0 },
   [TGSI_OPCODE_I2F]          = { BRW_OPCODE_MOV,                 1, 1 },
   [TGSI_OPCODE_NOT]          = { BRW_OPCODE_NOT,                 1, 1 },
   [TGSI_OPCODE_TRUNC]        = { BRW_OPCODE_RNDZ,                1, 1 },
   [TGSI_OPCODE_SHL]          = { BRW_OPCODE_SHL,                 1, 2 },
   [TGSI_OPCODE_AND]          = { BRW_OPCODE_AND,                 1, 2 },
   [TGSI_OPCODE_OR]           = { BRW_OPCODE_OR,                  1, 2 },
   [TGSI_OPCODE_MOD]          = { TOY_OPCODE_INT_DIV_REMAINDER,   1, 2 },
   [TGSI_OPCODE_XOR]          = { BRW_OPCODE_XOR,                 1, 2 },
   [TGSI_OPCODE_EMIT]         = { TOY_OPCODE_EMIT,                0, 0 },
   [TGSI_OPCODE_ENDPRIM]      = { TOY_OPCODE_ENDPRIM,             0, 0 },
   [TGSI_OPCODE_NOP]          = { BRW_OPCODE_NOP,                 0, 0 },
   [TGSI_OPCODE_KILL_IF]      = { TOY_OPCODE_KIL,                 0, 1 },
   [TGSI_OPCODE_END]          = { BRW_OPCODE_NOP,                 0, 0 },
   [TGSI_OPCODE_F2I]          = { BRW_OPCODE_MOV,                 1, 1 },
   [TGSI_OPCODE_IDIV]         = { TOY_OPCODE_INT_DIV_QUOTIENT,    1, 2 },
   [TGSI_OPCODE_IMAX]         = { BRW_OPCODE_SEL,                 1, 2 },
   [TGSI_OPCODE_IMIN]         = { BRW_OPCODE_SEL,                 1, 2 },
   [TGSI_OPCODE_INEG]         = { BRW_OPCODE_MOV,                 1, 1 },
   [TGSI_OPCODE_ISHR]         = { BRW_OPCODE_ASR,                 1, 2 },
   [TGSI_OPCODE_F2U]          = { BRW_OPCODE_MOV,                 1, 1 },
   [TGSI_OPCODE_U2F]          = { BRW_OPCODE_MOV,                 1, 1 },
   [TGSI_OPCODE_UADD]         = { BRW_OPCODE_ADD,                 1, 2 },
   [TGSI_OPCODE_UDIV]         = { TOY_OPCODE_INT_DIV_QUOTIENT,    1, 2 },
   /* a later pass will move src[2] to accumulator */
   [TGSI_OPCODE_UMAD]         = { BRW_OPCODE_MAC,                 1, 3 },
   [TGSI_OPCODE_UMAX]         = { BRW_OPCODE_SEL,                 1, 2 },
   [TGSI_OPCODE_UMIN]         = { BRW_OPCODE_SEL,                 1, 2 },
   [TGSI_OPCODE_UMOD]         = { TOY_OPCODE_INT_DIV_REMAINDER,   1, 2 },
   [TGSI_OPCODE_UMUL]         = { BRW_OPCODE_MUL,                 1, 2 },
   [TGSI_OPCODE_USHR]         = { BRW_OPCODE_SHR,                 1, 2 },
   [TGSI_OPCODE_UARL]         = { BRW_OPCODE_MOV,                 1, 1 },
   [TGSI_OPCODE_IABS]         = { BRW_OPCODE_MOV,                 1, 1 },
};
 
/**
 * Translate a TGSI instruction that maps 1-to-1 (per the table above) to a
 * single GEN instruction, applying per-opcode fixups such as conditional
 * modifiers for MIN/MAX and operand negation/swizzling.
 */
static void
aos_simple(struct toy_compiler *tc,
           const struct tgsi_full_instruction *tgsi_inst,
           struct toy_dst *dst,
           struct toy_src *src)
{
   struct toy_inst *inst;
   int opcode;
   int cond_modifier = BRW_CONDITIONAL_NONE;
   int num_dst = tgsi_inst->Instruction.NumDstRegs;
   int num_src = tgsi_inst->Instruction.NumSrcRegs;
   int i;

   opcode = aos_simple_opcode_map[tgsi_inst->Instruction.Opcode].opcode;
   assert(num_dst == aos_simple_opcode_map[tgsi_inst->Instruction.Opcode].num_dst);
   assert(num_src == aos_simple_opcode_map[tgsi_inst->Instruction.Opcode].num_src);
   /* opcode 0 means the TGSI opcode has no entry in the map */
   if (!opcode) {
      assert(!"invalid aos_simple() call");
      return;
   }

   /* no need to emit nop */
   if (opcode == BRW_OPCODE_NOP)
      return;

   inst = tc_add(tc);
   if (!inst)
      return;

   inst->opcode = opcode;

   /* per-opcode fixups on top of the 1-to-1 mapping */
   switch (tgsi_inst->Instruction.Opcode) {
   case TGSI_OPCODE_MIN:
   case TGSI_OPCODE_IMIN:
   case TGSI_OPCODE_UMIN:
      /* min(a, b) == SEL.l(a, b) */
      cond_modifier = BRW_CONDITIONAL_L;
      break;
   case TGSI_OPCODE_MAX:
   case TGSI_OPCODE_IMAX:
   case TGSI_OPCODE_UMAX:
      /* max(a, b) == SEL.ge(a, b) */
      cond_modifier = BRW_CONDITIONAL_GE;
      break;
   case TGSI_OPCODE_SUB:
      /* a - b == a + (-b) */
      src[1] = tsrc_negate(src[1]);
      break;
   case TGSI_OPCODE_ABS:
   case TGSI_OPCODE_IABS:
      src[0] = tsrc_absolute(src[0]);
      break;
   case TGSI_OPCODE_IF:
      /* synthesize "src.x != 0.0f" for the IF condition */
      cond_modifier = BRW_CONDITIONAL_NEQ;
      num_src = 2;
      assert(src[0].type == TOY_TYPE_F);
      src[0] = tsrc_swizzle1(src[0], TOY_SWIZZLE_X);
      src[1] = tsrc_imm_f(0.0f);
      break;
   case TGSI_OPCODE_UIF:
      /* synthesize "src.x != 0" for the IF condition */
      cond_modifier = BRW_CONDITIONAL_NEQ;
      num_src = 2;
      assert(src[0].type == TOY_TYPE_UD);
      src[0] = tsrc_swizzle1(src[0], TOY_SWIZZLE_X);
      src[1] = tsrc_imm_d(0);
      break;
   case TGSI_OPCODE_INEG:
      src[0] = tsrc_negate(src[0]);
      break;
   case TGSI_OPCODE_RCP:
   case TGSI_OPCODE_RSQ:
   case TGSI_OPCODE_EX2:
   case TGSI_OPCODE_LG2:
   case TGSI_OPCODE_COS:
   case TGSI_OPCODE_SIN:
      /* scalar math ops read only the x channel */
      src[0] = tsrc_swizzle1(src[0], TOY_SWIZZLE_X);
      break;
   case TGSI_OPCODE_POW:
      src[0] = tsrc_swizzle1(src[0], TOY_SWIZZLE_X);
      src[1] = tsrc_swizzle1(src[1], TOY_SWIZZLE_X);
      break;
   }

   inst->cond_modifier = cond_modifier;

   if (num_dst) {
      assert(num_dst == 1);
      inst->dst = dst[0];
   }

   assert(num_src <= Elements(inst->src));
   for (i = 0; i < num_src; i++)
      inst->src[i] = src[i];
}
 
/**
 * Translate TGSI set-on-condition opcodes (SLT, SGE, SEQ, ...): dst gets
 * "one" where src[0] OP src[1] holds and "zero" elsewhere, via a CMP that
 * sets the flag register followed by a predicated MOV.
 */
static void
aos_set_on_cond(struct toy_compiler *tc,
                const struct tgsi_full_instruction *tgsi_inst,
                struct toy_dst *dst,
                struct toy_src *src)
{
   struct toy_inst *inst;
   int cond;
   struct toy_src zero, one;

   switch (tgsi_inst->Instruction.Opcode) {
   case TGSI_OPCODE_SLT:
   case TGSI_OPCODE_ISLT:
   case TGSI_OPCODE_USLT:
      cond = BRW_CONDITIONAL_L;
      break;
   case TGSI_OPCODE_SGE:
   case TGSI_OPCODE_ISGE:
   case TGSI_OPCODE_USGE:
      cond = BRW_CONDITIONAL_GE;
      break;
   case TGSI_OPCODE_SEQ:
   case TGSI_OPCODE_USEQ:
      cond = BRW_CONDITIONAL_EQ;
      break;
   case TGSI_OPCODE_SGT:
      cond = BRW_CONDITIONAL_G;
      break;
   case TGSI_OPCODE_SLE:
      cond = BRW_CONDITIONAL_LE;
      break;
   case TGSI_OPCODE_SNE:
   case TGSI_OPCODE_USNE:
      cond = BRW_CONDITIONAL_NEQ;
      break;
   default:
      assert(!"invalid aos_set_on_cond() call");
      return;
   }

   /* note that for integer versions, all bits are set */
   switch (dst[0].type) {
   case TOY_TYPE_F:
   default:
      zero = tsrc_imm_f(0.0f);
      one = tsrc_imm_f(1.0f);
      break;
   case TOY_TYPE_D:
      zero = tsrc_imm_d(0);
      one = tsrc_imm_d(-1);
      break;
   case TOY_TYPE_UD:
      zero = tsrc_imm_ud(0);
      one = tsrc_imm_ud(~0);
      break;
   }

   /* default to zero, then overwrite the channels where the condition held */
   tc_MOV(tc, dst[0], zero);
   tc_CMP(tc, tdst_null(), src[0], src[1], cond);
   inst = tc_MOV(tc, dst[0], one);
   inst->pred_ctrl = BRW_PREDICATE_NORMAL;
}
 
/**
 * Translate TGSI CMP/UCMP: per channel, dst = (src[0] < 0) ? src[1] :
 * src[2], implemented as a flag-setting CMP followed by a predicated SEL.
 */
static void
aos_compare(struct toy_compiler *tc,
            const struct tgsi_full_instruction *tgsi_inst,
            struct toy_dst *dst,
            struct toy_src *src)
{
   struct toy_inst *inst;
   struct toy_src zero;

   if (tgsi_inst->Instruction.Opcode == TGSI_OPCODE_CMP) {
      zero = tsrc_imm_f(0.0f);
   }
   else if (tgsi_inst->Instruction.Opcode == TGSI_OPCODE_UCMP) {
      zero = tsrc_imm_ud(0);
   }
   else {
      assert(!"invalid aos_compare() call");
      return;
   }

   tc_CMP(tc, tdst_null(), src[0], zero, BRW_CONDITIONAL_L);
   inst = tc_SEL(tc, dst[0], src[1], src[2], BRW_CONDITIONAL_NONE);
   inst->pred_ctrl = BRW_PREDICATE_NORMAL;
}
 
/**
 * Translate TGSI SSG/ISSG: dst = sign(src[0]) per channel, built as a zero
 * default plus two predicated MOVs for the positive and negative cases.
 */
static void
aos_set_sign(struct toy_compiler *tc,
             const struct tgsi_full_instruction *tgsi_inst,
             struct toy_dst *dst,
             struct toy_src *src)
{
   struct toy_inst *inst;
   struct toy_src zero, one, neg_one;

   switch (tgsi_inst->Instruction.Opcode) {
   case TGSI_OPCODE_SSG:
      zero = tsrc_imm_f(0.0f);
      one = tsrc_imm_f(1.0f);
      neg_one = tsrc_imm_f(-1.0f);
      break;
   case TGSI_OPCODE_ISSG:
      zero = tsrc_imm_d(0);
      one = tsrc_imm_d(1);
      neg_one = tsrc_imm_d(-1);
      break;
   default:
      assert(!"invalid aos_set_sign() call");
      return;
   }

   /* default: zero */
   tc_MOV(tc, dst[0], zero);

   /* channels where src > 0 become one */
   tc_CMP(tc, tdst_null(), src[0], zero, BRW_CONDITIONAL_G);
   inst = tc_MOV(tc, dst[0], one);
   inst->pred_ctrl = BRW_PREDICATE_NORMAL;

   /* channels where src < 0 become minus one */
   tc_CMP(tc, tdst_null(), src[0], zero, BRW_CONDITIONAL_L);
   inst = tc_MOV(tc, dst[0], neg_one);
   inst->pred_ctrl = BRW_PREDICATE_NORMAL;
}
 
/**
 * Translate a TGSI texturing opcode into the matching virtual
 * TOY_OPCODE_TGSI_TX* instruction, to be lowered to a sampler message by a
 * later pass.
 */
static void
aos_tex(struct toy_compiler *tc,
        const struct tgsi_full_instruction *tgsi_inst,
        struct toy_dst *dst,
        struct toy_src *src)
{
   struct toy_inst *inst;
   enum toy_opcode opcode;
   int i;

   switch (tgsi_inst->Instruction.Opcode) {
   case TGSI_OPCODE_TEX:
      opcode = TOY_OPCODE_TGSI_TEX;
      break;
   case TGSI_OPCODE_TXD:
      opcode = TOY_OPCODE_TGSI_TXD;
      break;
   case TGSI_OPCODE_TXP:
      opcode = TOY_OPCODE_TGSI_TXP;
      break;
   case TGSI_OPCODE_TXB:
      opcode = TOY_OPCODE_TGSI_TXB;
      break;
   case TGSI_OPCODE_TXL:
      opcode = TOY_OPCODE_TGSI_TXL;
      break;
   case TGSI_OPCODE_TXF:
      opcode = TOY_OPCODE_TGSI_TXF;
      break;
   case TGSI_OPCODE_TXQ:
      opcode = TOY_OPCODE_TGSI_TXQ;
      break;
   case TGSI_OPCODE_TXQ_LZ:
      opcode = TOY_OPCODE_TGSI_TXQ_LZ;
      break;
   case TGSI_OPCODE_TEX2:
      opcode = TOY_OPCODE_TGSI_TEX2;
      break;
   case TGSI_OPCODE_TXB2:
      opcode = TOY_OPCODE_TGSI_TXB2;
      break;
   case TGSI_OPCODE_TXL2:
      opcode = TOY_OPCODE_TGSI_TXL2;
      break;
   default:
      assert(!"unsupported texturing opcode");
      return;
      break;
   }

   assert(tgsi_inst->Instruction.Texture);

   inst = tc_add(tc);
   inst->opcode = opcode;
   inst->tex.target = tgsi_inst->Texture.Texture;

   assert(tgsi_inst->Instruction.NumSrcRegs <= Elements(inst->src));
   assert(tgsi_inst->Instruction.NumDstRegs == 1);

   inst->dst = dst[0];
   for (i = 0; i < tgsi_inst->Instruction.NumSrcRegs; i++)
      inst->src[i] = src[i];

   /* texel offsets are not implemented */
   for (i = 0; i < tgsi_inst->Texture.NumOffsets; i++)
      tc_fail(tc, "texelFetchOffset unsupported");
}
 
/**
 * Translate a TGSI SAMPLE_* opcode into the matching virtual
 * TOY_OPCODE_TGSI_SAMPLE* instruction.  This path is untested (see the
 * assert below).
 */
static void
aos_sample(struct toy_compiler *tc,
           const struct tgsi_full_instruction *tgsi_inst,
           struct toy_dst *dst,
           struct toy_src *src)
{
   struct toy_inst *inst;
   enum toy_opcode opcode;
   int i;

   assert(!"sampling untested");

   switch (tgsi_inst->Instruction.Opcode) {
   case TGSI_OPCODE_SAMPLE:
      opcode = TOY_OPCODE_TGSI_SAMPLE;
      break;
   case TGSI_OPCODE_SAMPLE_I:
      opcode = TOY_OPCODE_TGSI_SAMPLE_I;
      break;
   case TGSI_OPCODE_SAMPLE_I_MS:
      opcode = TOY_OPCODE_TGSI_SAMPLE_I_MS;
      break;
   case TGSI_OPCODE_SAMPLE_B:
      opcode = TOY_OPCODE_TGSI_SAMPLE_B;
      break;
   case TGSI_OPCODE_SAMPLE_C:
      opcode = TOY_OPCODE_TGSI_SAMPLE_C;
      break;
   case TGSI_OPCODE_SAMPLE_C_LZ:
      opcode = TOY_OPCODE_TGSI_SAMPLE_C_LZ;
      break;
   case TGSI_OPCODE_SAMPLE_D:
      opcode = TOY_OPCODE_TGSI_SAMPLE_D;
      break;
   case TGSI_OPCODE_SAMPLE_L:
      opcode = TOY_OPCODE_TGSI_SAMPLE_L;
      break;
   case TGSI_OPCODE_GATHER4:
      opcode = TOY_OPCODE_TGSI_GATHER4;
      break;
   case TGSI_OPCODE_SVIEWINFO:
      opcode = TOY_OPCODE_TGSI_SVIEWINFO;
      break;
   case TGSI_OPCODE_SAMPLE_POS:
      opcode = TOY_OPCODE_TGSI_SAMPLE_POS;
      break;
   case TGSI_OPCODE_SAMPLE_INFO:
      opcode = TOY_OPCODE_TGSI_SAMPLE_INFO;
      break;
   default:
      assert(!"unsupported sampling opcode");
      return;
      break;
   }

   inst = tc_add(tc);
   inst->opcode = opcode;

   assert(tgsi_inst->Instruction.NumSrcRegs <= Elements(inst->src));
   assert(tgsi_inst->Instruction.NumDstRegs == 1);

   inst->dst = dst[0];
   for (i = 0; i < tgsi_inst->Instruction.NumSrcRegs; i++)
      inst->src[i] = src[i];
}
 
/**
 * Translate TGSI LIT: dst = (1, max(0, src.x), lit-specular, 1).  The y and
 * z channels are only computed when the writemask asks for them; the
 * specular term pow(src.y, src.w) is gated on src.x > 0 via predication.
 */
static void
aos_LIT(struct toy_compiler *tc,
        const struct tgsi_full_instruction *tgsi_inst,
        struct toy_dst *dst,
        struct toy_src *src)
{
   struct toy_inst *inst;

   tc_MOV(tc, tdst_writemask(dst[0], TOY_WRITEMASK_XW), tsrc_imm_f(1.0f));

   if (!(dst[0].writemask & TOY_WRITEMASK_YZ))
      return;

   /* default y and z to zero; overwritten below when src.x > 0 */
   tc_MOV(tc, tdst_writemask(dst[0], TOY_WRITEMASK_YZ), tsrc_imm_f(0.0f));

   tc_CMP(tc, tdst_null(),
          tsrc_swizzle1(src[0], TOY_SWIZZLE_X),
          tsrc_imm_f(0.0f),
          BRW_CONDITIONAL_G);

   inst = tc_MOV(tc,
                 tdst_writemask(dst[0], TOY_WRITEMASK_Y),
                 tsrc_swizzle1(src[0], TOY_SWIZZLE_X));
   inst->pred_ctrl = BRW_PREDICATE_NORMAL;

   /* clamp W to (-128, 128)? */
   inst = tc_POW(tc,
                 tdst_writemask(dst[0], TOY_WRITEMASK_Z),
                 tsrc_swizzle1(src[0], TOY_SWIZZLE_Y),
                 tsrc_swizzle1(src[0], TOY_SWIZZLE_W));
   inst->pred_ctrl = BRW_PREDICATE_NORMAL;
}
 
/**
 * Translate TGSI EXP: dst = (2^floor(src.x), frac(src.x), 2^src.x, 1).
 * The x channel is produced by building the IEEE-754 single bit pattern of
 * 2^floor(src.x) directly: biased exponent (floor + 127) shifted into bits
 * 30:23, with a zero mantissa.
 */
static void
aos_EXP(struct toy_compiler *tc,
        const struct tgsi_full_instruction *tgsi_inst,
        struct toy_dst *dst,
        struct toy_src *src)
{
   struct toy_src src0 = tsrc_swizzle1(src[0], TOY_SWIZZLE_X);

   if (dst[0].writemask & TOY_WRITEMASK_X) {
      struct toy_dst tmp =
         tdst_d(tdst_writemask(tc_alloc_tmp(tc), TOY_WRITEMASK_X));

      tc_RNDD(tc, tmp, src0);

      /* construct the floating point number manually */
      tc_ADD(tc, tmp, tsrc_from(tmp), tsrc_imm_d(127));
      tc_SHL(tc, tdst_d(tdst_writemask(dst[0], TOY_WRITEMASK_X)),
             tsrc_from(tmp), tsrc_imm_d(23));
   }

   tc_FRC(tc, tdst_writemask(dst[0], TOY_WRITEMASK_Y), src0);
   tc_EXP(tc, tdst_writemask(dst[0], TOY_WRITEMASK_Z), src0);
   tc_MOV(tc, tdst_writemask(dst[0], TOY_WRITEMASK_W), tsrc_imm_f(1.0f));
}
 
/**
 * Translate TGSI LOG: dst = (floor(lg2(|src.x|)), |src.x| / 2^floor(...),
 * lg2(|src.x|), 1).  The x and y channels are derived by picking apart the
 * IEEE-754 single bit pattern: x is the unbiased exponent, y the mantissa
 * with the exponent forced to the bias (i.e. rescaled into [1, 2)).
 */
static void
aos_LOG(struct toy_compiler *tc,
        const struct tgsi_full_instruction *tgsi_inst,
        struct toy_dst *dst,
        struct toy_src *src)
{
   struct toy_src src0 = tsrc_swizzle1(src[0], TOY_SWIZZLE_X);

   if (dst[0].writemask & TOY_WRITEMASK_XY) {
      struct toy_dst tmp;

      tmp = tdst_d(tdst_writemask(tc_alloc_tmp(tc), TOY_WRITEMASK_X));

      /* exponent */
      tc_SHR(tc, tmp, tsrc_absolute(tsrc_d(src0)), tsrc_imm_d(23));
      tc_ADD(tc, tdst_writemask(dst[0], TOY_WRITEMASK_X),
             tsrc_from(tmp), tsrc_imm_d(-127));

      /* mantissa */
      tc_AND(tc, tmp, tsrc_d(src0), tsrc_imm_d((1 << 23) - 1));
      tc_OR(tc, tdst_writemask(tdst_d(dst[0]), TOY_WRITEMASK_Y),
            tsrc_from(tmp), tsrc_imm_d(127 << 23));
   }

   tc_LOG(tc, tdst_writemask(dst[0], TOY_WRITEMASK_Z), src0);
   tc_MOV(tc, tdst_writemask(dst[0], TOY_WRITEMASK_W), tsrc_imm_f(1.0f));
}
 
/**
 * Translate TGSI DST (distance vector):
 * dst = (1, src0.y * src1.y, src0.z, src1.w).
 */
static void
aos_DST(struct toy_compiler *tc,
        const struct tgsi_full_instruction *tgsi_inst,
        struct toy_dst *dst,
        struct toy_src *src)
{
   tc_MOV(tc, tdst_writemask(dst[0], TOY_WRITEMASK_X), tsrc_imm_f(1.0f));
   tc_MUL(tc, tdst_writemask(dst[0], TOY_WRITEMASK_Y), src[0], src[1]);
   tc_MOV(tc, tdst_writemask(dst[0], TOY_WRITEMASK_Z), src[0]);
   tc_MOV(tc, tdst_writemask(dst[0], TOY_WRITEMASK_W), src[1]);
}
 
static void
aos_LRP(struct toy_compiler *tc,
const struct tgsi_full_instruction *tgsi_inst,
struct toy_dst *dst,
struct toy_src *src)
{
struct toy_dst tmp = tc_alloc_tmp(tc);
 
tc_ADD(tc, tmp, tsrc_negate(src[0]), tsrc_imm_f(1.0f));
tc_MUL(tc, tmp, tsrc_from(tmp), src[2]);
tc_MAC(tc, dst[0], src[0], src[1], tsrc_from(tmp));
}
 
/**
 * Translate TGSI CND: per channel, dst = (src[2] > 0.5) ? src[0] : src[1].
 * Untested (see the assert).
 */
static void
aos_CND(struct toy_compiler *tc,
        const struct tgsi_full_instruction *tgsi_inst,
        struct toy_dst *dst,
        struct toy_src *src)
{
   struct toy_inst *inst;

   assert(!"CND untested");

   tc_CMP(tc, tdst_null(), src[2], tsrc_imm_f(0.5f), BRW_CONDITIONAL_G);
   inst = tc_SEL(tc, dst[0], src[0], src[1], BRW_CONDITIONAL_NONE);
   inst->pred_ctrl = BRW_PREDICATE_NORMAL;
}
 
static void
aos_DP2A(struct toy_compiler *tc,
const struct tgsi_full_instruction *tgsi_inst,
struct toy_dst *dst,
struct toy_src *src)
{
struct toy_dst tmp = tc_alloc_tmp(tc);
 
assert(!"DP2A untested");
 
tc_DP2(tc, tmp, src[0], src[1]);
tc_ADD(tc, dst[0], tsrc_swizzle1(tsrc_from(tmp), TOY_SWIZZLE_X), src[2]);
}
 
/**
 * Translate TGSI CLAMP: dst = min(max(src0, src1), src2), built from two
 * SELs.  Untested (see the assert).
 */
static void
aos_CLAMP(struct toy_compiler *tc,
          const struct tgsi_full_instruction *tgsi_inst,
          struct toy_dst *dst,
          struct toy_src *src)
{
   assert(!"CLAMP untested");

   /* SEL.ge == max, SEL.l == min */
   tc_SEL(tc, dst[0], src[0], src[1], BRW_CONDITIONAL_GE);
   tc_SEL(tc, dst[0], src[2], tsrc_from(dst[0]), BRW_CONDITIONAL_L);
}
 
/**
 * Translate TGSI XPD (cross product):
 * dst.xyz = src0.yzx * src1.zxy - src0.zxy * src1.yzx, dst.w = 1,
 * implemented as MUL followed by MAC with the negated temporary.
 */
static void
aos_XPD(struct toy_compiler *tc,
        const struct tgsi_full_instruction *tgsi_inst,
        struct toy_dst *dst,
        struct toy_src *src)
{
   struct toy_dst tmp = tc_alloc_tmp(tc);

   /* tmp = src0.zxy * src1.yzx */
   tc_MUL(tc, tdst_writemask(tmp, TOY_WRITEMASK_XYZ),
          tsrc_swizzle(src[0], TOY_SWIZZLE_Z, TOY_SWIZZLE_X,
                       TOY_SWIZZLE_Y, TOY_SWIZZLE_W),
          tsrc_swizzle(src[1], TOY_SWIZZLE_Y, TOY_SWIZZLE_Z,
                       TOY_SWIZZLE_X, TOY_SWIZZLE_W));

   /* dst.xyz = src0.yzx * src1.zxy - tmp */
   tc_MAC(tc, tdst_writemask(dst[0], TOY_WRITEMASK_XYZ),
          tsrc_swizzle(src[0], TOY_SWIZZLE_Y, TOY_SWIZZLE_Z,
                       TOY_SWIZZLE_X, TOY_SWIZZLE_W),
          tsrc_swizzle(src[1], TOY_SWIZZLE_Z, TOY_SWIZZLE_X,
                       TOY_SWIZZLE_Y, TOY_SWIZZLE_W),
          tsrc_negate(tsrc_from(tmp)));

   tc_MOV(tc, tdst_writemask(dst[0], TOY_WRITEMASK_W),
          tsrc_imm_f(1.0f));
}
 
/*
 * Translate TGSI_OPCODE_PK2H in AoS form: pack the low 16 bits of src.x and
 * src.y into a single 32-bit result (src.y in the high half).
 *
 * NOTE(review): only bitwise packing is emitted here; no float32-to-float16
 * conversion is visible, so this presumably assumes the source channels
 * already hold half-float bit patterns -- confirm against the callers.
 */
static void
aos_PK2H(struct toy_compiler *tc,
         const struct tgsi_full_instruction *tgsi_inst,
         struct toy_dst *dst,
         struct toy_src *src)
{
   const struct toy_src h1 = tsrc_ud(tsrc_swizzle1(src[0], TOY_SWIZZLE_X));
   const struct toy_src h2 = tsrc_ud(tsrc_swizzle1(src[0], TOY_SWIZZLE_Y));
   struct toy_dst tmp = tdst_ud(tc_alloc_tmp(tc));

   assert(!"PK2H untested");

   tc_SHL(tc, tmp, h2, tsrc_imm_ud(16));
   tc_OR(tc, tdst_ud(dst[0]), h1, tsrc_from(tmp));
}
 
/*
 * Translate TGSI_OPCODE_SFL (set false) in AoS form: dst = 0.0.
 */
static void
aos_SFL(struct toy_compiler *tc,
        const struct tgsi_full_instruction *tgsi_inst,
        struct toy_dst *dst,
        struct toy_src *src)
{
   assert(!"SFL untested");

   tc_MOV(tc, dst[0], tsrc_imm_f(0.0f));
}
 
/*
 * Translate TGSI_OPCODE_STR (set true) in AoS form: dst = 1.0.
 */
static void
aos_STR(struct toy_compiler *tc,
        const struct tgsi_full_instruction *tgsi_inst,
        struct toy_dst *dst,
        struct toy_src *src)
{
   assert(!"STR untested");

   tc_MOV(tc, dst[0], tsrc_imm_f(1.0f));
}
 
/*
 * Translate TGSI_OPCODE_UP2H in AoS form: the X/Z channels receive the low
 * 16 bits of the source and the Y/W channels receive the high 16 bits.
 *
 * NOTE(review): as with PK2H, no half-to-float conversion is emitted -- the
 * result is raw 16-bit fields, presumably to be consumed as half-float bit
 * patterns.  Confirm this matches the expected UP2H semantics.
 */
static void
aos_UP2H(struct toy_compiler *tc,
         const struct tgsi_full_instruction *tgsi_inst,
         struct toy_dst *dst,
         struct toy_src *src)
{
   assert(!"UP2H untested");

   tc_AND(tc, tdst_writemask(tdst_ud(dst[0]), TOY_WRITEMASK_XZ),
         tsrc_ud(src[0]), tsrc_imm_ud(0xffff));
   tc_SHR(tc, tdst_writemask(tdst_ud(dst[0]), TOY_WRITEMASK_YW),
         tsrc_ud(src[0]), tsrc_imm_ud(16));
}
 
/*
 * Translate TGSI_OPCODE_SCS in AoS form:
 * dst.x = cos(src.x), dst.y = sin(src.x), dst.z = 0.0, dst.w = 1.0
 */
static void
aos_SCS(struct toy_compiler *tc,
        const struct tgsi_full_instruction *tgsi_inst,
        struct toy_dst *dst,
        struct toy_src *src)
{
   assert(!"SCS untested");

   tc_add1(tc, TOY_OPCODE_COS,
         tdst_writemask(dst[0], TOY_WRITEMASK_X), src[0]);

   tc_add1(tc, TOY_OPCODE_SIN,
         tdst_writemask(dst[0], TOY_WRITEMASK_Y), src[0]);

   tc_MOV(tc, tdst_writemask(dst[0], TOY_WRITEMASK_Z), tsrc_imm_f(0.0f));
   tc_MOV(tc, tdst_writemask(dst[0], TOY_WRITEMASK_W), tsrc_imm_f(1.0f));
}
 
/*
 * Translate TGSI_OPCODE_NRM (3-component normalize) in AoS form:
 * dst.xyz = src.xyz scaled by tc_INV(dp3(src, src)), dst.w = 1.0.
 *
 * NOTE(review): normalization requires the reciprocal *square root* of the
 * dot product; if tc_INV emits a plain reciprocal this computes src / |src|^2
 * instead of src / |src|.  The path is asserted untested -- verify tc_INV's
 * semantics before enabling it.
 */
static void
aos_NRM(struct toy_compiler *tc,
        const struct tgsi_full_instruction *tgsi_inst,
        struct toy_dst *dst,
        struct toy_src *src)
{
   struct toy_dst tmp = tc_alloc_tmp(tc);

   assert(!"NRM untested");

   tc_DP3(tc, tmp, src[0], src[0]);
   tc_INV(tc, tmp, tsrc_from(tmp));
   tc_MUL(tc, tdst_writemask(dst[0], TOY_WRITEMASK_XYZ),
         src[0], tsrc_from(tmp));

   tc_MOV(tc, tdst_writemask(dst[0], TOY_WRITEMASK_W), tsrc_imm_f(1.0f));
}
 
/*
 * Translate TGSI_OPCODE_DIV in AoS form: dst = src0 * (1 / src1).
 */
static void
aos_DIV(struct toy_compiler *tc,
        const struct tgsi_full_instruction *tgsi_inst,
        struct toy_dst *dst,
        struct toy_src *src)
{
   struct toy_dst tmp = tc_alloc_tmp(tc);

   assert(!"DIV untested");

   tc_INV(tc, tmp, src[1]);
   tc_MUL(tc, dst[0], src[0], tsrc_from(tmp));
}
 
/*
 * Translate TGSI_OPCODE_BRK: emit a hardware BREAK to exit the current loop.
 */
static void
aos_BRK(struct toy_compiler *tc,
        const struct tgsi_full_instruction *tgsi_inst,
        struct toy_dst *dst,
        struct toy_src *src)
{
   tc_add0(tc, BRW_OPCODE_BREAK);
}
 
static void
aos_CEIL(struct toy_compiler *tc,
const struct tgsi_full_instruction *tgsi_inst,
struct toy_dst *dst,
struct toy_src *src)
{
struct toy_dst tmp = tc_alloc_tmp(tc);
 
tc_RNDD(tc, tmp, tsrc_negate(src[0]));
tc_MOV(tc, dst[0], tsrc_negate(tsrc_from(tmp)));
}
 
/*
 * Translate TGSI_OPCODE_SAD in AoS form: dst = |src0 - src1| + src2.
 */
static void
aos_SAD(struct toy_compiler *tc,
        const struct tgsi_full_instruction *tgsi_inst,
        struct toy_dst *dst,
        struct toy_src *src)
{
   struct toy_dst tmp = tc_alloc_tmp(tc);

   assert(!"SAD untested");

   tc_ADD(tc, tmp, src[0], tsrc_negate(src[1]));
   tc_ADD(tc, dst[0], tsrc_absolute(tsrc_from(tmp)), src[2]);
}
 
/*
 * Translate TGSI_OPCODE_CONT: emit a hardware CONTINUE for the current loop.
 */
static void
aos_CONT(struct toy_compiler *tc,
         const struct tgsi_full_instruction *tgsi_inst,
         struct toy_dst *dst,
         struct toy_src *src)
{
   tc_add0(tc, BRW_OPCODE_CONTINUE);
}
 
/*
 * Translate TGSI_OPCODE_BGNLOOP: emit a DO instruction that only marks the
 * top of the loop (no code is generated for it).
 */
static void
aos_BGNLOOP(struct toy_compiler *tc,
            const struct tgsi_full_instruction *tgsi_inst,
            struct toy_dst *dst,
            struct toy_src *src)
{
   struct toy_inst *inst;

   inst = tc_add0(tc, BRW_OPCODE_DO);
   /* this is just a marker */
   inst->marker = true;
}
 
/*
 * Translate TGSI_OPCODE_ENDLOOP: emit a hardware WHILE to close the loop
 * opened by the matching DO marker.
 */
static void
aos_ENDLOOP(struct toy_compiler *tc,
            const struct tgsi_full_instruction *tgsi_inst,
            struct toy_dst *dst,
            struct toy_src *src)
{
   tc_add0(tc, BRW_OPCODE_WHILE);
}
 
/*
 * Translate TGSI_OPCODE_NRM4 (4-component normalize) in AoS form:
 * scale every channel of src by tc_INV(dp4(src, src)).
 *
 * Fixed: the previous code multiplied tsrc_swizzle1(src[0], TOY_SWIZZLE_X)
 * by the scale, replicating src.x/len into all channels; TGSI NRM4 scales
 * each channel individually.
 *
 * NOTE(review): as with NRM, normalization needs the reciprocal square root
 * of the dot product; verify tc_INV is not a plain reciprocal before
 * enabling this asserted-untested path.
 */
static void
aos_NRM4(struct toy_compiler *tc,
         const struct tgsi_full_instruction *tgsi_inst,
         struct toy_dst *dst,
         struct toy_src *src)
{
   struct toy_dst tmp = tc_alloc_tmp(tc);

   assert(!"NRM4 untested");

   tc_DP4(tc, tmp, src[0], src[0]);
   tc_INV(tc, tmp, tsrc_from(tmp));
   tc_MUL(tc, dst[0], src[0], tsrc_from(tmp));
}
 
/*
 * Fallback for TGSI opcodes with no AoS translation: warn with the opcode
 * name and mark the compilation as failed.
 */
static void
aos_unsupported(struct toy_compiler *tc,
                const struct tgsi_full_instruction *tgsi_inst,
                struct toy_dst *dst,
                struct toy_src *src)
{
   ilo_warn("unsupported TGSI opcode: TGSI_OPCODE_%s\n",
         tgsi_get_opcode_name(tgsi_inst->Instruction.Opcode));

   tc_fail(tc, "unsupported TGSI instruction");
}
 
/*
 * Per-opcode AoS translation functions, indexed by TGSI opcode.  Entries
 * indexed by a bare number (22, 23, 32, ...) are gaps in the TGSI opcode
 * numbering and are explicitly unsupported.
 */
static const toy_tgsi_translate aos_translate_table[TGSI_OPCODE_LAST] = {
   [TGSI_OPCODE_ARL]          = aos_simple,
   [TGSI_OPCODE_MOV]          = aos_simple,
   [TGSI_OPCODE_LIT]          = aos_LIT,
   [TGSI_OPCODE_RCP]          = aos_simple,
   [TGSI_OPCODE_RSQ]          = aos_simple,
   [TGSI_OPCODE_EXP]          = aos_EXP,
   [TGSI_OPCODE_LOG]          = aos_LOG,
   [TGSI_OPCODE_MUL]          = aos_simple,
   [TGSI_OPCODE_ADD]          = aos_simple,
   [TGSI_OPCODE_DP3]          = aos_simple,
   [TGSI_OPCODE_DP4]          = aos_simple,
   [TGSI_OPCODE_DST]          = aos_DST,
   [TGSI_OPCODE_MIN]          = aos_simple,
   [TGSI_OPCODE_MAX]          = aos_simple,
   [TGSI_OPCODE_SLT]          = aos_set_on_cond,
   [TGSI_OPCODE_SGE]          = aos_set_on_cond,
   [TGSI_OPCODE_MAD]          = aos_simple,
   [TGSI_OPCODE_SUB]          = aos_simple,
   [TGSI_OPCODE_LRP]          = aos_LRP,
   [TGSI_OPCODE_CND]          = aos_CND,
   [TGSI_OPCODE_SQRT]         = aos_simple,
   [TGSI_OPCODE_DP2A]         = aos_DP2A,
   [22]                       = aos_unsupported,
   [23]                       = aos_unsupported,
   [TGSI_OPCODE_FRC]          = aos_simple,
   [TGSI_OPCODE_CLAMP]        = aos_CLAMP,
   [TGSI_OPCODE_FLR]          = aos_simple,
   [TGSI_OPCODE_ROUND]        = aos_simple,
   [TGSI_OPCODE_EX2]          = aos_simple,
   [TGSI_OPCODE_LG2]          = aos_simple,
   [TGSI_OPCODE_POW]          = aos_simple,
   [TGSI_OPCODE_XPD]          = aos_XPD,
   [32]                       = aos_unsupported,
   [TGSI_OPCODE_ABS]          = aos_simple,
   [TGSI_OPCODE_RCC]          = aos_unsupported,
   [TGSI_OPCODE_DPH]          = aos_simple,
   [TGSI_OPCODE_COS]          = aos_simple,
   [TGSI_OPCODE_DDX]          = aos_unsupported,
   [TGSI_OPCODE_DDY]          = aos_unsupported,
   [TGSI_OPCODE_KILL]         = aos_simple,
   [TGSI_OPCODE_PK2H]         = aos_PK2H,
   [TGSI_OPCODE_PK2US]        = aos_unsupported,
   [TGSI_OPCODE_PK4B]         = aos_unsupported,
   [TGSI_OPCODE_PK4UB]        = aos_unsupported,
   [TGSI_OPCODE_RFL]          = aos_unsupported,
   [TGSI_OPCODE_SEQ]          = aos_set_on_cond,
   [TGSI_OPCODE_SFL]          = aos_SFL,
   [TGSI_OPCODE_SGT]          = aos_set_on_cond,
   [TGSI_OPCODE_SIN]          = aos_simple,
   [TGSI_OPCODE_SLE]          = aos_set_on_cond,
   [TGSI_OPCODE_SNE]          = aos_set_on_cond,
   [TGSI_OPCODE_STR]          = aos_STR,
   [TGSI_OPCODE_TEX]          = aos_tex,
   [TGSI_OPCODE_TXD]          = aos_tex,
   [TGSI_OPCODE_TXP]          = aos_tex,
   [TGSI_OPCODE_UP2H]         = aos_UP2H,
   [TGSI_OPCODE_UP2US]        = aos_unsupported,
   [TGSI_OPCODE_UP4B]         = aos_unsupported,
   [TGSI_OPCODE_UP4UB]        = aos_unsupported,
   [TGSI_OPCODE_X2D]          = aos_unsupported,
   [TGSI_OPCODE_ARA]          = aos_unsupported,
   [TGSI_OPCODE_ARR]          = aos_simple,
   [TGSI_OPCODE_BRA]          = aos_unsupported,
   [TGSI_OPCODE_CAL]          = aos_unsupported,
   [TGSI_OPCODE_RET]          = aos_unsupported,
   [TGSI_OPCODE_SSG]          = aos_set_sign,
   [TGSI_OPCODE_CMP]          = aos_compare,
   [TGSI_OPCODE_SCS]          = aos_SCS,
   [TGSI_OPCODE_TXB]          = aos_tex,
   [TGSI_OPCODE_NRM]          = aos_NRM,
   [TGSI_OPCODE_DIV]          = aos_DIV,
   [TGSI_OPCODE_DP2]          = aos_simple,
   [TGSI_OPCODE_TXL]          = aos_tex,
   [TGSI_OPCODE_BRK]          = aos_BRK,
   [TGSI_OPCODE_IF]           = aos_simple,
   [TGSI_OPCODE_UIF]          = aos_simple,
   [76]                       = aos_unsupported,
   [TGSI_OPCODE_ELSE]         = aos_simple,
   [TGSI_OPCODE_ENDIF]        = aos_simple,
   [79]                       = aos_unsupported,
   [80]                       = aos_unsupported,
   [TGSI_OPCODE_PUSHA]        = aos_unsupported,
   [TGSI_OPCODE_POPA]         = aos_unsupported,
   [TGSI_OPCODE_CEIL]         = aos_CEIL,
   [TGSI_OPCODE_I2F]          = aos_simple,
   [TGSI_OPCODE_NOT]          = aos_simple,
   [TGSI_OPCODE_TRUNC]        = aos_simple,
   [TGSI_OPCODE_SHL]          = aos_simple,
   [88]                       = aos_unsupported,
   [TGSI_OPCODE_AND]          = aos_simple,
   [TGSI_OPCODE_OR]           = aos_simple,
   [TGSI_OPCODE_MOD]          = aos_simple,
   [TGSI_OPCODE_XOR]          = aos_simple,
   [TGSI_OPCODE_SAD]          = aos_SAD,
   [TGSI_OPCODE_TXF]          = aos_tex,
   [TGSI_OPCODE_TXQ]          = aos_tex,
   [TGSI_OPCODE_CONT]         = aos_CONT,
   [TGSI_OPCODE_EMIT]         = aos_simple,
   [TGSI_OPCODE_ENDPRIM]      = aos_simple,
   [TGSI_OPCODE_BGNLOOP]      = aos_BGNLOOP,
   [TGSI_OPCODE_BGNSUB]       = aos_unsupported,
   [TGSI_OPCODE_ENDLOOP]      = aos_ENDLOOP,
   [TGSI_OPCODE_ENDSUB]       = aos_unsupported,
   [TGSI_OPCODE_TXQ_LZ]       = aos_tex,
   [104]                      = aos_unsupported,
   [105]                      = aos_unsupported,
   [106]                      = aos_unsupported,
   [TGSI_OPCODE_NOP]          = aos_simple,
   [108]                      = aos_unsupported,
   [109]                      = aos_unsupported,
   [110]                      = aos_unsupported,
   [111]                      = aos_unsupported,
   [TGSI_OPCODE_NRM4]         = aos_NRM4,
   [TGSI_OPCODE_CALLNZ]       = aos_unsupported,
   [TGSI_OPCODE_BREAKC]       = aos_unsupported,
   [TGSI_OPCODE_KILL_IF]      = aos_simple,
   [TGSI_OPCODE_END]          = aos_simple,
   [118]                      = aos_unsupported,
   [TGSI_OPCODE_F2I]          = aos_simple,
   [TGSI_OPCODE_IDIV]         = aos_simple,
   [TGSI_OPCODE_IMAX]         = aos_simple,
   [TGSI_OPCODE_IMIN]         = aos_simple,
   [TGSI_OPCODE_INEG]         = aos_simple,
   [TGSI_OPCODE_ISGE]         = aos_set_on_cond,
   [TGSI_OPCODE_ISHR]         = aos_simple,
   [TGSI_OPCODE_ISLT]         = aos_set_on_cond,
   [TGSI_OPCODE_F2U]          = aos_simple,
   [TGSI_OPCODE_U2F]          = aos_simple,
   [TGSI_OPCODE_UADD]         = aos_simple,
   [TGSI_OPCODE_UDIV]         = aos_simple,
   [TGSI_OPCODE_UMAD]         = aos_simple,
   [TGSI_OPCODE_UMAX]         = aos_simple,
   [TGSI_OPCODE_UMIN]         = aos_simple,
   [TGSI_OPCODE_UMOD]         = aos_simple,
   [TGSI_OPCODE_UMUL]         = aos_simple,
   [TGSI_OPCODE_USEQ]         = aos_set_on_cond,
   [TGSI_OPCODE_USGE]         = aos_set_on_cond,
   [TGSI_OPCODE_USHR]         = aos_simple,
   [TGSI_OPCODE_USLT]         = aos_set_on_cond,
   [TGSI_OPCODE_USNE]         = aos_set_on_cond,
   [TGSI_OPCODE_SWITCH]       = aos_unsupported,
   [TGSI_OPCODE_CASE]         = aos_unsupported,
   [TGSI_OPCODE_DEFAULT]      = aos_unsupported,
   [TGSI_OPCODE_ENDSWITCH]    = aos_unsupported,
   [TGSI_OPCODE_SAMPLE]       = aos_sample,
   [TGSI_OPCODE_SAMPLE_I]     = aos_sample,
   [TGSI_OPCODE_SAMPLE_I_MS]  = aos_sample,
   [TGSI_OPCODE_SAMPLE_B]     = aos_sample,
   [TGSI_OPCODE_SAMPLE_C]     = aos_sample,
   [TGSI_OPCODE_SAMPLE_C_LZ]  = aos_sample,
   [TGSI_OPCODE_SAMPLE_D]     = aos_sample,
   [TGSI_OPCODE_SAMPLE_L]     = aos_sample,
   [TGSI_OPCODE_GATHER4]      = aos_sample,
   [TGSI_OPCODE_SVIEWINFO]    = aos_sample,
   [TGSI_OPCODE_SAMPLE_POS]   = aos_sample,
   [TGSI_OPCODE_SAMPLE_INFO]  = aos_sample,
   [TGSI_OPCODE_UARL]         = aos_simple,
   [TGSI_OPCODE_UCMP]         = aos_compare,
   [TGSI_OPCODE_IABS]         = aos_simple,
   [TGSI_OPCODE_ISSG]         = aos_set_sign,
   [TGSI_OPCODE_LOAD]         = aos_unsupported,
   [TGSI_OPCODE_STORE]        = aos_unsupported,
   [TGSI_OPCODE_MFENCE]       = aos_unsupported,
   [TGSI_OPCODE_LFENCE]       = aos_unsupported,
   [TGSI_OPCODE_SFENCE]       = aos_unsupported,
   [TGSI_OPCODE_BARRIER]      = aos_unsupported,
   [TGSI_OPCODE_ATOMUADD]     = aos_unsupported,
   [TGSI_OPCODE_ATOMXCHG]     = aos_unsupported,
   [TGSI_OPCODE_ATOMCAS]      = aos_unsupported,
   [TGSI_OPCODE_ATOMAND]      = aos_unsupported,
   [TGSI_OPCODE_ATOMOR]       = aos_unsupported,
   [TGSI_OPCODE_ATOMXOR]      = aos_unsupported,
   [TGSI_OPCODE_ATOMUMIN]     = aos_unsupported,
   [TGSI_OPCODE_ATOMUMAX]     = aos_unsupported,
   [TGSI_OPCODE_ATOMIMIN]     = aos_unsupported,
   [TGSI_OPCODE_ATOMIMAX]     = aos_unsupported,
   [TGSI_OPCODE_TEX2]         = aos_tex,
   [TGSI_OPCODE_TXB2]         = aos_tex,
   [TGSI_OPCODE_TXL2]         = aos_tex,
};
 
/*
 * SoA translation that simply reuses the AoS translation function for the
 * opcode (for instructions whose lowering is layout-independent).
 */
static void
soa_passthrough(struct toy_compiler *tc,
                const struct tgsi_full_instruction *tgsi_inst,
                struct toy_dst *dst_,
                struct toy_src *src_)
{
   aos_translate_table[tgsi_inst->Instruction.Opcode](tc, tgsi_inst,
         dst_, src_);
}
 
/*
 * Translate a per-component instruction in SoA form by transposing each
 * operand into its four per-channel registers and emitting the AoS
 * translation once per channel.
 */
static void
soa_per_channel(struct toy_compiler *tc,
                const struct tgsi_full_instruction *tgsi_inst,
                struct toy_dst *dst_,
                struct toy_src *src_)
{
   struct toy_dst dst[TGSI_FULL_MAX_DST_REGISTERS][4];
   struct toy_src src[TGSI_FULL_MAX_SRC_REGISTERS][4];
   int i, ch;

   for (i = 0; i < tgsi_inst->Instruction.NumDstRegs; i++)
      tdst_transpose(dst_[i], dst[i]);
   for (i = 0; i < tgsi_inst->Instruction.NumSrcRegs; i++)
      tsrc_transpose(src_[i], src[i]);

   /* emit the same instruction four times for the four channels */
   for (ch = 0; ch < 4; ch++) {
      struct toy_dst aos_dst[TGSI_FULL_MAX_DST_REGISTERS];
      struct toy_src aos_src[TGSI_FULL_MAX_SRC_REGISTERS];

      for (i = 0; i < tgsi_inst->Instruction.NumDstRegs; i++)
         aos_dst[i] = dst[i][ch];
      for (i = 0; i < tgsi_inst->Instruction.NumSrcRegs; i++)
         aos_src[i] = src[i][ch];

      aos_translate_table[tgsi_inst->Instruction.Opcode](tc,
            tgsi_inst, aos_dst, aos_src);
   }
}
 
/*
 * Translate a scalar math instruction (RCP, RSQ, POW, ...) in SoA form:
 * compute the result once from the X channel of each source, then replicate
 * it to all four destination channels.
 */
static void
soa_scalar_replicate(struct toy_compiler *tc,
                     const struct tgsi_full_instruction *tgsi_inst,
                     struct toy_dst *dst_,
                     struct toy_src *src_)
{
   struct toy_dst dst0[4], tmp;
   struct toy_src srcx[TGSI_FULL_MAX_SRC_REGISTERS];
   int opcode, i;

   assert(tgsi_inst->Instruction.NumDstRegs == 1);

   tdst_transpose(dst_[0], dst0);
   for (i = 0; i < tgsi_inst->Instruction.NumSrcRegs; i++) {
      struct toy_src tmp[4];

      tsrc_transpose(src_[i], tmp);
      /* only the X channels */
      srcx[i] = tmp[0];
   }

   tmp = tc_alloc_tmp(tc);

   /* the hardware opcode comes from the simple AoS opcode mapping */
   opcode = aos_simple_opcode_map[tgsi_inst->Instruction.Opcode].opcode;
   assert(opcode);

   switch (tgsi_inst->Instruction.Opcode) {
   case TGSI_OPCODE_RCP:
   case TGSI_OPCODE_RSQ:
   case TGSI_OPCODE_SQRT:
   case TGSI_OPCODE_EX2:
   case TGSI_OPCODE_LG2:
   case TGSI_OPCODE_COS:
   case TGSI_OPCODE_SIN:
      tc_add1(tc, opcode, tmp, srcx[0]);
      break;
   case TGSI_OPCODE_POW:
      tc_add2(tc, opcode, tmp, srcx[0], srcx[1]);
      break;
   default:
      assert(!"invalid soa_scalar_replicate() call");
      return;
   }

   /* replicate the result */
   for (i = 0; i < 4; i++)
      tc_MOV(tc, dst0[i], tsrc_from(tmp));
}
 
/*
 * Translate the dot-product family (DP2, DP2A, DP3, DPH, DP4) in SoA form:
 * accumulate channel products with MUL/MAC chains into a temporary and
 * replicate the scalar result to all destination channels.
 */
static void
soa_dot_product(struct toy_compiler *tc,
                const struct tgsi_full_instruction *tgsi_inst,
                struct toy_dst *dst_,
                struct toy_src *src_)
{
   struct toy_dst dst0[4], tmp;
   struct toy_src src[TGSI_FULL_MAX_SRC_REGISTERS][4];
   int i;

   tdst_transpose(dst_[0], dst0);
   for (i = 0; i < tgsi_inst->Instruction.NumSrcRegs; i++)
      tsrc_transpose(src_[i], src[i]);

   tmp = tc_alloc_tmp(tc);

   switch (tgsi_inst->Instruction.Opcode) {
   case TGSI_OPCODE_DP2:
      /* src0.x*src1.x + src0.y*src1.y */
      tc_MUL(tc, tmp, src[0][1], src[1][1]);
      tc_MAC(tc, tmp, src[0][0], src[1][0], tsrc_from(tmp));
      break;
   case TGSI_OPCODE_DP2A:
      /* dot2 plus the src2.x addend, seeded into the first MAC */
      tc_MAC(tc, tmp, src[0][1], src[1][1], src[2][0]);
      tc_MAC(tc, tmp, src[0][0], src[1][0], tsrc_from(tmp));
      break;
   case TGSI_OPCODE_DP3:
      tc_MUL(tc, tmp, src[0][2], src[1][2]);
      tc_MAC(tc, tmp, src[0][1], src[1][1], tsrc_from(tmp));
      tc_MAC(tc, tmp, src[0][0], src[1][0], tsrc_from(tmp));
      break;
   case TGSI_OPCODE_DPH:
      /* homogeneous dot: src0.w is treated as 1.0, so seed with src1.w */
      tc_MAC(tc, tmp, src[0][2], src[1][2], src[1][3]);
      tc_MAC(tc, tmp, src[0][1], src[1][1], tsrc_from(tmp));
      tc_MAC(tc, tmp, src[0][0], src[1][0], tsrc_from(tmp));
      break;
   case TGSI_OPCODE_DP4:
      tc_MUL(tc, tmp, src[0][3], src[1][3]);
      tc_MAC(tc, tmp, src[0][2], src[1][2], tsrc_from(tmp));
      tc_MAC(tc, tmp, src[0][1], src[1][1], tsrc_from(tmp));
      tc_MAC(tc, tmp, src[0][0], src[1][0], tsrc_from(tmp));
      break;
   default:
      assert(!"invalid soa_dot_product() call");
      return;
   }

   /* replicate the scalar result to every destination channel */
   for (i = 0; i < 4; i++)
      tc_MOV(tc, dst0[i], tsrc_from(tmp));
}
 
/*
 * Translate TGSI_OPCODE_DDX/DDY in SoA form by emitting the matching toy
 * partial-derivative opcode.
 */
static void
soa_partial_derivative(struct toy_compiler *tc,
                       const struct tgsi_full_instruction *tgsi_inst,
                       struct toy_dst *dst_,
                       struct toy_src *src_)
{
   const int opcode =
      (tgsi_inst->Instruction.Opcode == TGSI_OPCODE_DDX) ?
      TOY_OPCODE_DDX : TOY_OPCODE_DDY;

   tc_add1(tc, opcode, dst_[0], src_[0]);
}
 
/*
 * Translate TGSI_OPCODE_IF/UIF in SoA form: compare the (replicated) X
 * channel of the condition against zero -- as a float for IF, as an integer
 * for UIF -- and emit the hardware IF.
 */
static void
soa_if(struct toy_compiler *tc,
       const struct tgsi_full_instruction *tgsi_inst,
       struct toy_dst *dst_,
       struct toy_src *src_)
{
   struct toy_src src0[4];

   /* the condition must already be a single replicated channel */
   assert(tsrc_is_swizzle1(src_[0]));
   tsrc_transpose(src_[0], src0);

   if (tgsi_inst->Instruction.Opcode == TGSI_OPCODE_IF)
      tc_IF(tc, tdst_null(), src0[0], tsrc_imm_f(0.0f), BRW_CONDITIONAL_NEQ);
   else
      tc_IF(tc, tdst_null(), src0[0], tsrc_imm_d(0), BRW_CONDITIONAL_NEQ);
}
 
/*
 * Translate TGSI_OPCODE_LIT in SoA form:
 * dst.x = 1.0, dst.y = src.x, dst.z = src.y ^ src.w, dst.w = 1.0,
 * then zero dst.y and dst.z where src.x < 0.
 */
static void
soa_LIT(struct toy_compiler *tc,
        const struct tgsi_full_instruction *tgsi_inst,
        struct toy_dst *dst_,
        struct toy_src *src_)
{
   struct toy_inst *inst;
   struct toy_dst dst0[4];
   struct toy_src src0[4];

   tdst_transpose(dst_[0], dst0);
   tsrc_transpose(src_[0], src0);

   tc_MOV(tc, dst0[0], tsrc_imm_f(1.0f));
   tc_MOV(tc, dst0[1], src0[0]);
   tc_POW(tc, dst0[2], src0[1], src0[3]);
   tc_MOV(tc, dst0[3], tsrc_imm_f(1.0f));

   /*
    * POW is calculated first because math with pred_ctrl is broken here.
    * But, why?
    */
   tc_CMP(tc, tdst_null(), src0[0], tsrc_imm_f(0.0f), BRW_CONDITIONAL_L);
   inst = tc_MOV(tc, dst0[1], tsrc_imm_f(0.0f));
   inst->pred_ctrl = BRW_PREDICATE_NORMAL;
   inst = tc_MOV(tc, dst0[2], tsrc_imm_f(0.0f));
   inst->pred_ctrl = BRW_PREDICATE_NORMAL;
}
 
/*
 * Translate TGSI_OPCODE_EXP in SoA form:
 * dst.x = 2^floor(src.x) (built bit-wise), dst.y = frac(src.x),
 * dst.z = 2^src.x, dst.w = 1.0.
 */
static void
soa_EXP(struct toy_compiler *tc,
        const struct tgsi_full_instruction *tgsi_inst,
        struct toy_dst *dst_,
        struct toy_src *src_)
{
   struct toy_dst dst0[4];
   struct toy_src src0[4];

   assert(!"SoA EXP untested");

   tdst_transpose(dst_[0], dst0);
   tsrc_transpose(src_[0], src0);

   if (!tdst_is_null(dst0[0])) {
      struct toy_dst tmp = tdst_d(tc_alloc_tmp(tc));

      tc_RNDD(tc, tmp, src0[0]);

      /* construct the floating point number manually: biased exponent only */
      tc_ADD(tc, tmp, tsrc_from(tmp), tsrc_imm_d(127));
      tc_SHL(tc, tdst_d(dst0[0]), tsrc_from(tmp), tsrc_imm_d(23));
   }

   tc_FRC(tc, dst0[1], src0[0]);
   tc_EXP(tc, dst0[2], src0[0]);
   tc_MOV(tc, dst0[3], tsrc_imm_f(1.0f));
}
 
/*
 * Translate TGSI_OPCODE_LOG in SoA form:
 * dst.x = unbiased exponent of |src.x|, dst.y = mantissa of src.x mapped to
 * [1.0, 2.0) by forcing an exponent of 0, dst.z = lg2(src.x), dst.w = 1.0.
 */
static void
soa_LOG(struct toy_compiler *tc,
        const struct tgsi_full_instruction *tgsi_inst,
        struct toy_dst *dst_,
        struct toy_src *src_)
{
   struct toy_dst dst0[4];
   struct toy_src src0[4];

   assert(!"SoA LOG untested");

   tdst_transpose(dst_[0], dst0);
   tsrc_transpose(src_[0], src0);

   if (dst_[0].writemask & TOY_WRITEMASK_XY) {
      struct toy_dst tmp = tdst_d(tc_alloc_tmp(tc));

      /* exponent: shift the biased exponent down, subtract the bias */
      tc_SHR(tc, tmp, tsrc_absolute(tsrc_d(src0[0])), tsrc_imm_d(23));
      tc_ADD(tc, dst0[0], tsrc_from(tmp), tsrc_imm_d(-127));

      /* mantissa: keep the fraction bits, OR in an exponent of 127 (= 0) */
      tc_AND(tc, tmp, tsrc_d(src0[0]), tsrc_imm_d((1 << 23) - 1));
      tc_OR(tc, dst0[1], tsrc_from(tmp), tsrc_imm_d(127 << 23));
   }

   tc_LOG(tc, dst0[2], src0[0]);
   tc_MOV(tc, dst0[3], tsrc_imm_f(1.0f));
}
 
/*
 * Translate TGSI_OPCODE_DST (distance vector) in SoA form:
 * dst.x = 1.0, dst.y = src0.y * src1.y, dst.z = src0.z, dst.w = src1.w
 */
static void
soa_DST(struct toy_compiler *tc,
        const struct tgsi_full_instruction *tgsi_inst,
        struct toy_dst *dst_,
        struct toy_src *src_)
{
   struct toy_dst d[4];
   struct toy_src a[4], b[4];

   tdst_transpose(dst_[0], d);
   tsrc_transpose(src_[0], a);
   tsrc_transpose(src_[1], b);

   tc_MOV(tc, d[0], tsrc_imm_f(1.0f));
   tc_MUL(tc, d[1], a[1], b[1]);
   tc_MOV(tc, d[2], a[2]);
   tc_MOV(tc, d[3], b[3]);
}
 
/*
 * Translate TGSI_OPCODE_XPD (cross product) in SoA form: each component is a
 * MUL followed by a MAC that subtracts it (via the negated accumulator).
 */
static void
soa_XPD(struct toy_compiler *tc,
        const struct tgsi_full_instruction *tgsi_inst,
        struct toy_dst *dst_,
        struct toy_src *src_)
{
   struct toy_dst dst0[4];
   struct toy_src src[2][4];

   tdst_transpose(dst_[0], dst0);
   tsrc_transpose(src_[0], src[0]);
   tsrc_transpose(src_[1], src[1]);

   /* dst.x = src0.y * src1.z - src1.y * src0.z */
   tc_MUL(tc, dst0[0], src[0][2], src[1][1]);
   tc_MAC(tc, dst0[0], src[0][1], src[1][2], tsrc_negate(tsrc_from(dst0[0])));

   /* dst.y = src0.z * src1.x - src1.z * src0.x */
   tc_MUL(tc, dst0[1], src[0][0], src[1][2]);
   tc_MAC(tc, dst0[1], src[0][2], src[1][0], tsrc_negate(tsrc_from(dst0[1])));

   /* dst.z = src0.x * src1.y - src1.x * src0.y */
   tc_MUL(tc, dst0[2], src[0][1], src[1][0]);
   tc_MAC(tc, dst0[2], src[0][0], src[1][1], tsrc_negate(tsrc_from(dst0[2])));

   tc_MOV(tc, dst0[3], tsrc_imm_f(1.0f));
}
 
/*
 * Translate TGSI_OPCODE_PK2H in SoA form: pack the low 16 bits of src.x and
 * src.y into one 32-bit value (src.y in the high half) and replicate it.
 *
 * NOTE(review): only bitwise packing is emitted; no float32-to-float16
 * conversion is visible -- presumably the channels already hold half-float
 * bit patterns.  Confirm before enabling this asserted-untested path.
 */
static void
soa_PK2H(struct toy_compiler *tc,
         const struct tgsi_full_instruction *tgsi_inst,
         struct toy_dst *dst_,
         struct toy_src *src_)
{
   struct toy_dst tmp = tdst_ud(tc_alloc_tmp(tc));
   struct toy_dst dst0[4];
   struct toy_src src0[4];
   int i;

   assert(!"SoA PK2H untested");

   tdst_transpose(dst_[0], dst0);
   tsrc_transpose(src_[0], src0);

   tc_SHL(tc, tmp, src0[1], tsrc_imm_ud(16));
   tc_OR(tc, tmp, src0[0], tsrc_from(tmp));

   for (i = 0; i < 4; i++)
      tc_MOV(tc, dst0[i], tsrc_from(tmp));
}
 
/*
 * Translate TGSI_OPCODE_UP2H in SoA form: even channels take the low 16
 * bits and odd channels take the high 16 bits of the corresponding source
 * channel.
 *
 * NOTE(review): this reads a different source channel per destination
 * channel (dst.y from src.y, dst.w from src.w); the TGSI UP2H definition
 * unpacks both halves of src.x into dst.xy and of src.y into dst.zw, and no
 * half-to-float conversion is emitted either.  The path is asserted
 * untested -- verify against the TGSI spec before enabling.
 */
static void
soa_UP2H(struct toy_compiler *tc,
         const struct tgsi_full_instruction *tgsi_inst,
         struct toy_dst *dst_,
         struct toy_src *src_)
{
   struct toy_dst dst0[4];
   struct toy_src src0[4];

   assert(!"SoA UP2H untested");

   tdst_transpose(dst_[0], dst0);
   tsrc_transpose(src_[0], src0);

   tc_AND(tc, tdst_ud(dst0[0]), tsrc_ud(src0[0]), tsrc_imm_ud(0xffff));
   tc_SHR(tc, tdst_ud(dst0[1]), tsrc_ud(src0[1]), tsrc_imm_ud(16));
   tc_AND(tc, tdst_ud(dst0[2]), tsrc_ud(src0[2]), tsrc_imm_ud(0xffff));
   tc_SHR(tc, tdst_ud(dst0[3]), tsrc_ud(src0[3]), tsrc_imm_ud(16));
}
 
/*
 * Translate TGSI_OPCODE_SCS in SoA form:
 * dst.x = cos(src.x), dst.y = sin(src.x), dst.z = 0.0, dst.w = 1.0
 */
static void
soa_SCS(struct toy_compiler *tc,
        const struct tgsi_full_instruction *tgsi_inst,
        struct toy_dst *dst_,
        struct toy_src *src_)
{
   struct toy_dst chan_dst[4];
   struct toy_src chan_src[4];

   tdst_transpose(dst_[0], chan_dst);
   tsrc_transpose(src_[0], chan_src);

   tc_add1(tc, TOY_OPCODE_COS, chan_dst[0], chan_src[0]);
   tc_add1(tc, TOY_OPCODE_SIN, chan_dst[1], chan_src[0]);
   tc_MOV(tc, chan_dst[2], tsrc_imm_f(0.0f));
   tc_MOV(tc, chan_dst[3], tsrc_imm_f(1.0f));
}
 
/*
 * Translate TGSI_OPCODE_NRM (3-component normalize) in SoA form: compute
 * dp3(src, src) via MUL/MAC, invert it, and scale src.xyz; dst.w = 1.0.
 *
 * NOTE(review): normalization needs the reciprocal *square root* of the dot
 * product; if tc_INV emits a plain reciprocal this computes src / |src|^2.
 * The path is asserted untested -- verify tc_INV's semantics first.
 */
static void
soa_NRM(struct toy_compiler *tc,
        const struct tgsi_full_instruction *tgsi_inst,
        struct toy_dst *dst_,
        struct toy_src *src_)
{
   const struct toy_dst tmp = tc_alloc_tmp(tc);
   struct toy_dst dst0[4];
   struct toy_src src0[4];

   assert(!"SoA NRM untested");

   tdst_transpose(dst_[0], dst0);
   tsrc_transpose(src_[0], src0);

   tc_MUL(tc, tmp, src0[2], src0[2]);
   tc_MAC(tc, tmp, src0[1], src0[1], tsrc_from(tmp));
   tc_MAC(tc, tmp, src0[0], src0[0], tsrc_from(tmp));
   tc_INV(tc, tmp, tsrc_from(tmp));

   tc_MUL(tc, dst0[0], src0[0], tsrc_from(tmp));
   tc_MUL(tc, dst0[1], src0[1], tsrc_from(tmp));
   tc_MUL(tc, dst0[2], src0[2], tsrc_from(tmp));
   tc_MOV(tc, dst0[3], tsrc_imm_f(1.0f));
}
 
/*
 * Translate TGSI_OPCODE_NRM4 (4-component normalize) in SoA form: compute
 * dp4(src, src) via MUL/MAC, invert it, and scale every source channel.
 *
 * Fixed: the replication loop multiplied src0[0] into every destination
 * channel, writing src.x/len to all of dst.xyzw; TGSI NRM4 scales each
 * channel by the reciprocal length, so the loop must use src0[i].
 *
 * NOTE(review): as with NRM, this also relies on tc_INV; if that is a plain
 * reciprocal rather than rsq, the scale factor is 1/|src|^2.  The path is
 * asserted untested.
 */
static void
soa_NRM4(struct toy_compiler *tc,
         const struct tgsi_full_instruction *tgsi_inst,
         struct toy_dst *dst_,
         struct toy_src *src_)
{
   const struct toy_dst tmp = tc_alloc_tmp(tc);
   struct toy_dst dst0[4];
   struct toy_src src0[4];
   int i;

   assert(!"SoA NRM4 untested");

   tdst_transpose(dst_[0], dst0);
   tsrc_transpose(src_[0], src0);

   tc_MUL(tc, tmp, src0[3], src0[3]);
   tc_MAC(tc, tmp, src0[2], src0[2], tsrc_from(tmp));
   tc_MAC(tc, tmp, src0[1], src0[1], tsrc_from(tmp));
   tc_MAC(tc, tmp, src0[0], src0[0], tsrc_from(tmp));
   tc_INV(tc, tmp, tsrc_from(tmp));

   for (i = 0; i < 4; i++)
      tc_MUL(tc, dst0[i], src0[i], tsrc_from(tmp));
}
 
/*
 * Fallback for TGSI opcodes with no SoA translation: warn with the opcode
 * mnemonic and mark the compilation as failed.
 */
static void
soa_unsupported(struct toy_compiler *tc,
                const struct tgsi_full_instruction *tgsi_inst,
                struct toy_dst *dst_,
                struct toy_src *src_)
{
   ilo_warn("unsupported TGSI opcode in SoA form: TGSI_OPCODE_%s\n",
         tgsi_get_opcode_info(tgsi_inst->Instruction.Opcode)->mnemonic);

   tc_fail(tc, "unsupported TGSI instruction in SoA form");
}
 
/*
 * Per-opcode SoA translation functions, indexed by TGSI opcode.  Entries
 * indexed by a bare number (22, 23, 32, ...) are gaps in the TGSI opcode
 * numbering and are explicitly unsupported.
 */
static const toy_tgsi_translate soa_translate_table[TGSI_OPCODE_LAST] = {
   [TGSI_OPCODE_ARL]          = soa_per_channel,
   [TGSI_OPCODE_MOV]          = soa_per_channel,
   [TGSI_OPCODE_LIT]          = soa_LIT,
   [TGSI_OPCODE_RCP]          = soa_scalar_replicate,
   [TGSI_OPCODE_RSQ]          = soa_scalar_replicate,
   [TGSI_OPCODE_EXP]          = soa_EXP,
   [TGSI_OPCODE_LOG]          = soa_LOG,
   [TGSI_OPCODE_MUL]          = soa_per_channel,
   [TGSI_OPCODE_ADD]          = soa_per_channel,
   [TGSI_OPCODE_DP3]          = soa_dot_product,
   [TGSI_OPCODE_DP4]          = soa_dot_product,
   [TGSI_OPCODE_DST]          = soa_DST,
   [TGSI_OPCODE_MIN]          = soa_per_channel,
   [TGSI_OPCODE_MAX]          = soa_per_channel,
   [TGSI_OPCODE_SLT]          = soa_per_channel,
   [TGSI_OPCODE_SGE]          = soa_per_channel,
   [TGSI_OPCODE_MAD]          = soa_per_channel,
   [TGSI_OPCODE_SUB]          = soa_per_channel,
   [TGSI_OPCODE_LRP]          = soa_per_channel,
   [TGSI_OPCODE_CND]          = soa_per_channel,
   [TGSI_OPCODE_SQRT]         = soa_scalar_replicate,
   [TGSI_OPCODE_DP2A]         = soa_dot_product,
   [22]                       = soa_unsupported,
   [23]                       = soa_unsupported,
   [TGSI_OPCODE_FRC]          = soa_per_channel,
   [TGSI_OPCODE_CLAMP]        = soa_per_channel,
   [TGSI_OPCODE_FLR]          = soa_per_channel,
   [TGSI_OPCODE_ROUND]        = soa_per_channel,
   [TGSI_OPCODE_EX2]          = soa_scalar_replicate,
   [TGSI_OPCODE_LG2]          = soa_scalar_replicate,
   [TGSI_OPCODE_POW]          = soa_scalar_replicate,
   [TGSI_OPCODE_XPD]          = soa_XPD,
   [32]                       = soa_unsupported,
   [TGSI_OPCODE_ABS]          = soa_per_channel,
   [TGSI_OPCODE_RCC]          = soa_unsupported,
   [TGSI_OPCODE_DPH]          = soa_dot_product,
   [TGSI_OPCODE_COS]          = soa_scalar_replicate,
   [TGSI_OPCODE_DDX]          = soa_partial_derivative,
   [TGSI_OPCODE_DDY]          = soa_partial_derivative,
   [TGSI_OPCODE_KILL]         = soa_passthrough,
   [TGSI_OPCODE_PK2H]         = soa_PK2H,
   [TGSI_OPCODE_PK2US]        = soa_unsupported,
   [TGSI_OPCODE_PK4B]         = soa_unsupported,
   [TGSI_OPCODE_PK4UB]        = soa_unsupported,
   [TGSI_OPCODE_RFL]          = soa_unsupported,
   [TGSI_OPCODE_SEQ]          = soa_per_channel,
   [TGSI_OPCODE_SFL]          = soa_per_channel,
   [TGSI_OPCODE_SGT]          = soa_per_channel,
   [TGSI_OPCODE_SIN]          = soa_scalar_replicate,
   [TGSI_OPCODE_SLE]          = soa_per_channel,
   [TGSI_OPCODE_SNE]          = soa_per_channel,
   [TGSI_OPCODE_STR]          = soa_per_channel,
   [TGSI_OPCODE_TEX]          = soa_passthrough,
   [TGSI_OPCODE_TXD]          = soa_passthrough,
   [TGSI_OPCODE_TXP]          = soa_passthrough,
   [TGSI_OPCODE_UP2H]         = soa_UP2H,
   [TGSI_OPCODE_UP2US]        = soa_unsupported,
   [TGSI_OPCODE_UP4B]         = soa_unsupported,
   [TGSI_OPCODE_UP4UB]        = soa_unsupported,
   [TGSI_OPCODE_X2D]          = soa_unsupported,
   [TGSI_OPCODE_ARA]          = soa_unsupported,
   [TGSI_OPCODE_ARR]          = soa_per_channel,
   [TGSI_OPCODE_BRA]          = soa_unsupported,
   [TGSI_OPCODE_CAL]          = soa_unsupported,
   [TGSI_OPCODE_RET]          = soa_unsupported,
   [TGSI_OPCODE_SSG]          = soa_per_channel,
   [TGSI_OPCODE_CMP]          = soa_per_channel,
   [TGSI_OPCODE_SCS]          = soa_SCS,
   [TGSI_OPCODE_TXB]          = soa_passthrough,
   [TGSI_OPCODE_NRM]          = soa_NRM,
   [TGSI_OPCODE_DIV]          = soa_per_channel,
   [TGSI_OPCODE_DP2]          = soa_dot_product,
   [TGSI_OPCODE_TXL]          = soa_passthrough,
   [TGSI_OPCODE_BRK]          = soa_passthrough,
   [TGSI_OPCODE_IF]           = soa_if,
   [TGSI_OPCODE_UIF]          = soa_if,
   [76]                       = soa_unsupported,
   [TGSI_OPCODE_ELSE]         = soa_passthrough,
   [TGSI_OPCODE_ENDIF]        = soa_passthrough,
   [79]                       = soa_unsupported,
   [80]                       = soa_unsupported,
   [TGSI_OPCODE_PUSHA]        = soa_unsupported,
   [TGSI_OPCODE_POPA]         = soa_unsupported,
   [TGSI_OPCODE_CEIL]         = soa_per_channel,
   [TGSI_OPCODE_I2F]          = soa_per_channel,
   [TGSI_OPCODE_NOT]          = soa_per_channel,
   [TGSI_OPCODE_TRUNC]        = soa_per_channel,
   [TGSI_OPCODE_SHL]          = soa_per_channel,
   [88]                       = soa_unsupported,
   [TGSI_OPCODE_AND]          = soa_per_channel,
   [TGSI_OPCODE_OR]           = soa_per_channel,
   [TGSI_OPCODE_MOD]          = soa_per_channel,
   [TGSI_OPCODE_XOR]          = soa_per_channel,
   [TGSI_OPCODE_SAD]          = soa_per_channel,
   [TGSI_OPCODE_TXF]          = soa_passthrough,
   [TGSI_OPCODE_TXQ]          = soa_passthrough,
   [TGSI_OPCODE_CONT]         = soa_passthrough,
   [TGSI_OPCODE_EMIT]         = soa_unsupported,
   [TGSI_OPCODE_ENDPRIM]      = soa_unsupported,
   [TGSI_OPCODE_BGNLOOP]      = soa_passthrough,
   [TGSI_OPCODE_BGNSUB]       = soa_unsupported,
   [TGSI_OPCODE_ENDLOOP]      = soa_passthrough,
   [TGSI_OPCODE_ENDSUB]       = soa_unsupported,
   [TGSI_OPCODE_TXQ_LZ]       = soa_passthrough,
   [104]                      = soa_unsupported,
   [105]                      = soa_unsupported,
   [106]                      = soa_unsupported,
   [TGSI_OPCODE_NOP]          = soa_passthrough,
   [108]                      = soa_unsupported,
   [109]                      = soa_unsupported,
   [110]                      = soa_unsupported,
   [111]                      = soa_unsupported,
   [TGSI_OPCODE_NRM4]         = soa_NRM4,
   [TGSI_OPCODE_CALLNZ]       = soa_unsupported,
   [TGSI_OPCODE_BREAKC]       = soa_unsupported,
   [TGSI_OPCODE_KILL_IF]      = soa_passthrough,
   [TGSI_OPCODE_END]          = soa_passthrough,
   [118]                      = soa_unsupported,
   [TGSI_OPCODE_F2I]          = soa_per_channel,
   [TGSI_OPCODE_IDIV]         = soa_per_channel,
   [TGSI_OPCODE_IMAX]         = soa_per_channel,
   [TGSI_OPCODE_IMIN]         = soa_per_channel,
   [TGSI_OPCODE_INEG]         = soa_per_channel,
   [TGSI_OPCODE_ISGE]         = soa_per_channel,
   [TGSI_OPCODE_ISHR]         = soa_per_channel,
   [TGSI_OPCODE_ISLT]         = soa_per_channel,
   [TGSI_OPCODE_F2U]          = soa_per_channel,
   [TGSI_OPCODE_U2F]          = soa_per_channel,
   [TGSI_OPCODE_UADD]         = soa_per_channel,
   [TGSI_OPCODE_UDIV]         = soa_per_channel,
   [TGSI_OPCODE_UMAD]         = soa_per_channel,
   [TGSI_OPCODE_UMAX]         = soa_per_channel,
   [TGSI_OPCODE_UMIN]         = soa_per_channel,
   [TGSI_OPCODE_UMOD]         = soa_per_channel,
   [TGSI_OPCODE_UMUL]         = soa_per_channel,
   [TGSI_OPCODE_USEQ]         = soa_per_channel,
   [TGSI_OPCODE_USGE]         = soa_per_channel,
   [TGSI_OPCODE_USHR]         = soa_per_channel,
   [TGSI_OPCODE_USLT]         = soa_per_channel,
   [TGSI_OPCODE_USNE]         = soa_per_channel,
   [TGSI_OPCODE_SWITCH]       = soa_unsupported,
   [TGSI_OPCODE_CASE]         = soa_unsupported,
   [TGSI_OPCODE_DEFAULT]      = soa_unsupported,
   [TGSI_OPCODE_ENDSWITCH]    = soa_unsupported,
   [TGSI_OPCODE_SAMPLE]       = soa_passthrough,
   [TGSI_OPCODE_SAMPLE_I]     = soa_passthrough,
   [TGSI_OPCODE_SAMPLE_I_MS]  = soa_passthrough,
   [TGSI_OPCODE_SAMPLE_B]     = soa_passthrough,
   [TGSI_OPCODE_SAMPLE_C]     = soa_passthrough,
   [TGSI_OPCODE_SAMPLE_C_LZ]  = soa_passthrough,
   [TGSI_OPCODE_SAMPLE_D]     = soa_passthrough,
   [TGSI_OPCODE_SAMPLE_L]     = soa_passthrough,
   [TGSI_OPCODE_GATHER4]      = soa_passthrough,
   [TGSI_OPCODE_SVIEWINFO]    = soa_passthrough,
   [TGSI_OPCODE_SAMPLE_POS]   = soa_passthrough,
   [TGSI_OPCODE_SAMPLE_INFO]  = soa_passthrough,
   [TGSI_OPCODE_UARL]         = soa_per_channel,
   [TGSI_OPCODE_UCMP]         = soa_per_channel,
   [TGSI_OPCODE_IABS]         = soa_per_channel,
   [TGSI_OPCODE_ISSG]         = soa_per_channel,
   [TGSI_OPCODE_LOAD]         = soa_unsupported,
   [TGSI_OPCODE_STORE]        = soa_unsupported,
   [TGSI_OPCODE_MFENCE]       = soa_unsupported,
   [TGSI_OPCODE_LFENCE]       = soa_unsupported,
   [TGSI_OPCODE_SFENCE]       = soa_unsupported,
   [TGSI_OPCODE_BARRIER]      = soa_unsupported,
   [TGSI_OPCODE_ATOMUADD]     = soa_unsupported,
   [TGSI_OPCODE_ATOMXCHG]     = soa_unsupported,
   [TGSI_OPCODE_ATOMCAS]      = soa_unsupported,
   [TGSI_OPCODE_ATOMAND]      = soa_unsupported,
   [TGSI_OPCODE_ATOMOR]       = soa_unsupported,
   [TGSI_OPCODE_ATOMXOR]      = soa_unsupported,
   [TGSI_OPCODE_ATOMUMIN]     = soa_unsupported,
   [TGSI_OPCODE_ATOMUMAX]     = soa_unsupported,
   [TGSI_OPCODE_ATOMIMIN]     = soa_unsupported,
   [TGSI_OPCODE_ATOMIMAX]     = soa_unsupported,
   [TGSI_OPCODE_TEX2]         = soa_passthrough,
   [TGSI_OPCODE_TXB2]         = soa_passthrough,
   [TGSI_OPCODE_TXL2]         = soa_passthrough,
};
 
static bool
ra_dst_is_indirect(const struct tgsi_full_dst_register *d)
{
return (d->Register.Indirect ||
(d->Register.Dimension && d->Dimension.Indirect));
}
 
/*
 * Return the (direct) register index of a destination operand.  Indirect
 * destinations must not be passed here.
 */
static int
ra_dst_index(const struct tgsi_full_dst_register *d)
{
   assert(!d->Register.Indirect);
   return d->Register.Index;
}
 
/*
 * Return the dimension index of a destination operand, or 0 when it has no
 * dimension.  Indirect dimensions must not be passed here.
 */
static int
ra_dst_dimension(const struct tgsi_full_dst_register *d)
{
   if (!d->Register.Dimension)
      return 0;

   assert(!d->Dimension.Indirect);
   return d->Dimension.Index;
}
 
static bool
ra_is_src_indirect(const struct tgsi_full_src_register *s)
{
return (s->Register.Indirect ||
(s->Register.Dimension && s->Dimension.Indirect));
}
 
/*
 * Return the (direct) register index of a source operand.  Indirect sources
 * must not be passed here.
 */
static int
ra_src_index(const struct tgsi_full_src_register *s)
{
   assert(!s->Register.Indirect);
   return s->Register.Index;
}
 
/*
 * Return the dimension index of a source operand, or 0 when it has no
 * dimension.  Indirect dimensions must not be passed here.
 */
static int
ra_src_dimension(const struct tgsi_full_src_register *s)
{
   if (!s->Register.Dimension)
      return 0;

   assert(!s->Dimension.Indirect);
   return s->Dimension.Index;
}
 
/**
 * Infer the toy register type of either the sources or the destination of
 * the given TGSI opcode, by mapping the TGSI-inferred operand type to the
 * corresponding toy type (unsigned -> UD, signed -> D, float -> F).
 */
static enum toy_type
ra_infer_opcode_type(int tgsi_opcode, bool is_dst)
{
   enum tgsi_opcode_type type;

   if (is_dst)
      type = tgsi_opcode_infer_dst_type(tgsi_opcode);
   else
      type = tgsi_opcode_infer_src_type(tgsi_opcode);

   switch (type) {
   case TGSI_TYPE_UNSIGNED:
      return TOY_TYPE_UD;
   case TGSI_TYPE_SIGNED:
      return TOY_TYPE_D;
   case TGSI_TYPE_FLOAT:
      return TOY_TYPE_F;
   case TGSI_TYPE_UNTYPED:
   case TGSI_TYPE_VOID:
   case TGSI_TYPE_DOUBLE:
   default:
      /* untyped/void/double operands have no toy equivalent here */
      assert(!"unsupported TGSI type");
      return TOY_TYPE_UD;
   }
}
 
/**
 * Return the toy type of an operand of the specified instruction.
 *
 * MOV and UCMP are special-cased; everything else starts from the
 * opcode-inferred type and is then corrected for files that always hold
 * integers (samplers, resources, sampler views, the address file).
 */
static enum toy_type
ra_get_type(struct toy_tgsi *tgsi, const struct tgsi_full_instruction *tgsi_inst,
            int operand, bool is_dst)
{
   enum toy_type type;
   enum tgsi_file_type file;

   /* we need to look at both src and dst for MOV */
   /* XXX it should not be this complex */
   if (tgsi_inst->Instruction.Opcode == TGSI_OPCODE_MOV) {
      const enum tgsi_file_type dst_file = tgsi_inst->Dst[0].Register.File;
      const enum tgsi_file_type src_file = tgsi_inst->Src[0].Register.File;

      if (dst_file == TGSI_FILE_ADDRESS || src_file == TGSI_FILE_ADDRESS) {
         /* the address file is integer-typed */
         type = TOY_TYPE_D;
      }
      else if (src_file == TGSI_FILE_IMMEDIATE &&
               !tgsi_inst->Src[0].Register.Indirect) {
         /* a direct immediate carries its own recorded type */
         const int src_idx = tgsi_inst->Src[0].Register.Index;
         type = tgsi->imm_data.types[src_idx];
      }
      else {
         /* this is the best we can do */
         type = TOY_TYPE_F;
      }

      return type;
   }
   else if (tgsi_inst->Instruction.Opcode == TGSI_OPCODE_UCMP) {
      /* UCMP: the condition (src0) is unsigned, the rest are floats */
      if (!is_dst && operand == 0)
         type = TOY_TYPE_UD;
      else
         type = TOY_TYPE_F;

      return type;
   }

   type = ra_infer_opcode_type(tgsi_inst->Instruction.Opcode, is_dst);

   /* fix the type */
   file = (is_dst) ?
      tgsi_inst->Dst[operand].Register.File :
      tgsi_inst->Src[operand].Register.File;
   switch (file) {
   case TGSI_FILE_SAMPLER:
   case TGSI_FILE_RESOURCE:
   case TGSI_FILE_SAMPLER_VIEW:
      type = TOY_TYPE_D;
      break;
   case TGSI_FILE_ADDRESS:
      assert(type == TOY_TYPE_D);
      break;
   default:
      break;
   }

   return type;
}
 
/**
 * Allocate a VRF register: one register in AoS mode, four (one per channel)
 * in SoA mode.  The file argument is currently unused.
 */
static int
ra_alloc_reg(struct toy_tgsi *tgsi, enum tgsi_file_type file)
{
   const int count = (tgsi->aos) ? 1 : 4;
   return tc_alloc_vrf(tgsi->tc, count);
}
 
/**
 * Construct the key for VRF mapping look-up.
 *
 * The key packs (file, dim, index) into 32 bits:
 * file in bits [31:28], dim in bits [27:16], index in bits [15:0].
 * dump_reg_mapping() decodes the same layout.
 */
static void *
ra_get_map_key(enum tgsi_file_type file, unsigned dim, unsigned index)
{
   uint32_t sig;
   intptr_t key;

   /* this is ugly... */
   assert(file < 1 << 4);
   assert(dim < 1 << 12);
   assert(index < 1 << 16);

   /*
    * pack in unsigned arithmetic: "file << 28" on a plain (signed) int
    * would shift into the sign bit, which is undefined behavior
    */
   sig = ((uint32_t) file << 28) | ((uint32_t) dim << 16) | index;
   key = (intptr_t) sig;

   return intptr_to_pointer(key);
}
 
/**
 * Map a TGSI register to a VRF register, allocating one on first use.
 *
 * \param is_new  if non-NULL, set to true when the register was newly
 *                allocated (the caller may then want to initialize it)
 * \return the VRF register number
 */
static int
ra_map_reg(struct toy_tgsi *tgsi, enum tgsi_file_type file,
           int dim, int index, bool *is_new)
{
   void *key, *val;
   intptr_t vrf;

   key = ra_get_map_key(file, dim, index);

   /*
    * because we allocate vrf from 1 and on, val is never NULL as long as the
    * key exists
    */
   val = util_hash_table_get(tgsi->reg_mapping, key);
   if (val) {
      vrf = pointer_to_intptr(val);

      if (is_new)
         *is_new = false;
   }
   else {
      vrf = (intptr_t) ra_alloc_reg(tgsi, file);

      /* add to the mapping */
      val = intptr_to_pointer(vrf);
      util_hash_table_set(tgsi->reg_mapping, key, val);

      if (is_new)
         *is_new = true;
   }

   return (int) vrf;
}
 
/**
 * Return true if the destination aliases any of the sources.
 *
 * The check is conservative: an indirect destination or any indirect
 * source in the same file counts as aliasing, even when the actual
 * registers would differ.
 */
static bool
ra_dst_is_aliasing(const struct tgsi_full_instruction *tgsi_inst, int dst_index)
{
   const struct tgsi_full_dst_register *d = &tgsi_inst->Dst[dst_index];
   int i;

   /* we need a scratch register for indirect dst anyway */
   if (ra_dst_is_indirect(d))
      return true;

   for (i = 0; i < tgsi_inst->Instruction.NumSrcRegs; i++) {
      const struct tgsi_full_src_register *s = &tgsi_inst->Src[i];

      if (s->Register.File != d->Register.File)
         continue;

      /*
       * we can go on to check dimension and index respectively, but
       * keep it simple for now
       */
      if (ra_is_src_indirect(s))
         return true;
      if (ra_src_dimension(s) == ra_dst_dimension(d) &&
          ra_src_index(s) == ra_dst_index(d))
         return true;
   }

   return false;
}
 
/**
 * Return the toy register for a TGSI destination operand.
 *
 * When the operand is indirect, aliases a source, or the instruction
 * saturates, a scratch VRF register is returned instead of the mapped
 * one; parse_instruction() later copies the scratch to the real
 * destination.  *is_scratch reports which case applies (only meaningful
 * for VRF-backed files).
 */
static struct toy_dst
ra_get_dst(struct toy_tgsi *tgsi,
           const struct tgsi_full_instruction *tgsi_inst, int dst_index,
           bool *is_scratch)
{
   const struct tgsi_full_dst_register *d = &tgsi_inst->Dst[dst_index];
   bool need_vrf = false;
   struct toy_dst dst;

   switch (d->Register.File) {
   case TGSI_FILE_NULL:
      dst = tdst_null();
      break;
   case TGSI_FILE_OUTPUT:
   case TGSI_FILE_TEMPORARY:
   case TGSI_FILE_ADDRESS:
   case TGSI_FILE_PREDICATE:
      need_vrf = true;
      break;
   default:
      assert(!"unhandled dst file");
      dst = tdst_null();
      break;
   }

   if (need_vrf) {
      /* XXX we do not always need a scratch given the conditions... */
      const bool need_scratch =
         (ra_dst_is_indirect(d) || ra_dst_is_aliasing(tgsi_inst, dst_index) ||
          tgsi_inst->Instruction.Saturate);
      const enum toy_type type = ra_get_type(tgsi, tgsi_inst, dst_index, true);
      int vrf;

      if (need_scratch) {
         /* a throw-away register; never entered into the mapping */
         vrf = ra_alloc_reg(tgsi, d->Register.File);
      }
      else {
         vrf = ra_map_reg(tgsi, d->Register.File,
               ra_dst_dimension(d), ra_dst_index(d), NULL);
      }

      if (is_scratch)
         *is_scratch = need_scratch;

      dst = tdst_full(TOY_FILE_VRF, type, TOY_RECT_LINEAR,
            false, 0, d->Register.WriteMask, vrf * TOY_REG_WIDTH);
   }

   return dst;
}
 
/**
 * Build a toy source that reads the given VRF register, carrying over the
 * swizzle and the absolute/negate modifiers of the TGSI source operand.
 */
static struct toy_src
ra_get_src_for_vrf(const struct tgsi_full_src_register *s,
                   enum toy_type type, int vrf)
{
   return tsrc_full(TOY_FILE_VRF, type, TOY_RECT_LINEAR,
         false, 0,
         s->Register.SwizzleX, s->Register.SwizzleY,
         s->Register.SwizzleZ, s->Register.SwizzleW,
         s->Register.Absolute, s->Register.Negate,
         vrf * TOY_REG_WIDTH);
}
 
/**
 * Fill in the sources of a TGSI_INDIRECT_FETCH/TGSI_INDIRECT_STORE
 * instruction so that they describe a (possibly indirect) TGSI register:
 *
 *   src[0] file, src[1] dimension, src[2] dimension indirection,
 *   src[3] index, src[4] index indirection
 *
 * \return the number of sources written
 */
static int
init_tgsi_reg(struct toy_tgsi *tgsi, struct toy_inst *inst,
              enum tgsi_file_type file, int index,
              const struct tgsi_ind_register *indirect,
              const struct tgsi_dimension *dimension,
              const struct tgsi_ind_register *dim_indirect)
{
   struct toy_src src;
   int num_src = 0;

   /* src[0]: TGSI file */
   inst->src[num_src++] = tsrc_imm_d(file);

   /* src[1]: TGSI dimension */
   inst->src[num_src++] = tsrc_imm_d((dimension) ? dimension->Index : 0);

   /* src[2]: TGSI dimension indirection */
   if (dim_indirect) {
      const int vrf = ra_map_reg(tgsi, dim_indirect->File, 0,
            dim_indirect->Index, NULL);

      src = tsrc(TOY_FILE_VRF, vrf, 0);
      /*
       * use the dimension indirection's own swizzle; the previous code
       * read indirect->Swizzle here, picking the wrong component and
       * dereferencing NULL when only the dimension is indirect
       */
      src = tsrc_swizzle1(tsrc_d(src), dim_indirect->Swizzle);
   }
   else {
      src = tsrc_imm_d(0);
   }

   inst->src[num_src++] = src;

   /* src[3]: TGSI index */
   inst->src[num_src++] = tsrc_imm_d(index);

   /* src[4]: TGSI index indirection */
   if (indirect) {
      const int vrf = ra_map_reg(tgsi, indirect->File, 0,
            indirect->Index, NULL);

      src = tsrc(TOY_FILE_VRF, vrf, 0);
      src = tsrc_swizzle1(tsrc_d(src), indirect->Swizzle);
   }
   else {
      src = tsrc_imm_d(0);
   }

   inst->src[num_src++] = src;

   return num_src;
}
 
/**
 * Return the toy register for an indirectly addressed TGSI source operand.
 *
 * For resource-like files the indirection register itself is returned;
 * for everything else a scratch VRF register is allocated and a
 * TGSI_INDIRECT_FETCH instruction is emitted to load it.
 */
static struct toy_src
ra_get_src_indirect(struct toy_tgsi *tgsi,
                    const struct tgsi_full_instruction *tgsi_inst,
                    int src_index)
{
   const struct tgsi_full_src_register *s = &tgsi_inst->Src[src_index];
   bool need_vrf = false, is_resource = false;
   struct toy_src src;

   switch (s->Register.File) {
   case TGSI_FILE_NULL:
      src = tsrc_null();
      break;
   case TGSI_FILE_SAMPLER:
   case TGSI_FILE_RESOURCE:
   case TGSI_FILE_SAMPLER_VIEW:
      is_resource = true;
      /* fall through */
   case TGSI_FILE_CONSTANT:
   case TGSI_FILE_INPUT:
   case TGSI_FILE_SYSTEM_VALUE:
   case TGSI_FILE_TEMPORARY:
   case TGSI_FILE_ADDRESS:
   case TGSI_FILE_IMMEDIATE:
   case TGSI_FILE_PREDICATE:
      need_vrf = true;
      break;
   default:
      assert(!"unhandled src file");
      src = tsrc_null();
      break;
   }

   if (need_vrf) {
      const enum toy_type type = ra_get_type(tgsi, tgsi_inst, src_index, false);
      int vrf;

      if (is_resource) {
         /* use the indirection register directly as the resource index */
         assert(!s->Register.Dimension);
         assert(s->Register.Indirect);

         vrf = ra_map_reg(tgsi, s->Indirect.File, 0, s->Indirect.Index, NULL);
      }
      else {
         /* scratch register to receive the fetched value */
         vrf = ra_alloc_reg(tgsi, s->Register.File);
      }

      src = ra_get_src_for_vrf(s, type, vrf);

      /* emit indirect fetch */
      if (!is_resource) {
         struct toy_inst *inst;

         inst = tc_add(tgsi->tc);
         inst->opcode = TOY_OPCODE_TGSI_INDIRECT_FETCH;
         inst->dst = tdst_from(src);
         inst->dst.writemask = TOY_WRITEMASK_XYZW;

         init_tgsi_reg(tgsi, inst, s->Register.File, s->Register.Index,
               (s->Register.Indirect) ? &s->Indirect : NULL,
               (s->Register.Dimension) ? &s->Dimension : NULL,
               (s->Dimension.Indirect) ? &s->DimIndirect : NULL);
      }
   }

   return src;
}
 
/**
 * Return the toy register for a TGSI source operand.
 *
 * Direct operands map to VRF registers (allocating and zero-initializing
 * them on first use for writable files); resource operands become
 * immediate indices; scalar immediates become immediate operands.
 */
static struct toy_src
ra_get_src(struct toy_tgsi *tgsi,
           const struct tgsi_full_instruction *tgsi_inst,
           int src_index)
{
   const struct tgsi_full_src_register *s = &tgsi_inst->Src[src_index];
   bool need_vrf = false;
   struct toy_src src;

   if (ra_is_src_indirect(s))
      return ra_get_src_indirect(tgsi, tgsi_inst, src_index);

   switch (s->Register.File) {
   case TGSI_FILE_NULL:
      src = tsrc_null();
      break;
   case TGSI_FILE_CONSTANT:
   case TGSI_FILE_INPUT:
   case TGSI_FILE_SYSTEM_VALUE:
      need_vrf = true;
      break;
   case TGSI_FILE_TEMPORARY:
   case TGSI_FILE_ADDRESS:
   case TGSI_FILE_PREDICATE:
      need_vrf = true;
      break;
   case TGSI_FILE_SAMPLER:
   case TGSI_FILE_RESOURCE:
   case TGSI_FILE_SAMPLER_VIEW:
      /* directly addressed resources become immediate indices */
      assert(!s->Register.Dimension);
      src = tsrc_imm_d(s->Register.Index);
      break;
   case TGSI_FILE_IMMEDIATE:
      {
         const uint32_t *imm;
         enum toy_type imm_type;
         bool is_scalar;

         imm = toy_tgsi_get_imm(tgsi, s->Register.Index, &imm_type);

         /* all swizzled components equal -> can be a scalar immediate */
         is_scalar =
            (imm[s->Register.SwizzleX] == imm[s->Register.SwizzleY] &&
             imm[s->Register.SwizzleX] == imm[s->Register.SwizzleZ] &&
             imm[s->Register.SwizzleX] == imm[s->Register.SwizzleW]);

         if (is_scalar) {
            const enum toy_type type =
               ra_get_type(tgsi, tgsi_inst, src_index, false);

            /* ignore imm_type */
            src = tsrc_imm_ud(imm[s->Register.SwizzleX]);
            src.type = type;
            src.absolute = s->Register.Absolute;
            src.negate = s->Register.Negate;
         }
         else {
            need_vrf = true;
         }
      }
      break;
   default:
      assert(!"unhandled src file");
      src = tsrc_null();
      break;
   }

   if (need_vrf) {
      const enum toy_type type = ra_get_type(tgsi, tgsi_inst, src_index, false);
      bool is_new;
      int vrf;

      vrf = ra_map_reg(tgsi, s->Register.File,
            ra_src_dimension(s), ra_src_index(s), &is_new);

      src = ra_get_src_for_vrf(s, type, vrf);

      if (is_new) {
         switch (s->Register.File) {
         case TGSI_FILE_TEMPORARY:
         case TGSI_FILE_ADDRESS:
         case TGSI_FILE_PREDICATE:
            {
               struct toy_dst dst = tdst_from(src);
               dst.writemask = TOY_WRITEMASK_XYZW;

               /* always initialize registers before use */
               if (tgsi->aos) {
                  tc_MOV(tgsi->tc, dst, tsrc_type(tsrc_imm_d(0), type));
               }
               else {
                  struct toy_dst tdst[4];
                  int i;

                  tdst_transpose(dst, tdst);

                  /* SoA: one MOV per channel */
                  for (i = 0; i < 4; i++) {
                     tc_MOV(tgsi->tc, tdst[i],
                           tsrc_type(tsrc_imm_d(0), type));
                  }
               }
            }
            break;
         default:
            break;
         }
      }

   }

   return src;
}
 
/**
 * Translate one TGSI instruction: fetch the operands, invoke the
 * per-opcode translation callback, copy scratch destinations to their
 * real registers, and update bookkeeping (kill usage, written channels).
 */
static void
parse_instruction(struct toy_tgsi *tgsi,
                  const struct tgsi_full_instruction *tgsi_inst)
{
   struct toy_dst dst[TGSI_FULL_MAX_DST_REGISTERS];
   struct toy_src src[TGSI_FULL_MAX_SRC_REGISTERS];
   bool dst_is_scratch[TGSI_FULL_MAX_DST_REGISTERS];
   toy_tgsi_translate translate;
   int i, j;

   /* convert TGSI registers to toy registers */
   for (i = 0; i < tgsi_inst->Instruction.NumSrcRegs; i++)
      src[i] = ra_get_src(tgsi, tgsi_inst, i);
   for (i = 0; i < tgsi_inst->Instruction.NumDstRegs; i++)
      dst[i] = ra_get_dst(tgsi, tgsi_inst, i, &dst_is_scratch[i]);

   /* translate the instruction */
   translate = tgsi->translate_table[tgsi_inst->Instruction.Opcode];
   translate(tgsi->tc, tgsi_inst, dst, src);

   /* write the result to the real destinations if needed */
   for (i = 0; i < tgsi_inst->Instruction.NumDstRegs; i++) {
      const struct tgsi_full_dst_register *d = &tgsi_inst->Dst[i];

      if (!dst_is_scratch[i])
         continue;

      if (tgsi_inst->Instruction.Saturate == TGSI_SAT_MINUS_PLUS_ONE)
         tc_fail(tgsi->tc, "TGSI_SAT_MINUS_PLUS_ONE unhandled");

      /* saturate while copying from the scratch register */
      tgsi->tc->templ.saturate = tgsi_inst->Instruction.Saturate;

      /* emit indirect store */
      if (ra_dst_is_indirect(d)) {
         struct toy_inst *inst;

         inst = tc_add(tgsi->tc);
         inst->opcode = TOY_OPCODE_TGSI_INDIRECT_STORE;
         inst->dst = dst[i];

         init_tgsi_reg(tgsi, inst, d->Register.File, d->Register.Index,
               (d->Register.Indirect) ? &d->Indirect : NULL,
               (d->Register.Dimension) ? &d->Dimension : NULL,
               (d->Dimension.Indirect) ? &d->DimIndirect : NULL);
      }
      else {
         const enum toy_type type = ra_get_type(tgsi, tgsi_inst, i, true);
         struct toy_dst real_dst;
         int vrf;

         vrf = ra_map_reg(tgsi, d->Register.File,
               ra_dst_dimension(d), ra_dst_index(d), NULL);
         real_dst = tdst_full(TOY_FILE_VRF, type, TOY_RECT_LINEAR,
               false, 0, d->Register.WriteMask, vrf * TOY_REG_WIDTH);

         if (tgsi->aos) {
            tc_MOV(tgsi->tc, real_dst, tsrc_from(dst[i]));
         }
         else {
            struct toy_dst tdst[4];
            struct toy_src tsrc[4];

            tdst_transpose(real_dst, tdst);
            tsrc_transpose(tsrc_from(dst[i]), tsrc);

            for (j = 0; j < 4; j++)
               tc_MOV(tgsi->tc, tdst[j], tsrc[j]);
         }
      }

      tgsi->tc->templ.saturate = false;
   }

   switch (tgsi_inst->Instruction.Opcode) {
   case TGSI_OPCODE_KILL_IF:
   case TGSI_OPCODE_KILL:
      tgsi->uses_kill = true;
      break;
   default:
      break;
   }

   /* remember channels written */
   for (i = 0; i < tgsi_inst->Instruction.NumDstRegs; i++) {
      const struct tgsi_full_dst_register *d = &tgsi_inst->Dst[i];

      if (d->Register.File != TGSI_FILE_OUTPUT)
         continue;

      /*
       * use a separate counter here; the previous code reused "i",
       * clobbering the outer loop counter and terminating it early
       */
      for (j = 0; j < tgsi->num_outputs; j++) {
         if (tgsi->outputs[j].index == d->Register.Index) {
            tgsi->outputs[j].undefined_mask &= ~d->Register.WriteMask;
            break;
         }
      }
   }
}
 
/**
 * Record an IN declaration: one slot per declared register, with
 * semantic and interpolation info.
 */
static void
decl_add_in(struct toy_tgsi *tgsi, const struct tgsi_full_declaration *decl)
{
   static const struct tgsi_declaration_interp default_interp = {
      TGSI_INTERPOLATE_PERSPECTIVE, false, 0,
   };
   const struct tgsi_declaration_interp *interp;
   int i;

   if (decl->Range.Last >= Elements(tgsi->inputs)) {
      assert(!"invalid IN");
      return;
   }

   /* fall back to perspective interpolation when none is declared */
   interp = (decl->Declaration.Interpolate) ? &decl->Interp : &default_interp;

   for (i = decl->Range.First; i <= decl->Range.Last; i++) {
      const int slot = tgsi->num_inputs++;

      tgsi->inputs[slot].index = i;
      tgsi->inputs[slot].usage_mask = decl->Declaration.UsageMask;
      tgsi->inputs[slot].interp = interp->Interpolate;
      tgsi->inputs[slot].centroid = interp->Centroid;

      if (decl->Declaration.Semantic) {
         tgsi->inputs[slot].semantic_name = decl->Semantic.Name;
         tgsi->inputs[slot].semantic_index = decl->Semantic.Index;
      }
      else {
         /* default to GENERIC[index] */
         tgsi->inputs[slot].semantic_name = TGSI_SEMANTIC_GENERIC;
         tgsi->inputs[slot].semantic_index = i;
      }
   }
}
 
/**
 * Record an OUT declaration: one slot per declared register.  All
 * channels start out undefined until an instruction writes them.
 */
static void
decl_add_out(struct toy_tgsi *tgsi, const struct tgsi_full_declaration *decl)
{
   int i;

   if (decl->Range.Last >= Elements(tgsi->outputs)) {
      assert(!"invalid OUT");
      return;
   }

   /* outputs must carry a semantic */
   assert(decl->Declaration.Semantic);

   for (i = decl->Range.First; i <= decl->Range.Last; i++) {
      const int slot = tgsi->num_outputs++;

      tgsi->outputs[slot].index = i;
      tgsi->outputs[slot].undefined_mask = TOY_WRITEMASK_XYZW;
      tgsi->outputs[slot].usage_mask = decl->Declaration.UsageMask;
      tgsi->outputs[slot].semantic_name = decl->Semantic.Name;
      tgsi->outputs[slot].semantic_index = decl->Semantic.Index;
   }
}
 
/**
 * Record an SV (system value) declaration: one slot per declared register.
 */
static void
decl_add_sv(struct toy_tgsi *tgsi, const struct tgsi_full_declaration *decl)
{
   int i;

   if (decl->Range.Last >= Elements(tgsi->system_values)) {
      assert(!"invalid SV");
      return;
   }

   for (i = decl->Range.First; i <= decl->Range.Last; i++) {
      const int slot = tgsi->num_system_values++;

      tgsi->system_values[slot].index = i;

      if (decl->Declaration.Semantic) {
         tgsi->system_values[slot].semantic_name = decl->Semantic.Name;
         tgsi->system_values[slot].semantic_index = decl->Semantic.Index;
      }
      else {
         /* default to GENERIC[index] */
         tgsi->system_values[slot].semantic_name = TGSI_SEMANTIC_GENERIC;
         tgsi->system_values[slot].semantic_index = i;
      }
   }
}
 
/**
 * Emit an instruction to fetch the value of a TGSI register into the
 * VRF register it is mapped to.
 *
 * Only files whose values live outside the VRF (inputs, constants,
 * system values, immediates) need a fetch; other files are ignored.
 */
static void
fetch_source(struct toy_tgsi *tgsi, enum tgsi_file_type file, int dim, int idx)
{
   struct toy_dst dst;
   int vrf;
   enum toy_opcode opcode;
   enum toy_type type = TOY_TYPE_F;

   switch (file) {
   case TGSI_FILE_INPUT:
      opcode = TOY_OPCODE_TGSI_IN;
      break;
   case TGSI_FILE_CONSTANT:
      opcode = TOY_OPCODE_TGSI_CONST;
      break;
   case TGSI_FILE_SYSTEM_VALUE:
      opcode = TOY_OPCODE_TGSI_SV;
      break;
   case TGSI_FILE_IMMEDIATE:
      opcode = TOY_OPCODE_TGSI_IMM;
      /* immediates carry their declared type */
      toy_tgsi_get_imm(tgsi, idx, &type);
      break;
   default:
      /* no need to fetch */
      return;
   }

   vrf = ra_map_reg(tgsi, file, dim, idx, NULL);
   dst = tdst(TOY_FILE_VRF, vrf, 0);
   dst = tdst_type(dst, type);

   tc_add2(tgsi->tc, opcode, dst, tsrc_imm_d(dim), tsrc_imm_d(idx));
}
 
/**
 * Parse a TGSI declaration token: record IN/OUT/SV slots and pre-fetch
 * the declared registers into the VRF.
 */
static void
parse_declaration(struct toy_tgsi *tgsi,
                  const struct tgsi_full_declaration *decl)
{
   int i;

   switch (decl->Declaration.File) {
   case TGSI_FILE_INPUT:
      decl_add_in(tgsi, decl);
      break;
   case TGSI_FILE_OUTPUT:
      decl_add_out(tgsi, decl);
      break;
   case TGSI_FILE_SYSTEM_VALUE:
      decl_add_sv(tgsi, decl);
      break;
   case TGSI_FILE_IMMEDIATE:
      /* immediates should be declared with TGSI_TOKEN_TYPE_IMMEDIATE */
      assert(!"unexpected immediate declaration");
      break;
   case TGSI_FILE_NULL:
   case TGSI_FILE_CONSTANT:
   case TGSI_FILE_TEMPORARY:
   case TGSI_FILE_SAMPLER:
   case TGSI_FILE_PREDICATE:
   case TGSI_FILE_ADDRESS:
   case TGSI_FILE_RESOURCE:
   case TGSI_FILE_SAMPLER_VIEW:
      /* nothing to do */
      break;
   default:
      assert(!"unhandled TGSI file");
      break;
   }

   /* fetch the registers now */
   for (i = decl->Range.First; i <= decl->Range.Last; i++) {
      const int dim = (decl->Declaration.Dimension) ? decl->Dim.Index2D : 0;
      fetch_source(tgsi, decl->Declaration.File, dim, i);
   }
}
 
/**
 * Append a 4-component immediate of the given type.
 *
 * \return the index of the immediate, or -1 on allocation failure
 */
static int
add_imm(struct toy_tgsi *tgsi, enum toy_type type, const uint32_t *buf)
{
   /* reallocate the buffer if necessary */
   if (tgsi->imm_data.cur >= tgsi->imm_data.size) {
      const int cur_size = tgsi->imm_data.size;
      int new_size;
      enum toy_type *new_types;
      uint32_t (*new_buf)[4];

      new_size = (cur_size) ? cur_size << 1 : 16;
      while (new_size <= tgsi->imm_data.cur)
         new_size <<= 1;

      new_buf = REALLOC(tgsi->imm_data.buf,
            cur_size * sizeof(new_buf[0]),
            new_size * sizeof(new_buf[0]));
      new_types = REALLOC(tgsi->imm_data.types,
            cur_size * sizeof(new_types[0]),
            new_size * sizeof(new_types[0]));

      /*
       * REALLOC frees the old block when it succeeds.  Commit whichever
       * reallocation succeeded before bailing out, so that the pointers
       * kept in tgsi never dangle; the previous code freed the new block
       * while leaving the stale pointer in place, causing a double free
       * in toy_tgsi_cleanup().
       */
      if (new_buf)
         tgsi->imm_data.buf = new_buf;
      if (new_types)
         tgsi->imm_data.types = new_types;
      if (!new_buf || !new_types)
         return -1;

      tgsi->imm_data.size = new_size;
   }

   tgsi->imm_data.types[tgsi->imm_data.cur] = type;
   memcpy(&tgsi->imm_data.buf[tgsi->imm_data.cur],
         buf, sizeof(tgsi->imm_data.buf[0]));

   return tgsi->imm_data.cur++;
}
 
/**
 * Parse a TGSI immediate token: convert the four components to raw
 * 32-bit data, store them, and fetch the immediate into the VRF.
 */
static void
parse_immediate(struct toy_tgsi *tgsi, const struct tgsi_full_immediate *imm)
{
   enum toy_type type;
   uint32_t imm_buf[4];
   int idx, i;

   switch (imm->Immediate.DataType) {
   case TGSI_IMM_FLOAT32:
      type = TOY_TYPE_F;
      for (i = 0; i < 4; i++)
         imm_buf[i] = fui(imm->u[i].Float);
      break;
   case TGSI_IMM_INT32:
      type = TOY_TYPE_D;
      for (i = 0; i < 4; i++)
         imm_buf[i] = (uint32_t) imm->u[i].Int;
      break;
   case TGSI_IMM_UINT32:
      type = TOY_TYPE_UD;
      for (i = 0; i < 4; i++)
         imm_buf[i] = imm->u[i].Uint;
      break;
   default:
      assert(!"unhandled TGSI imm type");
      type = TOY_TYPE_F;
      memset(imm_buf, 0, sizeof(imm_buf));
      break;
   }

   idx = add_imm(tgsi, type, imm_buf);
   if (idx < 0) {
      tc_fail(tgsi->tc, "failed to add TGSI imm");
      return;
   }

   fetch_source(tgsi, TGSI_FILE_IMMEDIATE, 0, idx);
}
 
/**
 * Parse a TGSI property token and store its value in tgsi->props.
 */
static void
parse_property(struct toy_tgsi *tgsi, const struct tgsi_full_property *prop)
{
   switch (prop->Property.PropertyName) {
   case TGSI_PROPERTY_VS_PROHIBIT_UCPS:
      tgsi->props.vs_prohibit_ucps = prop->u[0].Data;
      break;
   case TGSI_PROPERTY_FS_COORD_ORIGIN:
      tgsi->props.fs_coord_origin = prop->u[0].Data;
      break;
   case TGSI_PROPERTY_FS_COORD_PIXEL_CENTER:
      tgsi->props.fs_coord_pixel_center = prop->u[0].Data;
      break;
   case TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS:
      tgsi->props.fs_color0_writes_all_cbufs = prop->u[0].Data;
      break;
   case TGSI_PROPERTY_FS_DEPTH_LAYOUT:
      tgsi->props.fs_depth_layout = prop->u[0].Data;
      break;
   case TGSI_PROPERTY_GS_INPUT_PRIM:
      tgsi->props.gs_input_prim = prop->u[0].Data;
      break;
   case TGSI_PROPERTY_GS_OUTPUT_PRIM:
      tgsi->props.gs_output_prim = prop->u[0].Data;
      break;
   case TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES:
      tgsi->props.gs_max_output_vertices = prop->u[0].Data;
      break;
   default:
      assert(!"unhandled TGSI property");
      break;
   }
}
 
/**
 * Dispatch a parsed TGSI token to the matching parser.
 */
static void
parse_token(struct toy_tgsi *tgsi, const union tgsi_full_token *token)
{
   const unsigned type = token->Token.Type;

   if (type == TGSI_TOKEN_TYPE_DECLARATION)
      parse_declaration(tgsi, &token->FullDeclaration);
   else if (type == TGSI_TOKEN_TYPE_IMMEDIATE)
      parse_immediate(tgsi, &token->FullImmediate);
   else if (type == TGSI_TOKEN_TYPE_INSTRUCTION)
      parse_instruction(tgsi, &token->FullInstruction);
   else if (type == TGSI_TOKEN_TYPE_PROPERTY)
      parse_property(tgsi, &token->FullProperty);
   else
      assert(!"unhandled TGSI token type");
}
 
/**
 * Hash-table iterator callback: print one TGSI-register-to-VRF mapping.
 */
static enum pipe_error
dump_reg_mapping(void *key, void *val, void *data)
{
   const uint32_t sig = (uint32_t) pointer_to_intptr(key);
   const uint32_t vrf = (uint32_t) pointer_to_intptr(val);
   int tgsi_file, tgsi_dim, tgsi_index;

   /* decode the packed key; see ra_get_map_key() */
   tgsi_file = (sig >> 28) & 0xf;
   tgsi_dim = (sig >> 16) & 0xfff;
   tgsi_index = (sig >> 0) & 0xffff;

   if (tgsi_dim) {
      ilo_printf(" v%d:\t%s[%d][%d]\n", vrf,
            tgsi_file_name(tgsi_file), tgsi_dim, tgsi_index);
   }
   else {
      ilo_printf(" v%d:\t%s[%d]\n", vrf,
            tgsi_file_name(tgsi_file), tgsi_index);
   }

   return PIPE_OK;
}
 
/**
 * Dump the TGSI translator, currently only the register mapping.
 */
void
toy_tgsi_dump(const struct toy_tgsi *tgsi)
{
   /* walk every (TGSI reg, VRF reg) pair and print it */
   util_hash_table_foreach(tgsi->reg_mapping, dump_reg_mapping, NULL);
}
 
/**
 * Clean up the TGSI translator, releasing the immediate buffers and the
 * register mapping.  The toy_compiler is owned by the caller and is left
 * untouched.
 */
void
toy_tgsi_cleanup(struct toy_tgsi *tgsi)
{
   FREE(tgsi->imm_data.buf);
   FREE(tgsi->imm_data.types);

   util_hash_table_destroy(tgsi->reg_mapping);
}
 
/* hash callback for reg_mapping: the packed key is its own hash */
static unsigned
reg_mapping_hash(void *key)
{
   return (unsigned) pointer_to_intptr(key);
}
 
/* compare callback for reg_mapping: keys are opaque cookies, so pointer
 * equality is key equality; returns 0 when equal per the hash-table API */
static int
reg_mapping_compare(void *key1, void *key2)
{
   return (key1 == key2) ? 0 : 1;
}
 
/**
 * Initialize the TGSI translator.
 *
 * \return false when the register-mapping hash table cannot be created
 */
static bool
init_tgsi(struct toy_tgsi *tgsi, struct toy_compiler *tc, bool aos)
{
   memset(tgsi, 0, sizeof(*tgsi));

   tgsi->tc = tc;
   tgsi->aos = aos;
   tgsi->translate_table = (aos) ? aos_translate_table : soa_translate_table;

   /* create a mapping of TGSI registers to VRF registers */
   tgsi->reg_mapping =
      util_hash_table_create(reg_mapping_hash, reg_mapping_compare);

   return (tgsi->reg_mapping != NULL);
}
 
/**
 * Translate TGSI tokens into toy instructions.
 *
 * On failure, tc_fail() is called on the compiler; the caller should
 * check the compiler's error state.  \p tgsi must later be released
 * with toy_tgsi_cleanup().
 */
void
toy_compiler_translate_tgsi(struct toy_compiler *tc,
                            const struct tgsi_token *tokens, bool aos,
                            struct toy_tgsi *tgsi)
{
   struct tgsi_parse_context parse;

   if (!init_tgsi(tgsi, tc, aos)) {
      tc_fail(tc, "failed to initialize TGSI translator");
      return;
   }

   /* walk the token stream and translate each token in order */
   tgsi_parse_init(&parse, tokens);
   while (!tgsi_parse_end_of_tokens(&parse)) {
      tgsi_parse_token(&parse);
      parse_token(tgsi, &parse.FullToken);
   }
   tgsi_parse_free(&parse);
}
 
/**
 * Look up the VRF register a TGSI register is mapped to.
 *
 * \return the VRF register number, or -1 when the register is unmapped
 */
int
toy_tgsi_get_vrf(const struct toy_tgsi *tgsi,
                 enum tgsi_file_type file, int dimension, int index)
{
   void *val = util_hash_table_get(tgsi->reg_mapping,
         ra_get_map_key(file, dimension, index));

   if (!val)
      return -1;

   return pointer_to_intptr(val);
}
/contrib/sdk/sources/Mesa/src/gallium/drivers/ilo/shader/toy_tgsi.h
0,0 → 1,163
/*
* Mesa 3-D graphics library
*
* Copyright (C) 2012-2013 LunarG, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*
* Authors:
* Chia-I Wu <olv@lunarg.com>
*/
 
#ifndef TOY_TGSI_H
#define TOY_TGSI_H
 
#include "pipe/p_state.h"
#include "pipe/p_shader_tokens.h"
#include "toy_compiler.h"
 
struct tgsi_token;
struct tgsi_full_instruction;
struct util_hash_table;
 
typedef void (*toy_tgsi_translate)(struct toy_compiler *tc,
const struct tgsi_full_instruction *tgsi_inst,
struct toy_dst *dst,
struct toy_src *src);
 
/**
 * State of the TGSI-to-toy translator.
 */
struct toy_tgsi {
   struct toy_compiler *tc;      /* compiler that collects the toy instructions */
   bool aos;                     /* AoS register layout (vs SoA) */
   const toy_tgsi_translate *translate_table;  /* per-opcode callbacks, picked by aos */

   /* maps packed (file, dim, index) keys to VRF registers; see ra_get_map_key() */
   struct util_hash_table *reg_mapping;

   /* values of the TGSI_PROPERTY_x tokens seen */
   struct {
      bool vs_prohibit_ucps;
      int fs_coord_origin;
      int fs_coord_pixel_center;
      bool fs_color0_writes_all_cbufs;
      int fs_depth_layout;
      int gs_input_prim;
      int gs_output_prim;
      int gs_max_output_vertices;
   } props;

   /* immediates collected from TGSI_TOKEN_TYPE_IMMEDIATE tokens */
   struct {
      enum toy_type *types;      /* per-immediate declared type */
      uint32_t (*buf)[4];        /* per-immediate raw 4-component data */
      int cur, size;             /* used count and allocated capacity */
   } imm_data;

   /* one slot per declared IN register */
   struct {
      int index:16;
      unsigned usage_mask:4;     /* TGSI_WRITEMASK_x */
      unsigned semantic_name:8;  /* TGSI_SEMANTIC_x */
      unsigned semantic_index:8;
      unsigned interp:4;         /* TGSI_INTERPOLATE_x */
      unsigned centroid:1;
   } inputs[PIPE_MAX_SHADER_INPUTS];
   int num_inputs;

   /* one slot per declared OUT register */
   struct {
      int index:16;
      unsigned undefined_mask:4; /* channels never written by any instruction */
      unsigned usage_mask:4;     /* TGSI_WRITEMASK_x */
      unsigned semantic_name:8;  /* TGSI_SEMANTIC_x */
      unsigned semantic_index:8;
   } outputs[PIPE_MAX_SHADER_OUTPUTS];
   int num_outputs;

   /* one slot per declared SV register */
   struct {
      int index:16;
      unsigned semantic_name:8;  /* TGSI_SEMANTIC_x */
      unsigned semantic_index:8;
   } system_values[8];
   int num_system_values;

   bool uses_kill;               /* shader contains KILL or KILL_IF */
};
 
/**
 * Find the slot of the TGSI input with the given register index.
 *
 * \return the slot, or -1 when not found
 */
static inline int
toy_tgsi_find_input(const struct toy_tgsi *tgsi, int index)
{
   int i;

   for (i = 0; i < tgsi->num_inputs; i++) {
      if (tgsi->inputs[i].index == index)
         return i;
   }

   return -1;
}
 
/**
 * Find the slot of the TGSI system value with the given register index.
 *
 * \return the slot, or -1 when not found
 */
static inline int
toy_tgsi_find_system_value(const struct toy_tgsi *tgsi, int index)
{
   int i;

   for (i = 0; i < tgsi->num_system_values; i++) {
      if (tgsi->system_values[i].index == index)
         return i;
   }

   return -1;
}
 
/**
 * Return the raw 4-component data of the TGSI immediate, or NULL when
 * the index is out of range.  When \p type is non-NULL, it receives the
 * immediate's declared type.
 */
static inline const uint32_t *
toy_tgsi_get_imm(const struct toy_tgsi *tgsi, unsigned index,
                 enum toy_type *type)
{
   const uint32_t *imm;

   /* cast avoids a signed/unsigned comparison; cur is never negative */
   if (index >= (unsigned) tgsi->imm_data.cur)
      return NULL;

   imm = tgsi->imm_data.buf[index];
   if (type)
      *type = tgsi->imm_data.types[index];

   return imm;
}
 
void
toy_compiler_translate_tgsi(struct toy_compiler *tc,
const struct tgsi_token *tokens, bool aos,
struct toy_tgsi *tgsi);
 
void
toy_tgsi_cleanup(struct toy_tgsi *tgsi);
 
int
toy_tgsi_get_vrf(const struct toy_tgsi *tgsi,
enum tgsi_file_type file, int dimension, int index);
 
void
toy_tgsi_dump(const struct toy_tgsi *tgsi);
 
#endif /* TOY_TGSI_H */