Subversion Repositories Kolibri OS

Compare Revisions

Rev 5563 → Rev 5564

/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/freedreno/Automake.inc
0,0 → 1,11
if HAVE_GALLIUM_FREEDRENO
 
TARGET_DRIVERS += msm kgsl
TARGET_CPPFLAGS += -DGALLIUM_FREEDRENO
TARGET_LIB_DEPS += \
$(top_builddir)/src/gallium/winsys/freedreno/drm/libfreedrenodrm.la \
$(top_builddir)/src/gallium/drivers/freedreno/libfreedreno.la \
$(FREEDRENO_LIBS) \
$(LIBDRM_LIBS)
 
endif
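 
# Illustrative note (editor's addition, not part of the committed fragment):
# the HAVE_GALLIUM_FREEDRENO conditional above is set by Mesa's configure
# script, so this fragment only takes effect when freedreno is among the
# requested Gallium drivers, e.g. (assuming the stock Mesa 10.6 autoconf
# options):
#
#   ./configure --with-gallium-drivers=freedreno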
/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/freedreno/Makefile.am
0,0 → 1,35
AUTOMAKE_OPTIONS = subdir-objects
 
include Makefile.sources
include $(top_srcdir)/src/gallium/Automake.inc
 
AM_CFLAGS = \
-Wno-packed-bitfield-compat \
-I$(top_srcdir)/src/gallium/drivers/freedreno/ir3 \
-I$(top_builddir)/src/glsl/nir \
$(GALLIUM_DRIVER_CFLAGS) \
$(FREEDRENO_CFLAGS)
 
noinst_LTLIBRARIES = libfreedreno.la
 
libfreedreno_la_SOURCES = \
$(C_SOURCES) \
$(a2xx_SOURCES) \
$(a3xx_SOURCES) \
$(a4xx_SOURCES) \
$(ir3_SOURCES)
 
noinst_PROGRAMS = ir3_compiler
 
ir3_compiler_SOURCES = \
ir3/ir3_cmdline.c
 
ir3_compiler_LDADD = \
libfreedreno.la \
../../auxiliary/libgallium.la \
$(top_builddir)/src/glsl/libnir.la \
$(top_builddir)/src/libglsl_util.la \
-lstdc++ \
$(top_builddir)/src/util/libmesautil.la \
$(GALLIUM_COMMON_LIB_DEPS) \
$(FREEDRENO_LIBS)
/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/freedreno/Makefile.in
0,0 → 1,1237
# Makefile.in generated by automake 1.15 from Makefile.am.
# @configure_input@
 
# Copyright (C) 1994-2014 Free Software Foundation, Inc.
 
# This Makefile.in is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
# with or without modifications, as long as this notice is preserved.
 
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY, to the extent permitted by law; without
# even the implied warranty of MERCHANTABILITY or FITNESS FOR A
# PARTICULAR PURPOSE.
 
@SET_MAKE@
 
 
VPATH = @srcdir@
am__is_gnu_make = { \
if test -z '$(MAKELEVEL)'; then \
false; \
elif test -n '$(MAKE_HOST)'; then \
true; \
elif test -n '$(MAKE_VERSION)' && test -n '$(CURDIR)'; then \
true; \
else \
false; \
fi; \
}
am__make_running_with_option = \
case $${target_option-} in \
?) ;; \
*) echo "am__make_running_with_option: internal error: invalid" \
"target option '$${target_option-}' specified" >&2; \
exit 1;; \
esac; \
has_opt=no; \
sane_makeflags=$$MAKEFLAGS; \
if $(am__is_gnu_make); then \
sane_makeflags=$$MFLAGS; \
else \
case $$MAKEFLAGS in \
*\\[\ \ ]*) \
bs=\\; \
sane_makeflags=`printf '%s\n' "$$MAKEFLAGS" \
| sed "s/$$bs$$bs[$$bs $$bs ]*//g"`;; \
esac; \
fi; \
skip_next=no; \
strip_trailopt () \
{ \
flg=`printf '%s\n' "$$flg" | sed "s/$$1.*$$//"`; \
}; \
for flg in $$sane_makeflags; do \
test $$skip_next = yes && { skip_next=no; continue; }; \
case $$flg in \
*=*|--*) continue;; \
-*I) strip_trailopt 'I'; skip_next=yes;; \
-*I?*) strip_trailopt 'I';; \
-*O) strip_trailopt 'O'; skip_next=yes;; \
-*O?*) strip_trailopt 'O';; \
-*l) strip_trailopt 'l'; skip_next=yes;; \
-*l?*) strip_trailopt 'l';; \
-[dEDm]) skip_next=yes;; \
-[JT]) skip_next=yes;; \
esac; \
case $$flg in \
*$$target_option*) has_opt=yes; break;; \
esac; \
done; \
test $$has_opt = yes
am__make_dryrun = (target_option=n; $(am__make_running_with_option))
am__make_keepgoing = (target_option=k; $(am__make_running_with_option))
pkgdatadir = $(datadir)/@PACKAGE@
pkgincludedir = $(includedir)/@PACKAGE@
pkglibdir = $(libdir)/@PACKAGE@
pkglibexecdir = $(libexecdir)/@PACKAGE@
am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd
install_sh_DATA = $(install_sh) -c -m 644
install_sh_PROGRAM = $(install_sh) -c
install_sh_SCRIPT = $(install_sh) -c
INSTALL_HEADER = $(INSTALL_DATA)
transform = $(program_transform_name)
NORMAL_INSTALL = :
PRE_INSTALL = :
POST_INSTALL = :
NORMAL_UNINSTALL = :
PRE_UNINSTALL = :
POST_UNINSTALL = :
build_triplet = @build@
host_triplet = @host@
target_triplet = @target@
@HAVE_DRISW_TRUE@am__append_1 = \
@HAVE_DRISW_TRUE@ $(top_builddir)/src/gallium/winsys/sw/dri/libswdri.la
 
@NEED_WINSYS_XLIB_TRUE@am__append_2 = \
@NEED_WINSYS_XLIB_TRUE@ $(top_builddir)/src/gallium/winsys/sw/xlib/libws_xlib.la \
@NEED_WINSYS_XLIB_TRUE@ -lX11 -lXext -lXfixes \
@NEED_WINSYS_XLIB_TRUE@ $(LIBDRM_LIBS)
 
noinst_PROGRAMS = ir3_compiler$(EXEEXT)
subdir = src/gallium/drivers/freedreno
ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
am__aclocal_m4_deps = $(top_srcdir)/m4/ax_check_gnu_make.m4 \
$(top_srcdir)/m4/ax_check_python_mako_module.m4 \
$(top_srcdir)/m4/ax_gcc_builtin.m4 \
$(top_srcdir)/m4/ax_gcc_func_attribute.m4 \
$(top_srcdir)/m4/ax_prog_bison.m4 \
$(top_srcdir)/m4/ax_prog_flex.m4 \
$(top_srcdir)/m4/ax_pthread.m4 $(top_srcdir)/m4/libtool.m4 \
$(top_srcdir)/m4/ltoptions.m4 $(top_srcdir)/m4/ltsugar.m4 \
$(top_srcdir)/m4/ltversion.m4 $(top_srcdir)/m4/lt~obsolete.m4 \
$(top_srcdir)/VERSION $(top_srcdir)/configure.ac
am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \
$(ACLOCAL_M4)
DIST_COMMON = $(srcdir)/Makefile.am $(am__DIST_COMMON)
mkinstalldirs = $(install_sh) -d
CONFIG_CLEAN_FILES =
CONFIG_CLEAN_VPATH_FILES =
LTLIBRARIES = $(noinst_LTLIBRARIES)
libfreedreno_la_LIBADD =
am__objects_1 = freedreno_context.lo freedreno_draw.lo \
freedreno_fence.lo freedreno_gmem.lo freedreno_program.lo \
freedreno_query.lo freedreno_query_hw.lo freedreno_query_sw.lo \
freedreno_resource.lo freedreno_screen.lo freedreno_state.lo \
freedreno_surface.lo freedreno_texture.lo freedreno_util.lo
am__dirstamp = $(am__leading_dot)dirstamp
am__objects_2 = a2xx/disasm-a2xx.lo a2xx/fd2_blend.lo \
a2xx/fd2_compiler.lo a2xx/fd2_context.lo a2xx/fd2_draw.lo \
a2xx/fd2_emit.lo a2xx/fd2_gmem.lo a2xx/fd2_program.lo \
a2xx/fd2_rasterizer.lo a2xx/fd2_screen.lo a2xx/fd2_texture.lo \
a2xx/fd2_util.lo a2xx/fd2_zsa.lo a2xx/ir-a2xx.lo
am__objects_3 = a3xx/fd3_blend.lo a3xx/fd3_context.lo a3xx/fd3_draw.lo \
a3xx/fd3_emit.lo a3xx/fd3_format.lo a3xx/fd3_gmem.lo \
a3xx/fd3_program.lo a3xx/fd3_query.lo a3xx/fd3_rasterizer.lo \
a3xx/fd3_screen.lo a3xx/fd3_texture.lo a3xx/fd3_zsa.lo
am__objects_4 = a4xx/fd4_blend.lo a4xx/fd4_context.lo a4xx/fd4_draw.lo \
a4xx/fd4_emit.lo a4xx/fd4_format.lo a4xx/fd4_gmem.lo \
a4xx/fd4_program.lo a4xx/fd4_query.lo a4xx/fd4_rasterizer.lo \
a4xx/fd4_screen.lo a4xx/fd4_texture.lo a4xx/fd4_zsa.lo
am__objects_5 = ir3/disasm-a3xx.lo ir3/ir3.lo ir3/ir3_compiler.lo \
ir3/ir3_compiler_nir.lo ir3/ir3_cp.lo ir3/ir3_depth.lo \
ir3/ir3_dump.lo ir3/ir3_flatten.lo ir3/ir3_group.lo \
ir3/ir3_legalize.lo ir3/ir3_nir_lower_if_else.lo ir3/ir3_ra.lo \
ir3/ir3_sched.lo ir3/ir3_shader.lo
am_libfreedreno_la_OBJECTS = $(am__objects_1) $(am__objects_2) \
$(am__objects_3) $(am__objects_4) $(am__objects_5)
libfreedreno_la_OBJECTS = $(am_libfreedreno_la_OBJECTS)
AM_V_lt = $(am__v_lt_@AM_V@)
am__v_lt_ = $(am__v_lt_@AM_DEFAULT_V@)
am__v_lt_0 = --silent
am__v_lt_1 =
PROGRAMS = $(noinst_PROGRAMS)
am_ir3_compiler_OBJECTS = ir3/ir3_cmdline.$(OBJEXT)
ir3_compiler_OBJECTS = $(am_ir3_compiler_OBJECTS)
am__DEPENDENCIES_1 =
am__DEPENDENCIES_2 = $(am__DEPENDENCIES_1) $(am__DEPENDENCIES_1) \
$(am__DEPENDENCIES_1)
ir3_compiler_DEPENDENCIES = libfreedreno.la \
../../auxiliary/libgallium.la \
$(top_builddir)/src/glsl/libnir.la \
$(top_builddir)/src/libglsl_util.la \
$(top_builddir)/src/util/libmesautil.la $(am__DEPENDENCIES_2) \
$(am__DEPENDENCIES_1)
AM_V_P = $(am__v_P_@AM_V@)
am__v_P_ = $(am__v_P_@AM_DEFAULT_V@)
am__v_P_0 = false
am__v_P_1 = :
AM_V_GEN = $(am__v_GEN_@AM_V@)
am__v_GEN_ = $(am__v_GEN_@AM_DEFAULT_V@)
am__v_GEN_0 = @echo " GEN " $@;
am__v_GEN_1 =
AM_V_at = $(am__v_at_@AM_V@)
am__v_at_ = $(am__v_at_@AM_DEFAULT_V@)
am__v_at_0 = @
am__v_at_1 =
DEFAULT_INCLUDES = -I.@am__isrc@
depcomp = $(SHELL) $(top_srcdir)/bin/depcomp
am__depfiles_maybe = depfiles
am__mv = mv -f
COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \
$(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS)
LTCOMPILE = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \
$(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) \
$(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) \
$(AM_CFLAGS) $(CFLAGS)
AM_V_CC = $(am__v_CC_@AM_V@)
am__v_CC_ = $(am__v_CC_@AM_DEFAULT_V@)
am__v_CC_0 = @echo " CC " $@;
am__v_CC_1 =
CCLD = $(CC)
LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \
$(LIBTOOLFLAGS) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \
$(AM_LDFLAGS) $(LDFLAGS) -o $@
AM_V_CCLD = $(am__v_CCLD_@AM_V@)
am__v_CCLD_ = $(am__v_CCLD_@AM_DEFAULT_V@)
am__v_CCLD_0 = @echo " CCLD " $@;
am__v_CCLD_1 =
SOURCES = $(libfreedreno_la_SOURCES) $(ir3_compiler_SOURCES)
DIST_SOURCES = $(libfreedreno_la_SOURCES) $(ir3_compiler_SOURCES)
am__can_run_installinfo = \
case $$AM_UPDATE_INFO_DIR in \
n|no|NO) false;; \
*) (install-info --version) >/dev/null 2>&1;; \
esac
am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) $(LISP)
# Read a list of newline-separated strings from the standard input,
# and print each of them once, without duplicates. Input order is
# *not* preserved.
am__uniquify_input = $(AWK) '\
BEGIN { nonempty = 0; } \
{ items[$$0] = 1; nonempty = 1; } \
END { if (nonempty) { for (i in items) print i; }; } \
'
# Make sure the list of sources is unique. This is necessary because,
# e.g., the same source file might be shared among _SOURCES variables
# for different programs/libraries.
am__define_uniq_tagged_files = \
list='$(am__tagged_files)'; \
unique=`for i in $$list; do \
if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
done | $(am__uniquify_input)`
ETAGS = etags
CTAGS = ctags
am__DIST_COMMON = $(srcdir)/Makefile.in $(srcdir)/Makefile.sources \
$(top_srcdir)/bin/depcomp \
$(top_srcdir)/src/gallium/Automake.inc
DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST)
ACLOCAL = @ACLOCAL@
AMTAR = @AMTAR@
AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@
AR = @AR@
AUTOCONF = @AUTOCONF@
AUTOHEADER = @AUTOHEADER@
AUTOMAKE = @AUTOMAKE@
AWK = @AWK@
BSYMBOLIC = @BSYMBOLIC@
CC = @CC@
CCAS = @CCAS@
CCASDEPMODE = @CCASDEPMODE@
CCASFLAGS = @CCASFLAGS@
CCDEPMODE = @CCDEPMODE@
CFLAGS = @CFLAGS@
CLANG_RESOURCE_DIR = @CLANG_RESOURCE_DIR@
CLOCK_LIB = @CLOCK_LIB@
CPP = @CPP@
CPPFLAGS = @CPPFLAGS@
CXX = @CXX@
CXXCPP = @CXXCPP@
CXXDEPMODE = @CXXDEPMODE@
CXXFLAGS = @CXXFLAGS@
CYGPATH_W = @CYGPATH_W@
D3D_DRIVER_INSTALL_DIR = @D3D_DRIVER_INSTALL_DIR@
DEFINES = @DEFINES@
DEFS = @DEFS@
DEPDIR = @DEPDIR@
DLLTOOL = @DLLTOOL@
DLOPEN_LIBS = @DLOPEN_LIBS@
DRI2PROTO_CFLAGS = @DRI2PROTO_CFLAGS@
DRI2PROTO_LIBS = @DRI2PROTO_LIBS@
DRI3PROTO_CFLAGS = @DRI3PROTO_CFLAGS@
DRI3PROTO_LIBS = @DRI3PROTO_LIBS@
DRIGL_CFLAGS = @DRIGL_CFLAGS@
DRIGL_LIBS = @DRIGL_LIBS@
DRI_DRIVER_INSTALL_DIR = @DRI_DRIVER_INSTALL_DIR@
DRI_DRIVER_SEARCH_DIR = @DRI_DRIVER_SEARCH_DIR@
DRI_LIB_DEPS = @DRI_LIB_DEPS@
DRI_PC_REQ_PRIV = @DRI_PC_REQ_PRIV@
DSYMUTIL = @DSYMUTIL@
DUMPBIN = @DUMPBIN@
ECHO_C = @ECHO_C@
ECHO_N = @ECHO_N@
ECHO_T = @ECHO_T@
EGL_CFLAGS = @EGL_CFLAGS@
EGL_CLIENT_APIS = @EGL_CLIENT_APIS@
EGL_LIB_DEPS = @EGL_LIB_DEPS@
EGL_NATIVE_PLATFORM = @EGL_NATIVE_PLATFORM@
EGREP = @EGREP@
ELF_LIB = @ELF_LIB@
EXEEXT = @EXEEXT@
EXPAT_CFLAGS = @EXPAT_CFLAGS@
EXPAT_LIBS = @EXPAT_LIBS@
FGREP = @FGREP@
FREEDRENO_CFLAGS = @FREEDRENO_CFLAGS@
FREEDRENO_LIBS = @FREEDRENO_LIBS@
GALLIUM_PIPE_LOADER_CLIENT_DEFINES = @GALLIUM_PIPE_LOADER_CLIENT_DEFINES@
GALLIUM_PIPE_LOADER_CLIENT_LIBS = @GALLIUM_PIPE_LOADER_CLIENT_LIBS@
GALLIUM_PIPE_LOADER_DEFINES = @GALLIUM_PIPE_LOADER_DEFINES@
GALLIUM_PIPE_LOADER_LIBS = @GALLIUM_PIPE_LOADER_LIBS@
GALLIUM_PIPE_LOADER_XCB_CFLAGS = @GALLIUM_PIPE_LOADER_XCB_CFLAGS@
GALLIUM_PIPE_LOADER_XCB_LIBS = @GALLIUM_PIPE_LOADER_XCB_LIBS@
GBM_PC_LIB_PRIV = @GBM_PC_LIB_PRIV@
GBM_PC_REQ_PRIV = @GBM_PC_REQ_PRIV@
GC_SECTIONS = @GC_SECTIONS@
GLESv1_CM_LIB_DEPS = @GLESv1_CM_LIB_DEPS@
GLESv1_CM_PC_LIB_PRIV = @GLESv1_CM_PC_LIB_PRIV@
GLESv2_LIB_DEPS = @GLESv2_LIB_DEPS@
GLESv2_PC_LIB_PRIV = @GLESv2_PC_LIB_PRIV@
GLPROTO_CFLAGS = @GLPROTO_CFLAGS@
GLPROTO_LIBS = @GLPROTO_LIBS@
GLX_TLS = @GLX_TLS@
GL_LIB = @GL_LIB@
GL_LIB_DEPS = @GL_LIB_DEPS@
GL_PC_CFLAGS = @GL_PC_CFLAGS@
GL_PC_LIB_PRIV = @GL_PC_LIB_PRIV@
GL_PC_REQ_PRIV = @GL_PC_REQ_PRIV@
GREP = @GREP@
HAVE_XF86VIDMODE = @HAVE_XF86VIDMODE@
INDENT = @INDENT@
INDENT_FLAGS = @INDENT_FLAGS@
INSTALL = @INSTALL@
INSTALL_DATA = @INSTALL_DATA@
INSTALL_PROGRAM = @INSTALL_PROGRAM@
INSTALL_SCRIPT = @INSTALL_SCRIPT@
INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@
INTEL_CFLAGS = @INTEL_CFLAGS@
INTEL_LIBS = @INTEL_LIBS@
LD = @LD@
LDFLAGS = @LDFLAGS@
LD_NO_UNDEFINED = @LD_NO_UNDEFINED@
LEX = @LEX@
LEXLIB = @LEXLIB@
LEX_OUTPUT_ROOT = @LEX_OUTPUT_ROOT@
LIBCLC_INCLUDEDIR = @LIBCLC_INCLUDEDIR@
LIBCLC_LIBEXECDIR = @LIBCLC_LIBEXECDIR@
LIBDRM_CFLAGS = @LIBDRM_CFLAGS@
LIBDRM_LIBS = @LIBDRM_LIBS@
LIBOBJS = @LIBOBJS@
LIBS = @LIBS@
LIBSHA1_CFLAGS = @LIBSHA1_CFLAGS@
LIBSHA1_LIBS = @LIBSHA1_LIBS@
LIBTOOL = @LIBTOOL@
LIBUDEV_CFLAGS = @LIBUDEV_CFLAGS@
LIBUDEV_LIBS = @LIBUDEV_LIBS@
LIB_DIR = @LIB_DIR@
LIB_EXT = @LIB_EXT@
LIPO = @LIPO@
LLVM_BINDIR = @LLVM_BINDIR@
LLVM_CFLAGS = @LLVM_CFLAGS@
LLVM_CONFIG = @LLVM_CONFIG@
LLVM_CPPFLAGS = @LLVM_CPPFLAGS@
LLVM_CXXFLAGS = @LLVM_CXXFLAGS@
LLVM_INCLUDEDIR = @LLVM_INCLUDEDIR@
LLVM_LDFLAGS = @LLVM_LDFLAGS@
LLVM_LIBDIR = @LLVM_LIBDIR@
LLVM_LIBS = @LLVM_LIBS@
LLVM_VERSION = @LLVM_VERSION@
LN_S = @LN_S@
LTLIBOBJS = @LTLIBOBJS@
LT_SYS_LIBRARY_PATH = @LT_SYS_LIBRARY_PATH@
MAKEINFO = @MAKEINFO@
MANIFEST_TOOL = @MANIFEST_TOOL@
MESA_LLVM = @MESA_LLVM@
MKDIR_P = @MKDIR_P@
MSVC2008_COMPAT_CFLAGS = @MSVC2008_COMPAT_CFLAGS@
MSVC2008_COMPAT_CXXFLAGS = @MSVC2008_COMPAT_CXXFLAGS@
MSVC2013_COMPAT_CFLAGS = @MSVC2013_COMPAT_CFLAGS@
MSVC2013_COMPAT_CXXFLAGS = @MSVC2013_COMPAT_CXXFLAGS@
NINE_MAJOR = @NINE_MAJOR@
NINE_MINOR = @NINE_MINOR@
NINE_TINY = @NINE_TINY@
NINE_VERSION = @NINE_VERSION@
NM = @NM@
NMEDIT = @NMEDIT@
NOUVEAU_CFLAGS = @NOUVEAU_CFLAGS@
NOUVEAU_LIBS = @NOUVEAU_LIBS@
OBJDUMP = @OBJDUMP@
OBJEXT = @OBJEXT@
OMX_CFLAGS = @OMX_CFLAGS@
OMX_LIBS = @OMX_LIBS@
OMX_LIB_INSTALL_DIR = @OMX_LIB_INSTALL_DIR@
OPENCL_LIBNAME = @OPENCL_LIBNAME@
OPENSSL_CFLAGS = @OPENSSL_CFLAGS@
OPENSSL_LIBS = @OPENSSL_LIBS@
OSMESA_LIB = @OSMESA_LIB@
OSMESA_LIB_DEPS = @OSMESA_LIB_DEPS@
OSMESA_PC_LIB_PRIV = @OSMESA_PC_LIB_PRIV@
OSMESA_PC_REQ = @OSMESA_PC_REQ@
OSMESA_VERSION = @OSMESA_VERSION@
OTOOL = @OTOOL@
OTOOL64 = @OTOOL64@
PACKAGE = @PACKAGE@
PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@
PACKAGE_NAME = @PACKAGE_NAME@
PACKAGE_STRING = @PACKAGE_STRING@
PACKAGE_TARNAME = @PACKAGE_TARNAME@
PACKAGE_URL = @PACKAGE_URL@
PACKAGE_VERSION = @PACKAGE_VERSION@
PATH_SEPARATOR = @PATH_SEPARATOR@
PKG_CONFIG = @PKG_CONFIG@
PKG_CONFIG_LIBDIR = @PKG_CONFIG_LIBDIR@
PKG_CONFIG_PATH = @PKG_CONFIG_PATH@
POSIX_SHELL = @POSIX_SHELL@
PRESENTPROTO_CFLAGS = @PRESENTPROTO_CFLAGS@
PRESENTPROTO_LIBS = @PRESENTPROTO_LIBS@
PTHREAD_CC = @PTHREAD_CC@
PTHREAD_CFLAGS = @PTHREAD_CFLAGS@
PTHREAD_LIBS = @PTHREAD_LIBS@
PYTHON2 = @PYTHON2@
RADEON_CFLAGS = @RADEON_CFLAGS@
RADEON_LIBS = @RADEON_LIBS@
RANLIB = @RANLIB@
SED = @SED@
SELINUX_CFLAGS = @SELINUX_CFLAGS@
SELINUX_LIBS = @SELINUX_LIBS@
SET_MAKE = @SET_MAKE@
SHA1_CFLAGS = @SHA1_CFLAGS@
SHA1_LIBS = @SHA1_LIBS@
SHELL = @SHELL@
SSE41_CFLAGS = @SSE41_CFLAGS@
STRIP = @STRIP@
VA_CFLAGS = @VA_CFLAGS@
VA_LIBS = @VA_LIBS@
VA_LIB_INSTALL_DIR = @VA_LIB_INSTALL_DIR@
VA_MAJOR = @VA_MAJOR@
VA_MINOR = @VA_MINOR@
VDPAU_CFLAGS = @VDPAU_CFLAGS@
VDPAU_LIBS = @VDPAU_LIBS@
VDPAU_LIB_INSTALL_DIR = @VDPAU_LIB_INSTALL_DIR@
VDPAU_MAJOR = @VDPAU_MAJOR@
VDPAU_MINOR = @VDPAU_MINOR@
VERSION = @VERSION@
VG_LIB_DEPS = @VG_LIB_DEPS@
VISIBILITY_CFLAGS = @VISIBILITY_CFLAGS@
VISIBILITY_CXXFLAGS = @VISIBILITY_CXXFLAGS@
VL_CFLAGS = @VL_CFLAGS@
VL_LIBS = @VL_LIBS@
WAYLAND_CFLAGS = @WAYLAND_CFLAGS@
WAYLAND_LIBS = @WAYLAND_LIBS@
WAYLAND_SCANNER = @WAYLAND_SCANNER@
WAYLAND_SCANNER_CFLAGS = @WAYLAND_SCANNER_CFLAGS@
WAYLAND_SCANNER_LIBS = @WAYLAND_SCANNER_LIBS@
X11_INCLUDES = @X11_INCLUDES@
XA_MAJOR = @XA_MAJOR@
XA_MINOR = @XA_MINOR@
XA_TINY = @XA_TINY@
XA_VERSION = @XA_VERSION@
XCB_DRI2_CFLAGS = @XCB_DRI2_CFLAGS@
XCB_DRI2_LIBS = @XCB_DRI2_LIBS@
XF86VIDMODE_CFLAGS = @XF86VIDMODE_CFLAGS@
XF86VIDMODE_LIBS = @XF86VIDMODE_LIBS@
XLIBGL_CFLAGS = @XLIBGL_CFLAGS@
XLIBGL_LIBS = @XLIBGL_LIBS@
XVMC_CFLAGS = @XVMC_CFLAGS@
XVMC_LIBS = @XVMC_LIBS@
XVMC_LIB_INSTALL_DIR = @XVMC_LIB_INSTALL_DIR@
XVMC_MAJOR = @XVMC_MAJOR@
XVMC_MINOR = @XVMC_MINOR@
YACC = @YACC@
YFLAGS = @YFLAGS@
abs_builddir = @abs_builddir@
abs_srcdir = @abs_srcdir@
abs_top_builddir = @abs_top_builddir@
abs_top_srcdir = @abs_top_srcdir@
ac_ct_AR = @ac_ct_AR@
ac_ct_CC = @ac_ct_CC@
ac_ct_CXX = @ac_ct_CXX@
ac_ct_DUMPBIN = @ac_ct_DUMPBIN@
acv_mako_found = @acv_mako_found@
am__include = @am__include@
am__leading_dot = @am__leading_dot@
am__quote = @am__quote@
am__tar = @am__tar@
am__untar = @am__untar@
ax_pthread_config = @ax_pthread_config@
bindir = @bindir@
build = @build@
build_alias = @build_alias@
build_cpu = @build_cpu@
build_os = @build_os@
build_vendor = @build_vendor@
builddir = @builddir@
datadir = @datadir@
datarootdir = @datarootdir@
docdir = @docdir@
dvidir = @dvidir@
exec_prefix = @exec_prefix@
host = @host@
host_alias = @host_alias@
host_cpu = @host_cpu@
host_os = @host_os@
host_vendor = @host_vendor@
htmldir = @htmldir@
ifGNUmake = @ifGNUmake@
includedir = @includedir@
infodir = @infodir@
install_sh = @install_sh@
libdir = @libdir@
libexecdir = @libexecdir@
localedir = @localedir@
localstatedir = @localstatedir@
mandir = @mandir@
mkdir_p = @mkdir_p@
oldincludedir = @oldincludedir@
pdfdir = @pdfdir@
prefix = @prefix@
program_transform_name = @program_transform_name@
psdir = @psdir@
sbindir = @sbindir@
sharedstatedir = @sharedstatedir@
srcdir = @srcdir@
sysconfdir = @sysconfdir@
target = @target@
target_alias = @target_alias@
target_cpu = @target_cpu@
target_os = @target_os@
target_vendor = @target_vendor@
top_build_prefix = @top_build_prefix@
top_builddir = @top_builddir@
top_srcdir = @top_srcdir@
AUTOMAKE_OPTIONS = subdir-objects
C_SOURCES := \
adreno_common.xml.h \
adreno_pm4.xml.h \
disasm.h \
freedreno_context.c \
freedreno_context.h \
freedreno_draw.c \
freedreno_draw.h \
freedreno_fence.c \
freedreno_fence.h \
freedreno_gmem.c \
freedreno_gmem.h \
freedreno_program.c \
freedreno_program.h \
freedreno_query.c \
freedreno_query.h \
freedreno_query_hw.c \
freedreno_query_hw.h \
freedreno_query_sw.c \
freedreno_query_sw.h \
freedreno_resource.c \
freedreno_resource.h \
freedreno_screen.c \
freedreno_screen.h \
freedreno_state.c \
freedreno_state.h \
freedreno_surface.c \
freedreno_surface.h \
freedreno_texture.c \
freedreno_texture.h \
freedreno_util.c \
freedreno_util.h
 
a2xx_SOURCES := \
a2xx/a2xx.xml.h \
a2xx/disasm-a2xx.c \
a2xx/fd2_blend.c \
a2xx/fd2_blend.h \
a2xx/fd2_compiler.c \
a2xx/fd2_compiler.h \
a2xx/fd2_context.c \
a2xx/fd2_context.h \
a2xx/fd2_draw.c \
a2xx/fd2_draw.h \
a2xx/fd2_emit.c \
a2xx/fd2_emit.h \
a2xx/fd2_gmem.c \
a2xx/fd2_gmem.h \
a2xx/fd2_program.c \
a2xx/fd2_program.h \
a2xx/fd2_rasterizer.c \
a2xx/fd2_rasterizer.h \
a2xx/fd2_screen.c \
a2xx/fd2_screen.h \
a2xx/fd2_texture.c \
a2xx/fd2_texture.h \
a2xx/fd2_util.c \
a2xx/fd2_util.h \
a2xx/fd2_zsa.c \
a2xx/fd2_zsa.h \
a2xx/instr-a2xx.h \
a2xx/ir-a2xx.c \
a2xx/ir-a2xx.h
 
a3xx_SOURCES := \
a3xx/a3xx.xml.h \
a3xx/fd3_blend.c \
a3xx/fd3_blend.h \
a3xx/fd3_context.c \
a3xx/fd3_context.h \
a3xx/fd3_draw.c \
a3xx/fd3_draw.h \
a3xx/fd3_emit.c \
a3xx/fd3_emit.h \
a3xx/fd3_format.c \
a3xx/fd3_format.h \
a3xx/fd3_gmem.c \
a3xx/fd3_gmem.h \
a3xx/fd3_program.c \
a3xx/fd3_program.h \
a3xx/fd3_query.c \
a3xx/fd3_query.h \
a3xx/fd3_rasterizer.c \
a3xx/fd3_rasterizer.h \
a3xx/fd3_screen.c \
a3xx/fd3_screen.h \
a3xx/fd3_texture.c \
a3xx/fd3_texture.h \
a3xx/fd3_zsa.c \
a3xx/fd3_zsa.h
 
a4xx_SOURCES := \
a4xx/a4xx.xml.h \
a4xx/fd4_blend.c \
a4xx/fd4_blend.h \
a4xx/fd4_context.c \
a4xx/fd4_context.h \
a4xx/fd4_draw.c \
a4xx/fd4_draw.h \
a4xx/fd4_emit.c \
a4xx/fd4_emit.h \
a4xx/fd4_format.c \
a4xx/fd4_format.h \
a4xx/fd4_gmem.c \
a4xx/fd4_gmem.h \
a4xx/fd4_program.c \
a4xx/fd4_program.h \
a4xx/fd4_query.c \
a4xx/fd4_query.h \
a4xx/fd4_rasterizer.c \
a4xx/fd4_rasterizer.h \
a4xx/fd4_screen.c \
a4xx/fd4_screen.h \
a4xx/fd4_texture.c \
a4xx/fd4_texture.h \
a4xx/fd4_zsa.c \
a4xx/fd4_zsa.h
 
ir3_SOURCES := \
ir3/disasm-a3xx.c \
ir3/instr-a3xx.h \
ir3/ir3.c \
ir3/ir3_compiler.c \
ir3/ir3_compiler_nir.c \
ir3/ir3_compiler.h \
ir3/ir3_cp.c \
ir3/ir3_depth.c \
ir3/ir3_dump.c \
ir3/ir3_flatten.c \
ir3/ir3_group.c \
ir3/ir3.h \
ir3/ir3_legalize.c \
ir3/ir3_nir.h \
ir3/ir3_nir_lower_if_else.c \
ir3/ir3_ra.c \
ir3/ir3_sched.c \
ir3/ir3_shader.c \
ir3/ir3_shader.h
 
GALLIUM_CFLAGS = \
-I$(top_srcdir)/include \
-I$(top_srcdir)/src \
-I$(top_srcdir)/src/gallium/include \
-I$(top_srcdir)/src/gallium/auxiliary \
$(DEFINES)
 
 
# src/gallium/auxiliary must appear before src/gallium/drivers
# because there are stupidly two rbug_context.h files in
# different directories, and which one is included by the
# preprocessor is determined by the ordering of the -I flags.
GALLIUM_DRIVER_CFLAGS = \
-I$(srcdir)/include \
-I$(top_srcdir)/src \
-I$(top_srcdir)/include \
-I$(top_srcdir)/src/gallium/include \
-I$(top_srcdir)/src/gallium/auxiliary \
-I$(top_srcdir)/src/gallium/drivers \
-I$(top_srcdir)/src/gallium/winsys \
$(DEFINES) \
$(VISIBILITY_CFLAGS)
 
GALLIUM_DRIVER_CXXFLAGS = \
-I$(srcdir)/include \
-I$(top_srcdir)/src \
-I$(top_srcdir)/include \
-I$(top_srcdir)/src/gallium/include \
-I$(top_srcdir)/src/gallium/auxiliary \
-I$(top_srcdir)/src/gallium/drivers \
-I$(top_srcdir)/src/gallium/winsys \
$(DEFINES) \
$(VISIBILITY_CXXFLAGS)
 
GALLIUM_TARGET_CFLAGS = \
-I$(top_srcdir)/src \
-I$(top_srcdir)/include \
-I$(top_srcdir)/src/loader \
-I$(top_srcdir)/src/gallium/include \
-I$(top_srcdir)/src/gallium/auxiliary \
-I$(top_srcdir)/src/gallium/drivers \
-I$(top_srcdir)/src/gallium/winsys \
$(DEFINES) \
$(PTHREAD_CFLAGS) \
$(LIBDRM_CFLAGS) \
$(VISIBILITY_CFLAGS)
 
GALLIUM_COMMON_LIB_DEPS = \
-lm \
$(CLOCK_LIB) \
$(PTHREAD_LIBS) \
$(DLOPEN_LIBS)
 
GALLIUM_WINSYS_CFLAGS = \
-I$(top_srcdir)/src \
-I$(top_srcdir)/include \
-I$(top_srcdir)/src/gallium/include \
-I$(top_srcdir)/src/gallium/auxiliary \
$(DEFINES) \
$(VISIBILITY_CFLAGS)
 
GALLIUM_PIPE_LOADER_WINSYS_LIBS = \
$(top_builddir)/src/gallium/winsys/sw/null/libws_null.la \
$(top_builddir)/src/gallium/winsys/sw/wrapper/libwsw.la \
$(am__append_1) $(am__append_2)
AM_CFLAGS = \
-Wno-packed-bitfield-compat \
-I$(top_srcdir)/src/gallium/drivers/freedreno/ir3 \
-I$(top_builddir)/src/glsl/nir \
$(GALLIUM_DRIVER_CFLAGS) \
$(FREEDRENO_CFLAGS)
 
noinst_LTLIBRARIES = libfreedreno.la
libfreedreno_la_SOURCES = \
$(C_SOURCES) \
$(a2xx_SOURCES) \
$(a3xx_SOURCES) \
$(a4xx_SOURCES) \
$(ir3_SOURCES)
 
ir3_compiler_SOURCES = \
ir3/ir3_cmdline.c
 
ir3_compiler_LDADD = \
libfreedreno.la \
../../auxiliary/libgallium.la \
$(top_builddir)/src/glsl/libnir.la \
$(top_builddir)/src/libglsl_util.la \
-lstdc++ \
$(top_builddir)/src/util/libmesautil.la \
$(GALLIUM_COMMON_LIB_DEPS) \
$(FREEDRENO_LIBS)
 
all: all-am
 
.SUFFIXES:
.SUFFIXES: .c .lo .o .obj
$(srcdir)/Makefile.in: $(srcdir)/Makefile.am $(srcdir)/Makefile.sources $(top_srcdir)/src/gallium/Automake.inc $(am__configure_deps)
@for dep in $?; do \
case '$(am__configure_deps)' in \
*$$dep*) \
( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \
&& { if test -f $@; then exit 0; else break; fi; }; \
exit 1;; \
esac; \
done; \
echo ' cd $(top_srcdir) && $(AUTOMAKE) --foreign src/gallium/drivers/freedreno/Makefile'; \
$(am__cd) $(top_srcdir) && \
$(AUTOMAKE) --foreign src/gallium/drivers/freedreno/Makefile
Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status
@case '$?' in \
*config.status*) \
cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \
*) \
echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)'; \
cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe);; \
esac;
$(srcdir)/Makefile.sources $(top_srcdir)/src/gallium/Automake.inc $(am__empty):
 
$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES)
cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
 
$(top_srcdir)/configure: $(am__configure_deps)
cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
$(ACLOCAL_M4): $(am__aclocal_m4_deps)
cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
$(am__aclocal_m4_deps):
 
clean-noinstLTLIBRARIES:
-test -z "$(noinst_LTLIBRARIES)" || rm -f $(noinst_LTLIBRARIES)
@list='$(noinst_LTLIBRARIES)'; \
locs=`for p in $$list; do echo $$p; done | \
sed 's|^[^/]*$$|.|; s|/[^/]*$$||; s|$$|/so_locations|' | \
sort -u`; \
test -z "$$locs" || { \
echo rm -f $${locs}; \
rm -f $${locs}; \
}
a2xx/$(am__dirstamp):
@$(MKDIR_P) a2xx
@: > a2xx/$(am__dirstamp)
a2xx/$(DEPDIR)/$(am__dirstamp):
@$(MKDIR_P) a2xx/$(DEPDIR)
@: > a2xx/$(DEPDIR)/$(am__dirstamp)
a2xx/disasm-a2xx.lo: a2xx/$(am__dirstamp) \
a2xx/$(DEPDIR)/$(am__dirstamp)
a2xx/fd2_blend.lo: a2xx/$(am__dirstamp) a2xx/$(DEPDIR)/$(am__dirstamp)
a2xx/fd2_compiler.lo: a2xx/$(am__dirstamp) \
a2xx/$(DEPDIR)/$(am__dirstamp)
a2xx/fd2_context.lo: a2xx/$(am__dirstamp) \
a2xx/$(DEPDIR)/$(am__dirstamp)
a2xx/fd2_draw.lo: a2xx/$(am__dirstamp) a2xx/$(DEPDIR)/$(am__dirstamp)
a2xx/fd2_emit.lo: a2xx/$(am__dirstamp) a2xx/$(DEPDIR)/$(am__dirstamp)
a2xx/fd2_gmem.lo: a2xx/$(am__dirstamp) a2xx/$(DEPDIR)/$(am__dirstamp)
a2xx/fd2_program.lo: a2xx/$(am__dirstamp) \
a2xx/$(DEPDIR)/$(am__dirstamp)
a2xx/fd2_rasterizer.lo: a2xx/$(am__dirstamp) \
a2xx/$(DEPDIR)/$(am__dirstamp)
a2xx/fd2_screen.lo: a2xx/$(am__dirstamp) \
a2xx/$(DEPDIR)/$(am__dirstamp)
a2xx/fd2_texture.lo: a2xx/$(am__dirstamp) \
a2xx/$(DEPDIR)/$(am__dirstamp)
a2xx/fd2_util.lo: a2xx/$(am__dirstamp) a2xx/$(DEPDIR)/$(am__dirstamp)
a2xx/fd2_zsa.lo: a2xx/$(am__dirstamp) a2xx/$(DEPDIR)/$(am__dirstamp)
a2xx/ir-a2xx.lo: a2xx/$(am__dirstamp) a2xx/$(DEPDIR)/$(am__dirstamp)
a3xx/$(am__dirstamp):
@$(MKDIR_P) a3xx
@: > a3xx/$(am__dirstamp)
a3xx/$(DEPDIR)/$(am__dirstamp):
@$(MKDIR_P) a3xx/$(DEPDIR)
@: > a3xx/$(DEPDIR)/$(am__dirstamp)
a3xx/fd3_blend.lo: a3xx/$(am__dirstamp) a3xx/$(DEPDIR)/$(am__dirstamp)
a3xx/fd3_context.lo: a3xx/$(am__dirstamp) \
a3xx/$(DEPDIR)/$(am__dirstamp)
a3xx/fd3_draw.lo: a3xx/$(am__dirstamp) a3xx/$(DEPDIR)/$(am__dirstamp)
a3xx/fd3_emit.lo: a3xx/$(am__dirstamp) a3xx/$(DEPDIR)/$(am__dirstamp)
a3xx/fd3_format.lo: a3xx/$(am__dirstamp) \
a3xx/$(DEPDIR)/$(am__dirstamp)
a3xx/fd3_gmem.lo: a3xx/$(am__dirstamp) a3xx/$(DEPDIR)/$(am__dirstamp)
a3xx/fd3_program.lo: a3xx/$(am__dirstamp) \
a3xx/$(DEPDIR)/$(am__dirstamp)
a3xx/fd3_query.lo: a3xx/$(am__dirstamp) a3xx/$(DEPDIR)/$(am__dirstamp)
a3xx/fd3_rasterizer.lo: a3xx/$(am__dirstamp) \
a3xx/$(DEPDIR)/$(am__dirstamp)
a3xx/fd3_screen.lo: a3xx/$(am__dirstamp) \
a3xx/$(DEPDIR)/$(am__dirstamp)
a3xx/fd3_texture.lo: a3xx/$(am__dirstamp) \
a3xx/$(DEPDIR)/$(am__dirstamp)
a3xx/fd3_zsa.lo: a3xx/$(am__dirstamp) a3xx/$(DEPDIR)/$(am__dirstamp)
a4xx/$(am__dirstamp):
@$(MKDIR_P) a4xx
@: > a4xx/$(am__dirstamp)
a4xx/$(DEPDIR)/$(am__dirstamp):
@$(MKDIR_P) a4xx/$(DEPDIR)
@: > a4xx/$(DEPDIR)/$(am__dirstamp)
a4xx/fd4_blend.lo: a4xx/$(am__dirstamp) a4xx/$(DEPDIR)/$(am__dirstamp)
a4xx/fd4_context.lo: a4xx/$(am__dirstamp) \
a4xx/$(DEPDIR)/$(am__dirstamp)
a4xx/fd4_draw.lo: a4xx/$(am__dirstamp) a4xx/$(DEPDIR)/$(am__dirstamp)
a4xx/fd4_emit.lo: a4xx/$(am__dirstamp) a4xx/$(DEPDIR)/$(am__dirstamp)
a4xx/fd4_format.lo: a4xx/$(am__dirstamp) \
a4xx/$(DEPDIR)/$(am__dirstamp)
a4xx/fd4_gmem.lo: a4xx/$(am__dirstamp) a4xx/$(DEPDIR)/$(am__dirstamp)
a4xx/fd4_program.lo: a4xx/$(am__dirstamp) \
a4xx/$(DEPDIR)/$(am__dirstamp)
a4xx/fd4_query.lo: a4xx/$(am__dirstamp) a4xx/$(DEPDIR)/$(am__dirstamp)
a4xx/fd4_rasterizer.lo: a4xx/$(am__dirstamp) \
a4xx/$(DEPDIR)/$(am__dirstamp)
a4xx/fd4_screen.lo: a4xx/$(am__dirstamp) \
a4xx/$(DEPDIR)/$(am__dirstamp)
a4xx/fd4_texture.lo: a4xx/$(am__dirstamp) \
a4xx/$(DEPDIR)/$(am__dirstamp)
a4xx/fd4_zsa.lo: a4xx/$(am__dirstamp) a4xx/$(DEPDIR)/$(am__dirstamp)
ir3/$(am__dirstamp):
@$(MKDIR_P) ir3
@: > ir3/$(am__dirstamp)
ir3/$(DEPDIR)/$(am__dirstamp):
@$(MKDIR_P) ir3/$(DEPDIR)
@: > ir3/$(DEPDIR)/$(am__dirstamp)
ir3/disasm-a3xx.lo: ir3/$(am__dirstamp) ir3/$(DEPDIR)/$(am__dirstamp)
ir3/ir3.lo: ir3/$(am__dirstamp) ir3/$(DEPDIR)/$(am__dirstamp)
ir3/ir3_compiler.lo: ir3/$(am__dirstamp) ir3/$(DEPDIR)/$(am__dirstamp)
ir3/ir3_compiler_nir.lo: ir3/$(am__dirstamp) \
ir3/$(DEPDIR)/$(am__dirstamp)
ir3/ir3_cp.lo: ir3/$(am__dirstamp) ir3/$(DEPDIR)/$(am__dirstamp)
ir3/ir3_depth.lo: ir3/$(am__dirstamp) ir3/$(DEPDIR)/$(am__dirstamp)
ir3/ir3_dump.lo: ir3/$(am__dirstamp) ir3/$(DEPDIR)/$(am__dirstamp)
ir3/ir3_flatten.lo: ir3/$(am__dirstamp) ir3/$(DEPDIR)/$(am__dirstamp)
ir3/ir3_group.lo: ir3/$(am__dirstamp) ir3/$(DEPDIR)/$(am__dirstamp)
ir3/ir3_legalize.lo: ir3/$(am__dirstamp) ir3/$(DEPDIR)/$(am__dirstamp)
ir3/ir3_nir_lower_if_else.lo: ir3/$(am__dirstamp) \
ir3/$(DEPDIR)/$(am__dirstamp)
ir3/ir3_ra.lo: ir3/$(am__dirstamp) ir3/$(DEPDIR)/$(am__dirstamp)
ir3/ir3_sched.lo: ir3/$(am__dirstamp) ir3/$(DEPDIR)/$(am__dirstamp)
ir3/ir3_shader.lo: ir3/$(am__dirstamp) ir3/$(DEPDIR)/$(am__dirstamp)
 
libfreedreno.la: $(libfreedreno_la_OBJECTS) $(libfreedreno_la_DEPENDENCIES) $(EXTRA_libfreedreno_la_DEPENDENCIES)
$(AM_V_CCLD)$(LINK) $(libfreedreno_la_OBJECTS) $(libfreedreno_la_LIBADD) $(LIBS)
 
clean-noinstPROGRAMS:
@list='$(noinst_PROGRAMS)'; test -n "$$list" || exit 0; \
echo " rm -f" $$list; \
rm -f $$list || exit $$?; \
test -n "$(EXEEXT)" || exit 0; \
list=`for p in $$list; do echo "$$p"; done | sed 's/$(EXEEXT)$$//'`; \
echo " rm -f" $$list; \
rm -f $$list
ir3/ir3_cmdline.$(OBJEXT): ir3/$(am__dirstamp) \
ir3/$(DEPDIR)/$(am__dirstamp)
 
ir3_compiler$(EXEEXT): $(ir3_compiler_OBJECTS) $(ir3_compiler_DEPENDENCIES) $(EXTRA_ir3_compiler_DEPENDENCIES)
@rm -f ir3_compiler$(EXEEXT)
$(AM_V_CCLD)$(LINK) $(ir3_compiler_OBJECTS) $(ir3_compiler_LDADD) $(LIBS)
 
mostlyclean-compile:
-rm -f *.$(OBJEXT)
-rm -f a2xx/*.$(OBJEXT)
-rm -f a2xx/*.lo
-rm -f a3xx/*.$(OBJEXT)
-rm -f a3xx/*.lo
-rm -f a4xx/*.$(OBJEXT)
-rm -f a4xx/*.lo
-rm -f ir3/*.$(OBJEXT)
-rm -f ir3/*.lo
 
distclean-compile:
-rm -f *.tab.c
 
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/freedreno_context.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/freedreno_draw.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/freedreno_fence.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/freedreno_gmem.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/freedreno_program.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/freedreno_query.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/freedreno_query_hw.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/freedreno_query_sw.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/freedreno_resource.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/freedreno_screen.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/freedreno_state.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/freedreno_surface.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/freedreno_texture.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/freedreno_util.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@a2xx/$(DEPDIR)/disasm-a2xx.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@a2xx/$(DEPDIR)/fd2_blend.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@a2xx/$(DEPDIR)/fd2_compiler.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@a2xx/$(DEPDIR)/fd2_context.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@a2xx/$(DEPDIR)/fd2_draw.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@a2xx/$(DEPDIR)/fd2_emit.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@a2xx/$(DEPDIR)/fd2_gmem.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@a2xx/$(DEPDIR)/fd2_program.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@a2xx/$(DEPDIR)/fd2_rasterizer.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@a2xx/$(DEPDIR)/fd2_screen.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@a2xx/$(DEPDIR)/fd2_texture.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@a2xx/$(DEPDIR)/fd2_util.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@a2xx/$(DEPDIR)/fd2_zsa.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@a2xx/$(DEPDIR)/ir-a2xx.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@a3xx/$(DEPDIR)/fd3_blend.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@a3xx/$(DEPDIR)/fd3_context.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@a3xx/$(DEPDIR)/fd3_draw.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@a3xx/$(DEPDIR)/fd3_emit.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@a3xx/$(DEPDIR)/fd3_format.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@a3xx/$(DEPDIR)/fd3_gmem.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@a3xx/$(DEPDIR)/fd3_program.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@a3xx/$(DEPDIR)/fd3_query.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@a3xx/$(DEPDIR)/fd3_rasterizer.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@a3xx/$(DEPDIR)/fd3_screen.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@a3xx/$(DEPDIR)/fd3_texture.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@a3xx/$(DEPDIR)/fd3_zsa.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@a4xx/$(DEPDIR)/fd4_blend.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@a4xx/$(DEPDIR)/fd4_context.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@a4xx/$(DEPDIR)/fd4_draw.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@a4xx/$(DEPDIR)/fd4_emit.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@a4xx/$(DEPDIR)/fd4_format.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@a4xx/$(DEPDIR)/fd4_gmem.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@a4xx/$(DEPDIR)/fd4_program.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@a4xx/$(DEPDIR)/fd4_query.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@a4xx/$(DEPDIR)/fd4_rasterizer.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@a4xx/$(DEPDIR)/fd4_screen.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@a4xx/$(DEPDIR)/fd4_texture.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@a4xx/$(DEPDIR)/fd4_zsa.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@ir3/$(DEPDIR)/disasm-a3xx.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@ir3/$(DEPDIR)/ir3.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@ir3/$(DEPDIR)/ir3_cmdline.Po@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@ir3/$(DEPDIR)/ir3_compiler.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@ir3/$(DEPDIR)/ir3_compiler_nir.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@ir3/$(DEPDIR)/ir3_cp.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@ir3/$(DEPDIR)/ir3_depth.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@ir3/$(DEPDIR)/ir3_dump.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@ir3/$(DEPDIR)/ir3_flatten.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@ir3/$(DEPDIR)/ir3_group.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@ir3/$(DEPDIR)/ir3_legalize.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@ir3/$(DEPDIR)/ir3_nir_lower_if_else.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@ir3/$(DEPDIR)/ir3_ra.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@ir3/$(DEPDIR)/ir3_sched.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@ir3/$(DEPDIR)/ir3_shader.Plo@am__quote@
 
.c.o:
@am__fastdepCC_TRUE@ $(AM_V_CC)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.o$$||'`;\
@am__fastdepCC_TRUE@ $(COMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ $< &&\
@am__fastdepCC_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Po
@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@
@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ $<
 
.c.obj:
@am__fastdepCC_TRUE@ $(AM_V_CC)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.obj$$||'`;\
@am__fastdepCC_TRUE@ $(COMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ `$(CYGPATH_W) '$<'` &&\
@am__fastdepCC_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Po
@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@
@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ `$(CYGPATH_W) '$<'`
 
.c.lo:
@am__fastdepCC_TRUE@ $(AM_V_CC)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.lo$$||'`;\
@am__fastdepCC_TRUE@ $(LTCOMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ $< &&\
@am__fastdepCC_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Plo
@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=yes @AMDEPBACKSLASH@
@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LTCOMPILE) -c -o $@ $<
 
mostlyclean-libtool:
-rm -f *.lo
 
clean-libtool:
-rm -rf .libs _libs
-rm -rf a2xx/.libs a2xx/_libs
-rm -rf a3xx/.libs a3xx/_libs
-rm -rf a4xx/.libs a4xx/_libs
-rm -rf ir3/.libs ir3/_libs
 
ID: $(am__tagged_files)
$(am__define_uniq_tagged_files); mkid -fID $$unique
tags: tags-am
TAGS: tags
 
tags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files)
set x; \
here=`pwd`; \
$(am__define_uniq_tagged_files); \
shift; \
if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \
test -n "$$unique" || unique=$$empty_fix; \
if test $$# -gt 0; then \
$(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
"$$@" $$unique; \
else \
$(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
$$unique; \
fi; \
fi
ctags: ctags-am
 
CTAGS: ctags
ctags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files)
$(am__define_uniq_tagged_files); \
test -z "$(CTAGS_ARGS)$$unique" \
|| $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \
$$unique
 
GTAGS:
here=`$(am__cd) $(top_builddir) && pwd` \
&& $(am__cd) $(top_srcdir) \
&& gtags -i $(GTAGS_ARGS) "$$here"
cscopelist: cscopelist-am
 
cscopelist-am: $(am__tagged_files)
list='$(am__tagged_files)'; \
case "$(srcdir)" in \
[\\/]* | ?:[\\/]*) sdir="$(srcdir)" ;; \
*) sdir=$(subdir)/$(srcdir) ;; \
esac; \
for i in $$list; do \
if test -f "$$i"; then \
echo "$(subdir)/$$i"; \
else \
echo "$$sdir/$$i"; \
fi; \
done >> $(top_builddir)/cscope.files
 
distclean-tags:
-rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags
 
distdir: $(DISTFILES)
@srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
list='$(DISTFILES)'; \
dist_files=`for file in $$list; do echo $$file; done | \
sed -e "s|^$$srcdirstrip/||;t" \
-e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \
case $$dist_files in \
*/*) $(MKDIR_P) `echo "$$dist_files" | \
sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \
sort -u` ;; \
esac; \
for file in $$dist_files; do \
if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \
if test -d $$d/$$file; then \
dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \
if test -d "$(distdir)/$$file"; then \
find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
fi; \
if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \
cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \
find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
fi; \
cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \
else \
test -f "$(distdir)/$$file" \
|| cp -p $$d/$$file "$(distdir)/$$file" \
|| exit 1; \
fi; \
done
check-am: all-am
check: check-am
all-am: Makefile $(LTLIBRARIES) $(PROGRAMS)
installdirs:
install: install-am
install-exec: install-exec-am
install-data: install-data-am
uninstall: uninstall-am
 
install-am: all-am
@$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am
 
installcheck: installcheck-am
install-strip:
if test -z '$(STRIP)'; then \
$(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
install; \
else \
$(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
"INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \
fi
mostlyclean-generic:
 
clean-generic:
 
distclean-generic:
-test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES)
-test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES)
-rm -f a2xx/$(DEPDIR)/$(am__dirstamp)
-rm -f a2xx/$(am__dirstamp)
-rm -f a3xx/$(DEPDIR)/$(am__dirstamp)
-rm -f a3xx/$(am__dirstamp)
-rm -f a4xx/$(DEPDIR)/$(am__dirstamp)
-rm -f a4xx/$(am__dirstamp)
-rm -f ir3/$(DEPDIR)/$(am__dirstamp)
-rm -f ir3/$(am__dirstamp)
 
maintainer-clean-generic:
@echo "This command is intended for maintainers to use"
@echo "it deletes files that may require special tools to rebuild."
clean: clean-am
 
clean-am: clean-generic clean-libtool clean-noinstLTLIBRARIES \
clean-noinstPROGRAMS mostlyclean-am
 
distclean: distclean-am
-rm -rf ./$(DEPDIR) a2xx/$(DEPDIR) a3xx/$(DEPDIR) a4xx/$(DEPDIR) ir3/$(DEPDIR)
-rm -f Makefile
distclean-am: clean-am distclean-compile distclean-generic \
distclean-tags
 
dvi: dvi-am
 
dvi-am:
 
html: html-am
 
html-am:
 
info: info-am
 
info-am:
 
install-data-am:
 
install-dvi: install-dvi-am
 
install-dvi-am:
 
install-exec-am:
 
install-html: install-html-am
 
install-html-am:
 
install-info: install-info-am
 
install-info-am:
 
install-man:
 
install-pdf: install-pdf-am
 
install-pdf-am:
 
install-ps: install-ps-am
 
install-ps-am:
 
installcheck-am:
 
maintainer-clean: maintainer-clean-am
-rm -rf ./$(DEPDIR) a2xx/$(DEPDIR) a3xx/$(DEPDIR) a4xx/$(DEPDIR) ir3/$(DEPDIR)
-rm -f Makefile
maintainer-clean-am: distclean-am maintainer-clean-generic
 
mostlyclean: mostlyclean-am
 
mostlyclean-am: mostlyclean-compile mostlyclean-generic \
mostlyclean-libtool
 
pdf: pdf-am
 
pdf-am:
 
ps: ps-am
 
ps-am:
 
uninstall-am:
 
.MAKE: install-am install-strip
 
.PHONY: CTAGS GTAGS TAGS all all-am check check-am clean clean-generic \
clean-libtool clean-noinstLTLIBRARIES clean-noinstPROGRAMS \
cscopelist-am ctags ctags-am distclean distclean-compile \
distclean-generic distclean-libtool distclean-tags distdir dvi \
dvi-am html html-am info info-am install install-am \
install-data install-data-am install-dvi install-dvi-am \
install-exec install-exec-am install-html install-html-am \
install-info install-info-am install-man install-pdf \
install-pdf-am install-ps install-ps-am install-strip \
installcheck installcheck-am installdirs maintainer-clean \
maintainer-clean-generic mostlyclean mostlyclean-compile \
mostlyclean-generic mostlyclean-libtool pdf pdf-am ps ps-am \
tags tags-am uninstall uninstall-am
 
.PRECIOUS: Makefile
 
 
# Tell versions [3.59,3.63) of GNU make to not export all variables.
# Otherwise a system limit (for SysV at least) may be exceeded.
.NOEXPORT:
/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/freedreno/Makefile.sources
0,0 → 1,138
C_SOURCES := \
adreno_common.xml.h \
adreno_pm4.xml.h \
disasm.h \
freedreno_context.c \
freedreno_context.h \
freedreno_draw.c \
freedreno_draw.h \
freedreno_fence.c \
freedreno_fence.h \
freedreno_gmem.c \
freedreno_gmem.h \
freedreno_program.c \
freedreno_program.h \
freedreno_query.c \
freedreno_query.h \
freedreno_query_hw.c \
freedreno_query_hw.h \
freedreno_query_sw.c \
freedreno_query_sw.h \
freedreno_resource.c \
freedreno_resource.h \
freedreno_screen.c \
freedreno_screen.h \
freedreno_state.c \
freedreno_state.h \
freedreno_surface.c \
freedreno_surface.h \
freedreno_texture.c \
freedreno_texture.h \
freedreno_util.c \
freedreno_util.h
 
a2xx_SOURCES := \
a2xx/a2xx.xml.h \
a2xx/disasm-a2xx.c \
a2xx/fd2_blend.c \
a2xx/fd2_blend.h \
a2xx/fd2_compiler.c \
a2xx/fd2_compiler.h \
a2xx/fd2_context.c \
a2xx/fd2_context.h \
a2xx/fd2_draw.c \
a2xx/fd2_draw.h \
a2xx/fd2_emit.c \
a2xx/fd2_emit.h \
a2xx/fd2_gmem.c \
a2xx/fd2_gmem.h \
a2xx/fd2_program.c \
a2xx/fd2_program.h \
a2xx/fd2_rasterizer.c \
a2xx/fd2_rasterizer.h \
a2xx/fd2_screen.c \
a2xx/fd2_screen.h \
a2xx/fd2_texture.c \
a2xx/fd2_texture.h \
a2xx/fd2_util.c \
a2xx/fd2_util.h \
a2xx/fd2_zsa.c \
a2xx/fd2_zsa.h \
a2xx/instr-a2xx.h \
a2xx/ir-a2xx.c \
a2xx/ir-a2xx.h
 
a3xx_SOURCES := \
a3xx/a3xx.xml.h \
a3xx/fd3_blend.c \
a3xx/fd3_blend.h \
a3xx/fd3_context.c \
a3xx/fd3_context.h \
a3xx/fd3_draw.c \
a3xx/fd3_draw.h \
a3xx/fd3_emit.c \
a3xx/fd3_emit.h \
a3xx/fd3_format.c \
a3xx/fd3_format.h \
a3xx/fd3_gmem.c \
a3xx/fd3_gmem.h \
a3xx/fd3_program.c \
a3xx/fd3_program.h \
a3xx/fd3_query.c \
a3xx/fd3_query.h \
a3xx/fd3_rasterizer.c \
a3xx/fd3_rasterizer.h \
a3xx/fd3_screen.c \
a3xx/fd3_screen.h \
a3xx/fd3_texture.c \
a3xx/fd3_texture.h \
a3xx/fd3_zsa.c \
a3xx/fd3_zsa.h
 
a4xx_SOURCES := \
a4xx/a4xx.xml.h \
a4xx/fd4_blend.c \
a4xx/fd4_blend.h \
a4xx/fd4_context.c \
a4xx/fd4_context.h \
a4xx/fd4_draw.c \
a4xx/fd4_draw.h \
a4xx/fd4_emit.c \
a4xx/fd4_emit.h \
a4xx/fd4_format.c \
a4xx/fd4_format.h \
a4xx/fd4_gmem.c \
a4xx/fd4_gmem.h \
a4xx/fd4_program.c \
a4xx/fd4_program.h \
a4xx/fd4_query.c \
a4xx/fd4_query.h \
a4xx/fd4_rasterizer.c \
a4xx/fd4_rasterizer.h \
a4xx/fd4_screen.c \
a4xx/fd4_screen.h \
a4xx/fd4_texture.c \
a4xx/fd4_texture.h \
a4xx/fd4_zsa.c \
a4xx/fd4_zsa.h
 
ir3_SOURCES := \
ir3/disasm-a3xx.c \
ir3/instr-a3xx.h \
ir3/ir3.c \
ir3/ir3_compiler.c \
ir3/ir3_compiler_nir.c \
ir3/ir3_compiler.h \
ir3/ir3_cp.c \
ir3/ir3_depth.c \
ir3/ir3_dump.c \
ir3/ir3_flatten.c \
ir3/ir3_group.c \
ir3/ir3.h \
ir3/ir3_legalize.c \
ir3/ir3_nir.h \
ir3/ir3_nir_lower_if_else.c \
ir3/ir3_ra.c \
ir3/ir3_sched.c \
ir3/ir3_shader.c \
ir3/ir3_shader.h
/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/freedreno/a2xx/a2xx.xml.h
0,0 → 1,1597
#ifndef A2XX_XML
#define A2XX_XML
 
/* Autogenerated file, DO NOT EDIT manually!
 
This file was generated by the rules-ng-ng headergen tool in this git repository:
http://github.com/freedreno/envytools/
git clone https://github.com/freedreno/envytools.git
 
The rules-ng-ng source files this header was generated from are:
- /home/robclark/src/freedreno/envytools/rnndb/adreno.xml ( 364 bytes, from 2013-11-30 14:47:15)
- /home/robclark/src/freedreno/envytools/rnndb/freedreno_copyright.xml ( 1453 bytes, from 2013-03-31 16:51:27)
- /home/robclark/src/freedreno/envytools/rnndb/adreno/a2xx.xml ( 32901 bytes, from 2014-06-02 15:21:30)
- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_common.xml ( 10551 bytes, from 2014-11-13 22:44:30)
- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_pm4.xml ( 14895 bytes, from 2015-04-19 15:23:28)
- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml ( 66709 bytes, from 2015-04-12 18:16:35)
- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml ( 59314 bytes, from 2015-04-19 16:21:40)
 
Copyright (C) 2013-2014 by the following authors:
- Rob Clark <robdclark@gmail.com> (robclark)
 
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
 
The above copyright notice and this permission notice (including the
next paragraph) shall be included in all copies or substantial
portions of the Software.
 
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
 
 
enum a2xx_rb_dither_type {
DITHER_PIXEL = 0,
DITHER_SUBPIXEL = 1,
};
 
enum a2xx_colorformatx {
COLORX_4_4_4_4 = 0,
COLORX_1_5_5_5 = 1,
COLORX_5_6_5 = 2,
COLORX_8 = 3,
COLORX_8_8 = 4,
COLORX_8_8_8_8 = 5,
COLORX_S8_8_8_8 = 6,
COLORX_16_FLOAT = 7,
COLORX_16_16_FLOAT = 8,
COLORX_16_16_16_16_FLOAT = 9,
COLORX_32_FLOAT = 10,
COLORX_32_32_FLOAT = 11,
COLORX_32_32_32_32_FLOAT = 12,
COLORX_2_3_3 = 13,
COLORX_8_8_8 = 14,
};
 
enum a2xx_sq_surfaceformat {
FMT_1_REVERSE = 0,
FMT_1 = 1,
FMT_8 = 2,
FMT_1_5_5_5 = 3,
FMT_5_6_5 = 4,
FMT_6_5_5 = 5,
FMT_8_8_8_8 = 6,
FMT_2_10_10_10 = 7,
FMT_8_A = 8,
FMT_8_B = 9,
FMT_8_8 = 10,
FMT_Cr_Y1_Cb_Y0 = 11,
FMT_Y1_Cr_Y0_Cb = 12,
FMT_5_5_5_1 = 13,
FMT_8_8_8_8_A = 14,
FMT_4_4_4_4 = 15,
FMT_10_11_11 = 16,
FMT_11_11_10 = 17,
FMT_DXT1 = 18,
FMT_DXT2_3 = 19,
FMT_DXT4_5 = 20,
FMT_24_8 = 22,
FMT_24_8_FLOAT = 23,
FMT_16 = 24,
FMT_16_16 = 25,
FMT_16_16_16_16 = 26,
FMT_16_EXPAND = 27,
FMT_16_16_EXPAND = 28,
FMT_16_16_16_16_EXPAND = 29,
FMT_16_FLOAT = 30,
FMT_16_16_FLOAT = 31,
FMT_16_16_16_16_FLOAT = 32,
FMT_32 = 33,
FMT_32_32 = 34,
FMT_32_32_32_32 = 35,
FMT_32_FLOAT = 36,
FMT_32_32_FLOAT = 37,
FMT_32_32_32_32_FLOAT = 38,
FMT_32_AS_8 = 39,
FMT_32_AS_8_8 = 40,
FMT_16_MPEG = 41,
FMT_16_16_MPEG = 42,
FMT_8_INTERLACED = 43,
FMT_32_AS_8_INTERLACED = 44,
FMT_32_AS_8_8_INTERLACED = 45,
FMT_16_INTERLACED = 46,
FMT_16_MPEG_INTERLACED = 47,
FMT_16_16_MPEG_INTERLACED = 48,
FMT_DXN = 49,
FMT_8_8_8_8_AS_16_16_16_16 = 50,
FMT_DXT1_AS_16_16_16_16 = 51,
FMT_DXT2_3_AS_16_16_16_16 = 52,
FMT_DXT4_5_AS_16_16_16_16 = 53,
FMT_2_10_10_10_AS_16_16_16_16 = 54,
FMT_10_11_11_AS_16_16_16_16 = 55,
FMT_11_11_10_AS_16_16_16_16 = 56,
FMT_32_32_32_FLOAT = 57,
FMT_DXT3A = 58,
FMT_DXT5A = 59,
FMT_CTX1 = 60,
FMT_DXT3A_AS_1_1_1_1 = 61,
};
 
enum a2xx_sq_ps_vtx_mode {
POSITION_1_VECTOR = 0,
POSITION_2_VECTORS_UNUSED = 1,
POSITION_2_VECTORS_SPRITE = 2,
POSITION_2_VECTORS_EDGE = 3,
POSITION_2_VECTORS_KILL = 4,
POSITION_2_VECTORS_SPRITE_KILL = 5,
POSITION_2_VECTORS_EDGE_KILL = 6,
MULTIPASS = 7,
};
 
enum a2xx_sq_sample_cntl {
CENTROIDS_ONLY = 0,
CENTERS_ONLY = 1,
CENTROIDS_AND_CENTERS = 2,
};
 
enum a2xx_dx_clip_space {
DXCLIP_OPENGL = 0,
DXCLIP_DIRECTX = 1,
};
 
enum a2xx_pa_su_sc_polymode {
POLY_DISABLED = 0,
POLY_DUALMODE = 1,
};
 
enum a2xx_rb_edram_mode {
EDRAM_NOP = 0,
COLOR_DEPTH = 4,
DEPTH_ONLY = 5,
EDRAM_COPY = 6,
};
 
enum a2xx_pa_sc_pattern_bit_order {
LITTLE = 0,
BIG = 1,
};
 
enum a2xx_pa_sc_auto_reset_cntl {
NEVER = 0,
EACH_PRIMITIVE = 1,
EACH_PACKET = 2,
};
 
enum a2xx_pa_pixcenter {
PIXCENTER_D3D = 0,
PIXCENTER_OGL = 1,
};
 
enum a2xx_pa_roundmode {
TRUNCATE = 0,
ROUND = 1,
ROUNDTOEVEN = 2,
ROUNDTOODD = 3,
};
 
enum a2xx_pa_quantmode {
ONE_SIXTEENTH = 0,
ONE_EIGTH = 1,
ONE_QUARTER = 2,
ONE_HALF = 3,
ONE = 4,
};
 
enum a2xx_rb_copy_sample_select {
SAMPLE_0 = 0,
SAMPLE_1 = 1,
SAMPLE_2 = 2,
SAMPLE_3 = 3,
SAMPLE_01 = 4,
SAMPLE_23 = 5,
SAMPLE_0123 = 6,
};
 
enum a2xx_rb_blend_opcode {
BLEND_DST_PLUS_SRC = 0,
BLEND_SRC_MINUS_DST = 1,
BLEND_MIN_DST_SRC = 2,
BLEND_MAX_DST_SRC = 3,
BLEND_DST_MINUS_SRC = 4,
BLEND_DST_PLUS_SRC_BIAS = 5,
};
 
enum adreno_mmu_clnt_beh {
BEH_NEVR = 0,
BEH_TRAN_RNG = 1,
BEH_TRAN_FLT = 2,
};
 
enum sq_tex_clamp {
SQ_TEX_WRAP = 0,
SQ_TEX_MIRROR = 1,
SQ_TEX_CLAMP_LAST_TEXEL = 2,
SQ_TEX_MIRROR_ONCE_LAST_TEXEL = 3,
SQ_TEX_CLAMP_HALF_BORDER = 4,
SQ_TEX_MIRROR_ONCE_HALF_BORDER = 5,
SQ_TEX_CLAMP_BORDER = 6,
SQ_TEX_MIRROR_ONCE_BORDER = 7,
};
 
enum sq_tex_swiz {
SQ_TEX_X = 0,
SQ_TEX_Y = 1,
SQ_TEX_Z = 2,
SQ_TEX_W = 3,
SQ_TEX_ZERO = 4,
SQ_TEX_ONE = 5,
};
 
enum sq_tex_filter {
SQ_TEX_FILTER_POINT = 0,
SQ_TEX_FILTER_BILINEAR = 1,
SQ_TEX_FILTER_BICUBIC = 2,
};
 
#define REG_A2XX_RBBM_PATCH_RELEASE 0x00000001
 
#define REG_A2XX_RBBM_CNTL 0x0000003b
 
#define REG_A2XX_RBBM_SOFT_RESET 0x0000003c
 
#define REG_A2XX_CP_PFP_UCODE_ADDR 0x000000c0
 
#define REG_A2XX_CP_PFP_UCODE_DATA 0x000000c1
 
#define REG_A2XX_MH_MMU_CONFIG 0x00000040
#define A2XX_MH_MMU_CONFIG_MMU_ENABLE 0x00000001
#define A2XX_MH_MMU_CONFIG_SPLIT_MODE_ENABLE 0x00000002
#define A2XX_MH_MMU_CONFIG_RB_W_CLNT_BEHAVIOR__MASK 0x00000030
#define A2XX_MH_MMU_CONFIG_RB_W_CLNT_BEHAVIOR__SHIFT 4
static inline uint32_t A2XX_MH_MMU_CONFIG_RB_W_CLNT_BEHAVIOR(enum adreno_mmu_clnt_beh val)
{
return ((val) << A2XX_MH_MMU_CONFIG_RB_W_CLNT_BEHAVIOR__SHIFT) & A2XX_MH_MMU_CONFIG_RB_W_CLNT_BEHAVIOR__MASK;
}
#define A2XX_MH_MMU_CONFIG_CP_W_CLNT_BEHAVIOR__MASK 0x000000c0
#define A2XX_MH_MMU_CONFIG_CP_W_CLNT_BEHAVIOR__SHIFT 6
static inline uint32_t A2XX_MH_MMU_CONFIG_CP_W_CLNT_BEHAVIOR(enum adreno_mmu_clnt_beh val)
{
return ((val) << A2XX_MH_MMU_CONFIG_CP_W_CLNT_BEHAVIOR__SHIFT) & A2XX_MH_MMU_CONFIG_CP_W_CLNT_BEHAVIOR__MASK;
}
#define A2XX_MH_MMU_CONFIG_CP_R0_CLNT_BEHAVIOR__MASK 0x00000300
#define A2XX_MH_MMU_CONFIG_CP_R0_CLNT_BEHAVIOR__SHIFT 8
static inline uint32_t A2XX_MH_MMU_CONFIG_CP_R0_CLNT_BEHAVIOR(enum adreno_mmu_clnt_beh val)
{
return ((val) << A2XX_MH_MMU_CONFIG_CP_R0_CLNT_BEHAVIOR__SHIFT) & A2XX_MH_MMU_CONFIG_CP_R0_CLNT_BEHAVIOR__MASK;
}
#define A2XX_MH_MMU_CONFIG_CP_R1_CLNT_BEHAVIOR__MASK 0x00000c00
#define A2XX_MH_MMU_CONFIG_CP_R1_CLNT_BEHAVIOR__SHIFT 10
static inline uint32_t A2XX_MH_MMU_CONFIG_CP_R1_CLNT_BEHAVIOR(enum adreno_mmu_clnt_beh val)
{
return ((val) << A2XX_MH_MMU_CONFIG_CP_R1_CLNT_BEHAVIOR__SHIFT) & A2XX_MH_MMU_CONFIG_CP_R1_CLNT_BEHAVIOR__MASK;
}
#define A2XX_MH_MMU_CONFIG_CP_R2_CLNT_BEHAVIOR__MASK 0x00003000
#define A2XX_MH_MMU_CONFIG_CP_R2_CLNT_BEHAVIOR__SHIFT 12
static inline uint32_t A2XX_MH_MMU_CONFIG_CP_R2_CLNT_BEHAVIOR(enum adreno_mmu_clnt_beh val)
{
return ((val) << A2XX_MH_MMU_CONFIG_CP_R2_CLNT_BEHAVIOR__SHIFT) & A2XX_MH_MMU_CONFIG_CP_R2_CLNT_BEHAVIOR__MASK;
}
#define A2XX_MH_MMU_CONFIG_CP_R3_CLNT_BEHAVIOR__MASK 0x0000c000
#define A2XX_MH_MMU_CONFIG_CP_R3_CLNT_BEHAVIOR__SHIFT 14
static inline uint32_t A2XX_MH_MMU_CONFIG_CP_R3_CLNT_BEHAVIOR(enum adreno_mmu_clnt_beh val)
{
return ((val) << A2XX_MH_MMU_CONFIG_CP_R3_CLNT_BEHAVIOR__SHIFT) & A2XX_MH_MMU_CONFIG_CP_R3_CLNT_BEHAVIOR__MASK;
}
#define A2XX_MH_MMU_CONFIG_CP_R4_CLNT_BEHAVIOR__MASK 0x00030000
#define A2XX_MH_MMU_CONFIG_CP_R4_CLNT_BEHAVIOR__SHIFT 16
static inline uint32_t A2XX_MH_MMU_CONFIG_CP_R4_CLNT_BEHAVIOR(enum adreno_mmu_clnt_beh val)
{
return ((val) << A2XX_MH_MMU_CONFIG_CP_R4_CLNT_BEHAVIOR__SHIFT) & A2XX_MH_MMU_CONFIG_CP_R4_CLNT_BEHAVIOR__MASK;
}
#define A2XX_MH_MMU_CONFIG_VGT_R0_CLNT_BEHAVIOR__MASK 0x000c0000
#define A2XX_MH_MMU_CONFIG_VGT_R0_CLNT_BEHAVIOR__SHIFT 18
static inline uint32_t A2XX_MH_MMU_CONFIG_VGT_R0_CLNT_BEHAVIOR(enum adreno_mmu_clnt_beh val)
{
return ((val) << A2XX_MH_MMU_CONFIG_VGT_R0_CLNT_BEHAVIOR__SHIFT) & A2XX_MH_MMU_CONFIG_VGT_R0_CLNT_BEHAVIOR__MASK;
}
#define A2XX_MH_MMU_CONFIG_VGT_R1_CLNT_BEHAVIOR__MASK 0x00300000
#define A2XX_MH_MMU_CONFIG_VGT_R1_CLNT_BEHAVIOR__SHIFT 20
static inline uint32_t A2XX_MH_MMU_CONFIG_VGT_R1_CLNT_BEHAVIOR(enum adreno_mmu_clnt_beh val)
{
return ((val) << A2XX_MH_MMU_CONFIG_VGT_R1_CLNT_BEHAVIOR__SHIFT) & A2XX_MH_MMU_CONFIG_VGT_R1_CLNT_BEHAVIOR__MASK;
}
#define A2XX_MH_MMU_CONFIG_TC_R_CLNT_BEHAVIOR__MASK 0x00c00000
#define A2XX_MH_MMU_CONFIG_TC_R_CLNT_BEHAVIOR__SHIFT 22
static inline uint32_t A2XX_MH_MMU_CONFIG_TC_R_CLNT_BEHAVIOR(enum adreno_mmu_clnt_beh val)
{
return ((val) << A2XX_MH_MMU_CONFIG_TC_R_CLNT_BEHAVIOR__SHIFT) & A2XX_MH_MMU_CONFIG_TC_R_CLNT_BEHAVIOR__MASK;
}
#define A2XX_MH_MMU_CONFIG_PA_W_CLNT_BEHAVIOR__MASK 0x03000000
#define A2XX_MH_MMU_CONFIG_PA_W_CLNT_BEHAVIOR__SHIFT 24
static inline uint32_t A2XX_MH_MMU_CONFIG_PA_W_CLNT_BEHAVIOR(enum adreno_mmu_clnt_beh val)
{
return ((val) << A2XX_MH_MMU_CONFIG_PA_W_CLNT_BEHAVIOR__SHIFT) & A2XX_MH_MMU_CONFIG_PA_W_CLNT_BEHAVIOR__MASK;
}
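 
/*
 * Illustrative usage sketch (editor's addition, not part of the generated
 * header): the *__MASK / *__SHIFT pairs and their inline helpers are meant
 * to be OR'd together when composing a register value, e.g. for MH_MMU_CONFIG:
 *
 *   uint32_t cfg = A2XX_MH_MMU_CONFIG_MMU_ENABLE |
 *                  A2XX_MH_MMU_CONFIG_RB_W_CLNT_BEHAVIOR(BEH_TRAN_RNG) |
 *                  A2XX_MH_MMU_CONFIG_CP_W_CLNT_BEHAVIOR(BEH_TRAN_RNG);
 *
 * Each helper shifts its enum into place and masks the result, so a field
 * cannot overflow into its neighbours.
 */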
 
#define REG_A2XX_MH_MMU_VA_RANGE 0x00000041
 
#define REG_A2XX_MH_MMU_PT_BASE 0x00000042
 
#define REG_A2XX_MH_MMU_PAGE_FAULT 0x00000043
 
#define REG_A2XX_MH_MMU_TRAN_ERROR 0x00000044
 
#define REG_A2XX_MH_MMU_INVALIDATE 0x00000045
 
#define REG_A2XX_MH_MMU_MPU_BASE 0x00000046
 
#define REG_A2XX_MH_MMU_MPU_END 0x00000047
 
#define REG_A2XX_NQWAIT_UNTIL 0x00000394
 
#define REG_A2XX_RBBM_PERFCOUNTER1_SELECT 0x00000395
 
#define REG_A2XX_RBBM_PERFCOUNTER1_LO 0x00000397
 
#define REG_A2XX_RBBM_PERFCOUNTER1_HI 0x00000398
 
#define REG_A2XX_RBBM_DEBUG 0x0000039b
 
#define REG_A2XX_RBBM_PM_OVERRIDE1 0x0000039c
 
#define REG_A2XX_RBBM_PM_OVERRIDE2 0x0000039d
 
#define REG_A2XX_RBBM_DEBUG_OUT 0x000003a0
 
#define REG_A2XX_RBBM_DEBUG_CNTL 0x000003a1
 
#define REG_A2XX_RBBM_READ_ERROR 0x000003b3
 
#define REG_A2XX_RBBM_INT_CNTL 0x000003b4
 
#define REG_A2XX_RBBM_INT_STATUS 0x000003b5
 
#define REG_A2XX_RBBM_INT_ACK 0x000003b6
 
#define REG_A2XX_MASTER_INT_SIGNAL 0x000003b7
 
#define REG_A2XX_RBBM_PERIPHID1 0x000003f9
 
#define REG_A2XX_RBBM_PERIPHID2 0x000003fa
 
#define REG_A2XX_CP_PERFMON_CNTL 0x00000444
 
#define REG_A2XX_CP_PERFCOUNTER_SELECT 0x00000445
 
#define REG_A2XX_CP_PERFCOUNTER_LO 0x00000446
 
#define REG_A2XX_CP_PERFCOUNTER_HI 0x00000447
 
#define REG_A2XX_RBBM_STATUS 0x000005d0
#define A2XX_RBBM_STATUS_CMDFIFO_AVAIL__MASK 0x0000001f
#define A2XX_RBBM_STATUS_CMDFIFO_AVAIL__SHIFT 0
static inline uint32_t A2XX_RBBM_STATUS_CMDFIFO_AVAIL(uint32_t val)
{
return ((val) << A2XX_RBBM_STATUS_CMDFIFO_AVAIL__SHIFT) & A2XX_RBBM_STATUS_CMDFIFO_AVAIL__MASK;
}
#define A2XX_RBBM_STATUS_TC_BUSY 0x00000020
#define A2XX_RBBM_STATUS_HIRQ_PENDING 0x00000100
#define A2XX_RBBM_STATUS_CPRQ_PENDING 0x00000200
#define A2XX_RBBM_STATUS_CFRQ_PENDING 0x00000400
#define A2XX_RBBM_STATUS_PFRQ_PENDING 0x00000800
#define A2XX_RBBM_STATUS_VGT_BUSY_NO_DMA 0x00001000
#define A2XX_RBBM_STATUS_RBBM_WU_BUSY 0x00004000
#define A2XX_RBBM_STATUS_CP_NRT_BUSY 0x00010000
#define A2XX_RBBM_STATUS_MH_BUSY 0x00040000
#define A2XX_RBBM_STATUS_MH_COHERENCY_BUSY 0x00080000
#define A2XX_RBBM_STATUS_SX_BUSY 0x00200000
#define A2XX_RBBM_STATUS_TPC_BUSY 0x00400000
#define A2XX_RBBM_STATUS_SC_CNTX_BUSY 0x01000000
#define A2XX_RBBM_STATUS_PA_BUSY 0x02000000
#define A2XX_RBBM_STATUS_VGT_BUSY 0x04000000
#define A2XX_RBBM_STATUS_SQ_CNTX17_BUSY 0x08000000
#define A2XX_RBBM_STATUS_SQ_CNTX0_BUSY 0x10000000
#define A2XX_RBBM_STATUS_RB_CNTX_BUSY 0x40000000
#define A2XX_RBBM_STATUS_GUI_ACTIVE 0x80000000
 
#define REG_A2XX_MH_ARBITER_CONFIG 0x00000a40
#define A2XX_MH_ARBITER_CONFIG_SAME_PAGE_LIMIT__MASK 0x0000003f
#define A2XX_MH_ARBITER_CONFIG_SAME_PAGE_LIMIT__SHIFT 0
static inline uint32_t A2XX_MH_ARBITER_CONFIG_SAME_PAGE_LIMIT(uint32_t val)
{
return ((val) << A2XX_MH_ARBITER_CONFIG_SAME_PAGE_LIMIT__SHIFT) & A2XX_MH_ARBITER_CONFIG_SAME_PAGE_LIMIT__MASK;
}
#define A2XX_MH_ARBITER_CONFIG_SAME_PAGE_GRANULARITY 0x00000040
#define A2XX_MH_ARBITER_CONFIG_L1_ARB_ENABLE 0x00000080
#define A2XX_MH_ARBITER_CONFIG_L1_ARB_HOLD_ENABLE 0x00000100
#define A2XX_MH_ARBITER_CONFIG_L2_ARB_CONTROL 0x00000200
#define A2XX_MH_ARBITER_CONFIG_PAGE_SIZE__MASK 0x00001c00
#define A2XX_MH_ARBITER_CONFIG_PAGE_SIZE__SHIFT 10
static inline uint32_t A2XX_MH_ARBITER_CONFIG_PAGE_SIZE(uint32_t val)
{
return ((val) << A2XX_MH_ARBITER_CONFIG_PAGE_SIZE__SHIFT) & A2XX_MH_ARBITER_CONFIG_PAGE_SIZE__MASK;
}
#define A2XX_MH_ARBITER_CONFIG_TC_REORDER_ENABLE 0x00002000
#define A2XX_MH_ARBITER_CONFIG_TC_ARB_HOLD_ENABLE 0x00004000
#define A2XX_MH_ARBITER_CONFIG_IN_FLIGHT_LIMIT_ENABLE 0x00008000
#define A2XX_MH_ARBITER_CONFIG_IN_FLIGHT_LIMIT__MASK 0x003f0000
#define A2XX_MH_ARBITER_CONFIG_IN_FLIGHT_LIMIT__SHIFT 16
static inline uint32_t A2XX_MH_ARBITER_CONFIG_IN_FLIGHT_LIMIT(uint32_t val)
{
return ((val) << A2XX_MH_ARBITER_CONFIG_IN_FLIGHT_LIMIT__SHIFT) & A2XX_MH_ARBITER_CONFIG_IN_FLIGHT_LIMIT__MASK;
}
#define A2XX_MH_ARBITER_CONFIG_CP_CLNT_ENABLE 0x00400000
#define A2XX_MH_ARBITER_CONFIG_VGT_CLNT_ENABLE 0x00800000
#define A2XX_MH_ARBITER_CONFIG_TC_CLNT_ENABLE 0x01000000
#define A2XX_MH_ARBITER_CONFIG_RB_CLNT_ENABLE 0x02000000
#define A2XX_MH_ARBITER_CONFIG_PA_CLNT_ENABLE 0x04000000
 
#define REG_A2XX_A220_VSC_BIN_SIZE 0x00000c01
#define A2XX_A220_VSC_BIN_SIZE_WIDTH__MASK 0x0000001f
#define A2XX_A220_VSC_BIN_SIZE_WIDTH__SHIFT 0
static inline uint32_t A2XX_A220_VSC_BIN_SIZE_WIDTH(uint32_t val)
{
return ((val >> 5) << A2XX_A220_VSC_BIN_SIZE_WIDTH__SHIFT) & A2XX_A220_VSC_BIN_SIZE_WIDTH__MASK;
}
#define A2XX_A220_VSC_BIN_SIZE_HEIGHT__MASK 0x000003e0
#define A2XX_A220_VSC_BIN_SIZE_HEIGHT__SHIFT 5
static inline uint32_t A2XX_A220_VSC_BIN_SIZE_HEIGHT(uint32_t val)
{
return ((val >> 5) << A2XX_A220_VSC_BIN_SIZE_HEIGHT__SHIFT) & A2XX_A220_VSC_BIN_SIZE_HEIGHT__MASK;
}
 
static inline uint32_t REG_A2XX_VSC_PIPE(uint32_t i0) { return 0x00000c06 + 0x3*i0; }
 
static inline uint32_t REG_A2XX_VSC_PIPE_CONFIG(uint32_t i0) { return 0x00000c06 + 0x3*i0; }
 
static inline uint32_t REG_A2XX_VSC_PIPE_DATA_ADDRESS(uint32_t i0) { return 0x00000c07 + 0x3*i0; }
 
static inline uint32_t REG_A2XX_VSC_PIPE_DATA_LENGTH(uint32_t i0) { return 0x00000c08 + 0x3*i0; }
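/* Illustrative note, not part of the generated header: the VSC_PIPE
* registers above form an array with a stride of 3 dwords per pipe, so the
* per-pipe registers could be addressed along the lines of (pipe being a
* hypothetical loop index):
*
* uint32_t cfg_reg = REG_A2XX_VSC_PIPE_CONFIG(pipe); // 0x0c06 + 3*pipe
* uint32_t addr_reg = REG_A2XX_VSC_PIPE_DATA_ADDRESS(pipe); // 0x0c07 + 3*pipe
* uint32_t len_reg = REG_A2XX_VSC_PIPE_DATA_LENGTH(pipe); // 0x0c08 + 3*pipe
*/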
 
#define REG_A2XX_PC_DEBUG_CNTL 0x00000c38
 
#define REG_A2XX_PC_DEBUG_DATA 0x00000c39
 
#define REG_A2XX_PA_SC_VIZ_QUERY_STATUS 0x00000c44
 
#define REG_A2XX_GRAS_DEBUG_CNTL 0x00000c80
 
#define REG_A2XX_PA_SU_DEBUG_CNTL 0x00000c80
 
#define REG_A2XX_GRAS_DEBUG_DATA 0x00000c81
 
#define REG_A2XX_PA_SU_DEBUG_DATA 0x00000c81
 
#define REG_A2XX_PA_SU_FACE_DATA 0x00000c86
 
#define REG_A2XX_SQ_GPR_MANAGEMENT 0x00000d00
 
#define REG_A2XX_SQ_FLOW_CONTROL 0x00000d01
 
#define REG_A2XX_SQ_INST_STORE_MANAGMENT 0x00000d02
 
#define REG_A2XX_SQ_DEBUG_MISC 0x00000d05
 
#define REG_A2XX_SQ_INT_CNTL 0x00000d34
 
#define REG_A2XX_SQ_INT_STATUS 0x00000d35
 
#define REG_A2XX_SQ_INT_ACK 0x00000d36
 
#define REG_A2XX_SQ_DEBUG_INPUT_FSM 0x00000dae
 
#define REG_A2XX_SQ_DEBUG_CONST_MGR_FSM 0x00000daf
 
#define REG_A2XX_SQ_DEBUG_TP_FSM 0x00000db0
 
#define REG_A2XX_SQ_DEBUG_FSM_ALU_0 0x00000db1
 
#define REG_A2XX_SQ_DEBUG_FSM_ALU_1 0x00000db2
 
#define REG_A2XX_SQ_DEBUG_EXP_ALLOC 0x00000db3
 
#define REG_A2XX_SQ_DEBUG_PTR_BUFF 0x00000db4
 
#define REG_A2XX_SQ_DEBUG_GPR_VTX 0x00000db5
 
#define REG_A2XX_SQ_DEBUG_GPR_PIX 0x00000db6
 
#define REG_A2XX_SQ_DEBUG_TB_STATUS_SEL 0x00000db7
 
#define REG_A2XX_SQ_DEBUG_VTX_TB_0 0x00000db8
 
#define REG_A2XX_SQ_DEBUG_VTX_TB_1 0x00000db9
 
#define REG_A2XX_SQ_DEBUG_VTX_TB_STATUS_REG 0x00000dba
 
#define REG_A2XX_SQ_DEBUG_VTX_TB_STATE_MEM 0x00000dbb
 
#define REG_A2XX_SQ_DEBUG_PIX_TB_0 0x00000dbc
 
#define REG_A2XX_SQ_DEBUG_PIX_TB_STATUS_REG_0 0x00000dbd
 
#define REG_A2XX_SQ_DEBUG_PIX_TB_STATUS_REG_1 0x00000dbe
 
#define REG_A2XX_SQ_DEBUG_PIX_TB_STATUS_REG_2 0x00000dbf
 
#define REG_A2XX_SQ_DEBUG_PIX_TB_STATUS_REG_3 0x00000dc0
 
#define REG_A2XX_SQ_DEBUG_PIX_TB_STATE_MEM 0x00000dc1
 
#define REG_A2XX_TC_CNTL_STATUS 0x00000e00
#define A2XX_TC_CNTL_STATUS_L2_INVALIDATE 0x00000001
 
#define REG_A2XX_TP0_CHICKEN 0x00000e1e
 
#define REG_A2XX_RB_BC_CONTROL 0x00000f01
#define A2XX_RB_BC_CONTROL_ACCUM_LINEAR_MODE_ENABLE 0x00000001
#define A2XX_RB_BC_CONTROL_ACCUM_TIMEOUT_SELECT__MASK 0x00000006
#define A2XX_RB_BC_CONTROL_ACCUM_TIMEOUT_SELECT__SHIFT 1
static inline uint32_t A2XX_RB_BC_CONTROL_ACCUM_TIMEOUT_SELECT(uint32_t val)
{
return ((val) << A2XX_RB_BC_CONTROL_ACCUM_TIMEOUT_SELECT__SHIFT) & A2XX_RB_BC_CONTROL_ACCUM_TIMEOUT_SELECT__MASK;
}
#define A2XX_RB_BC_CONTROL_DISABLE_EDRAM_CAM 0x00000008
#define A2XX_RB_BC_CONTROL_DISABLE_EZ_FAST_CONTEXT_SWITCH 0x00000010
#define A2XX_RB_BC_CONTROL_DISABLE_EZ_NULL_ZCMD_DROP 0x00000020
#define A2XX_RB_BC_CONTROL_DISABLE_LZ_NULL_ZCMD_DROP 0x00000040
#define A2XX_RB_BC_CONTROL_ENABLE_AZ_THROTTLE 0x00000080
#define A2XX_RB_BC_CONTROL_AZ_THROTTLE_COUNT__MASK 0x00001f00
#define A2XX_RB_BC_CONTROL_AZ_THROTTLE_COUNT__SHIFT 8
static inline uint32_t A2XX_RB_BC_CONTROL_AZ_THROTTLE_COUNT(uint32_t val)
{
return ((val) << A2XX_RB_BC_CONTROL_AZ_THROTTLE_COUNT__SHIFT) & A2XX_RB_BC_CONTROL_AZ_THROTTLE_COUNT__MASK;
}
#define A2XX_RB_BC_CONTROL_ENABLE_CRC_UPDATE 0x00004000
#define A2XX_RB_BC_CONTROL_CRC_MODE 0x00008000
#define A2XX_RB_BC_CONTROL_DISABLE_SAMPLE_COUNTERS 0x00010000
#define A2XX_RB_BC_CONTROL_DISABLE_ACCUM 0x00020000
#define A2XX_RB_BC_CONTROL_ACCUM_ALLOC_MASK__MASK 0x003c0000
#define A2XX_RB_BC_CONTROL_ACCUM_ALLOC_MASK__SHIFT 18
static inline uint32_t A2XX_RB_BC_CONTROL_ACCUM_ALLOC_MASK(uint32_t val)
{
return ((val) << A2XX_RB_BC_CONTROL_ACCUM_ALLOC_MASK__SHIFT) & A2XX_RB_BC_CONTROL_ACCUM_ALLOC_MASK__MASK;
}
#define A2XX_RB_BC_CONTROL_LINEAR_PERFORMANCE_ENABLE 0x00400000
#define A2XX_RB_BC_CONTROL_ACCUM_DATA_FIFO_LIMIT__MASK 0x07800000
#define A2XX_RB_BC_CONTROL_ACCUM_DATA_FIFO_LIMIT__SHIFT 23
static inline uint32_t A2XX_RB_BC_CONTROL_ACCUM_DATA_FIFO_LIMIT(uint32_t val)
{
return ((val) << A2XX_RB_BC_CONTROL_ACCUM_DATA_FIFO_LIMIT__SHIFT) & A2XX_RB_BC_CONTROL_ACCUM_DATA_FIFO_LIMIT__MASK;
}
#define A2XX_RB_BC_CONTROL_MEM_EXPORT_TIMEOUT_SELECT__MASK 0x18000000
#define A2XX_RB_BC_CONTROL_MEM_EXPORT_TIMEOUT_SELECT__SHIFT 27
static inline uint32_t A2XX_RB_BC_CONTROL_MEM_EXPORT_TIMEOUT_SELECT(uint32_t val)
{
return ((val) << A2XX_RB_BC_CONTROL_MEM_EXPORT_TIMEOUT_SELECT__SHIFT) & A2XX_RB_BC_CONTROL_MEM_EXPORT_TIMEOUT_SELECT__MASK;
}
#define A2XX_RB_BC_CONTROL_MEM_EXPORT_LINEAR_MODE_ENABLE 0x20000000
#define A2XX_RB_BC_CONTROL_CRC_SYSTEM 0x40000000
#define A2XX_RB_BC_CONTROL_RESERVED6 0x80000000
 
#define REG_A2XX_RB_EDRAM_INFO 0x00000f02
 
#define REG_A2XX_RB_DEBUG_CNTL 0x00000f26
 
#define REG_A2XX_RB_DEBUG_DATA 0x00000f27
 
#define REG_A2XX_RB_SURFACE_INFO 0x00002000
 
#define REG_A2XX_RB_COLOR_INFO 0x00002001
#define A2XX_RB_COLOR_INFO_FORMAT__MASK 0x0000000f
#define A2XX_RB_COLOR_INFO_FORMAT__SHIFT 0
static inline uint32_t A2XX_RB_COLOR_INFO_FORMAT(enum a2xx_colorformatx val)
{
return ((val) << A2XX_RB_COLOR_INFO_FORMAT__SHIFT) & A2XX_RB_COLOR_INFO_FORMAT__MASK;
}
#define A2XX_RB_COLOR_INFO_ROUND_MODE__MASK 0x00000030
#define A2XX_RB_COLOR_INFO_ROUND_MODE__SHIFT 4
static inline uint32_t A2XX_RB_COLOR_INFO_ROUND_MODE(uint32_t val)
{
return ((val) << A2XX_RB_COLOR_INFO_ROUND_MODE__SHIFT) & A2XX_RB_COLOR_INFO_ROUND_MODE__MASK;
}
#define A2XX_RB_COLOR_INFO_LINEAR 0x00000040
#define A2XX_RB_COLOR_INFO_ENDIAN__MASK 0x00000180
#define A2XX_RB_COLOR_INFO_ENDIAN__SHIFT 7
static inline uint32_t A2XX_RB_COLOR_INFO_ENDIAN(uint32_t val)
{
return ((val) << A2XX_RB_COLOR_INFO_ENDIAN__SHIFT) & A2XX_RB_COLOR_INFO_ENDIAN__MASK;
}
#define A2XX_RB_COLOR_INFO_SWAP__MASK 0x00000600
#define A2XX_RB_COLOR_INFO_SWAP__SHIFT 9
static inline uint32_t A2XX_RB_COLOR_INFO_SWAP(uint32_t val)
{
return ((val) << A2XX_RB_COLOR_INFO_SWAP__SHIFT) & A2XX_RB_COLOR_INFO_SWAP__MASK;
}
#define A2XX_RB_COLOR_INFO_BASE__MASK 0xfffff000
#define A2XX_RB_COLOR_INFO_BASE__SHIFT 12
static inline uint32_t A2XX_RB_COLOR_INFO_BASE(uint32_t val)
{
return ((val >> 10) << A2XX_RB_COLOR_INFO_BASE__SHIFT) & A2XX_RB_COLOR_INFO_BASE__MASK;
}
 
#define REG_A2XX_RB_DEPTH_INFO 0x00002002
#define A2XX_RB_DEPTH_INFO_DEPTH_FORMAT__MASK 0x00000001
#define A2XX_RB_DEPTH_INFO_DEPTH_FORMAT__SHIFT 0
static inline uint32_t A2XX_RB_DEPTH_INFO_DEPTH_FORMAT(enum adreno_rb_depth_format val)
{
return ((val) << A2XX_RB_DEPTH_INFO_DEPTH_FORMAT__SHIFT) & A2XX_RB_DEPTH_INFO_DEPTH_FORMAT__MASK;
}
#define A2XX_RB_DEPTH_INFO_DEPTH_BASE__MASK 0xfffff000
#define A2XX_RB_DEPTH_INFO_DEPTH_BASE__SHIFT 12
static inline uint32_t A2XX_RB_DEPTH_INFO_DEPTH_BASE(uint32_t val)
{
return ((val >> 10) << A2XX_RB_DEPTH_INFO_DEPTH_BASE__SHIFT) & A2XX_RB_DEPTH_INFO_DEPTH_BASE__MASK;
}
 
#define REG_A2XX_A225_RB_COLOR_INFO3 0x00002005
 
#define REG_A2XX_COHER_DEST_BASE_0 0x00002006
 
#define REG_A2XX_PA_SC_SCREEN_SCISSOR_TL 0x0000200e
#define A2XX_PA_SC_SCREEN_SCISSOR_TL_WINDOW_OFFSET_DISABLE 0x80000000
#define A2XX_PA_SC_SCREEN_SCISSOR_TL_X__MASK 0x00007fff
#define A2XX_PA_SC_SCREEN_SCISSOR_TL_X__SHIFT 0
static inline uint32_t A2XX_PA_SC_SCREEN_SCISSOR_TL_X(uint32_t val)
{
return ((val) << A2XX_PA_SC_SCREEN_SCISSOR_TL_X__SHIFT) & A2XX_PA_SC_SCREEN_SCISSOR_TL_X__MASK;
}
#define A2XX_PA_SC_SCREEN_SCISSOR_TL_Y__MASK 0x7fff0000
#define A2XX_PA_SC_SCREEN_SCISSOR_TL_Y__SHIFT 16
static inline uint32_t A2XX_PA_SC_SCREEN_SCISSOR_TL_Y(uint32_t val)
{
return ((val) << A2XX_PA_SC_SCREEN_SCISSOR_TL_Y__SHIFT) & A2XX_PA_SC_SCREEN_SCISSOR_TL_Y__MASK;
}
 
#define REG_A2XX_PA_SC_SCREEN_SCISSOR_BR 0x0000200f
#define A2XX_PA_SC_SCREEN_SCISSOR_BR_WINDOW_OFFSET_DISABLE 0x80000000
#define A2XX_PA_SC_SCREEN_SCISSOR_BR_X__MASK 0x00007fff
#define A2XX_PA_SC_SCREEN_SCISSOR_BR_X__SHIFT 0
static inline uint32_t A2XX_PA_SC_SCREEN_SCISSOR_BR_X(uint32_t val)
{
return ((val) << A2XX_PA_SC_SCREEN_SCISSOR_BR_X__SHIFT) & A2XX_PA_SC_SCREEN_SCISSOR_BR_X__MASK;
}
#define A2XX_PA_SC_SCREEN_SCISSOR_BR_Y__MASK 0x7fff0000
#define A2XX_PA_SC_SCREEN_SCISSOR_BR_Y__SHIFT 16
static inline uint32_t A2XX_PA_SC_SCREEN_SCISSOR_BR_Y(uint32_t val)
{
return ((val) << A2XX_PA_SC_SCREEN_SCISSOR_BR_Y__SHIFT) & A2XX_PA_SC_SCREEN_SCISSOR_BR_Y__MASK;
}
 
#define REG_A2XX_PA_SC_WINDOW_OFFSET 0x00002080
#define A2XX_PA_SC_WINDOW_OFFSET_X__MASK 0x00007fff
#define A2XX_PA_SC_WINDOW_OFFSET_X__SHIFT 0
static inline uint32_t A2XX_PA_SC_WINDOW_OFFSET_X(int32_t val)
{
return ((val) << A2XX_PA_SC_WINDOW_OFFSET_X__SHIFT) & A2XX_PA_SC_WINDOW_OFFSET_X__MASK;
}
#define A2XX_PA_SC_WINDOW_OFFSET_Y__MASK 0x7fff0000
#define A2XX_PA_SC_WINDOW_OFFSET_Y__SHIFT 16
static inline uint32_t A2XX_PA_SC_WINDOW_OFFSET_Y(int32_t val)
{
return ((val) << A2XX_PA_SC_WINDOW_OFFSET_Y__SHIFT) & A2XX_PA_SC_WINDOW_OFFSET_Y__MASK;
}
#define A2XX_PA_SC_WINDOW_OFFSET_DISABLE 0x80000000
 
#define REG_A2XX_PA_SC_WINDOW_SCISSOR_TL 0x00002081
#define A2XX_PA_SC_WINDOW_SCISSOR_TL_WINDOW_OFFSET_DISABLE 0x80000000
#define A2XX_PA_SC_WINDOW_SCISSOR_TL_X__MASK 0x00007fff
#define A2XX_PA_SC_WINDOW_SCISSOR_TL_X__SHIFT 0
static inline uint32_t A2XX_PA_SC_WINDOW_SCISSOR_TL_X(uint32_t val)
{
return ((val) << A2XX_PA_SC_WINDOW_SCISSOR_TL_X__SHIFT) & A2XX_PA_SC_WINDOW_SCISSOR_TL_X__MASK;
}
#define A2XX_PA_SC_WINDOW_SCISSOR_TL_Y__MASK 0x7fff0000
#define A2XX_PA_SC_WINDOW_SCISSOR_TL_Y__SHIFT 16
static inline uint32_t A2XX_PA_SC_WINDOW_SCISSOR_TL_Y(uint32_t val)
{
return ((val) << A2XX_PA_SC_WINDOW_SCISSOR_TL_Y__SHIFT) & A2XX_PA_SC_WINDOW_SCISSOR_TL_Y__MASK;
}
 
#define REG_A2XX_PA_SC_WINDOW_SCISSOR_BR 0x00002082
#define A2XX_PA_SC_WINDOW_SCISSOR_BR_WINDOW_OFFSET_DISABLE 0x80000000
#define A2XX_PA_SC_WINDOW_SCISSOR_BR_X__MASK 0x00007fff
#define A2XX_PA_SC_WINDOW_SCISSOR_BR_X__SHIFT 0
static inline uint32_t A2XX_PA_SC_WINDOW_SCISSOR_BR_X(uint32_t val)
{
return ((val) << A2XX_PA_SC_WINDOW_SCISSOR_BR_X__SHIFT) & A2XX_PA_SC_WINDOW_SCISSOR_BR_X__MASK;
}
#define A2XX_PA_SC_WINDOW_SCISSOR_BR_Y__MASK 0x7fff0000
#define A2XX_PA_SC_WINDOW_SCISSOR_BR_Y__SHIFT 16
static inline uint32_t A2XX_PA_SC_WINDOW_SCISSOR_BR_Y(uint32_t val)
{
return ((val) << A2XX_PA_SC_WINDOW_SCISSOR_BR_Y__SHIFT) & A2XX_PA_SC_WINDOW_SCISSOR_BR_Y__MASK;
}
 
#define REG_A2XX_UNKNOWN_2010 0x00002010
 
#define REG_A2XX_VGT_MAX_VTX_INDX 0x00002100
 
#define REG_A2XX_VGT_MIN_VTX_INDX 0x00002101
 
#define REG_A2XX_VGT_INDX_OFFSET 0x00002102
 
#define REG_A2XX_A225_PC_MULTI_PRIM_IB_RESET_INDX 0x00002103
 
#define REG_A2XX_RB_COLOR_MASK 0x00002104
#define A2XX_RB_COLOR_MASK_WRITE_RED 0x00000001
#define A2XX_RB_COLOR_MASK_WRITE_GREEN 0x00000002
#define A2XX_RB_COLOR_MASK_WRITE_BLUE 0x00000004
#define A2XX_RB_COLOR_MASK_WRITE_ALPHA 0x00000008
 
#define REG_A2XX_RB_BLEND_RED 0x00002105
 
#define REG_A2XX_RB_BLEND_GREEN 0x00002106
 
#define REG_A2XX_RB_BLEND_BLUE 0x00002107
 
#define REG_A2XX_RB_BLEND_ALPHA 0x00002108
 
#define REG_A2XX_RB_FOG_COLOR 0x00002109
 
#define REG_A2XX_RB_STENCILREFMASK_BF 0x0000210c
#define A2XX_RB_STENCILREFMASK_BF_STENCILREF__MASK 0x000000ff
#define A2XX_RB_STENCILREFMASK_BF_STENCILREF__SHIFT 0
static inline uint32_t A2XX_RB_STENCILREFMASK_BF_STENCILREF(uint32_t val)
{
return ((val) << A2XX_RB_STENCILREFMASK_BF_STENCILREF__SHIFT) & A2XX_RB_STENCILREFMASK_BF_STENCILREF__MASK;
}
#define A2XX_RB_STENCILREFMASK_BF_STENCILMASK__MASK 0x0000ff00
#define A2XX_RB_STENCILREFMASK_BF_STENCILMASK__SHIFT 8
static inline uint32_t A2XX_RB_STENCILREFMASK_BF_STENCILMASK(uint32_t val)
{
return ((val) << A2XX_RB_STENCILREFMASK_BF_STENCILMASK__SHIFT) & A2XX_RB_STENCILREFMASK_BF_STENCILMASK__MASK;
}
#define A2XX_RB_STENCILREFMASK_BF_STENCILWRITEMASK__MASK 0x00ff0000
#define A2XX_RB_STENCILREFMASK_BF_STENCILWRITEMASK__SHIFT 16
static inline uint32_t A2XX_RB_STENCILREFMASK_BF_STENCILWRITEMASK(uint32_t val)
{
return ((val) << A2XX_RB_STENCILREFMASK_BF_STENCILWRITEMASK__SHIFT) & A2XX_RB_STENCILREFMASK_BF_STENCILWRITEMASK__MASK;
}
 
#define REG_A2XX_RB_STENCILREFMASK 0x0000210d
#define A2XX_RB_STENCILREFMASK_STENCILREF__MASK 0x000000ff
#define A2XX_RB_STENCILREFMASK_STENCILREF__SHIFT 0
static inline uint32_t A2XX_RB_STENCILREFMASK_STENCILREF(uint32_t val)
{
return ((val) << A2XX_RB_STENCILREFMASK_STENCILREF__SHIFT) & A2XX_RB_STENCILREFMASK_STENCILREF__MASK;
}
#define A2XX_RB_STENCILREFMASK_STENCILMASK__MASK 0x0000ff00
#define A2XX_RB_STENCILREFMASK_STENCILMASK__SHIFT 8
static inline uint32_t A2XX_RB_STENCILREFMASK_STENCILMASK(uint32_t val)
{
return ((val) << A2XX_RB_STENCILREFMASK_STENCILMASK__SHIFT) & A2XX_RB_STENCILREFMASK_STENCILMASK__MASK;
}
#define A2XX_RB_STENCILREFMASK_STENCILWRITEMASK__MASK 0x00ff0000
#define A2XX_RB_STENCILREFMASK_STENCILWRITEMASK__SHIFT 16
static inline uint32_t A2XX_RB_STENCILREFMASK_STENCILWRITEMASK(uint32_t val)
{
return ((val) << A2XX_RB_STENCILREFMASK_STENCILWRITEMASK__SHIFT) & A2XX_RB_STENCILREFMASK_STENCILWRITEMASK__MASK;
}
 
#define REG_A2XX_RB_ALPHA_REF 0x0000210e
 
#define REG_A2XX_PA_CL_VPORT_XSCALE 0x0000210f
#define A2XX_PA_CL_VPORT_XSCALE__MASK 0xffffffff
#define A2XX_PA_CL_VPORT_XSCALE__SHIFT 0
static inline uint32_t A2XX_PA_CL_VPORT_XSCALE(float val)
{
return ((fui(val)) << A2XX_PA_CL_VPORT_XSCALE__SHIFT) & A2XX_PA_CL_VPORT_XSCALE__MASK;
}
 
#define REG_A2XX_PA_CL_VPORT_XOFFSET 0x00002110
#define A2XX_PA_CL_VPORT_XOFFSET__MASK 0xffffffff
#define A2XX_PA_CL_VPORT_XOFFSET__SHIFT 0
static inline uint32_t A2XX_PA_CL_VPORT_XOFFSET(float val)
{
return ((fui(val)) << A2XX_PA_CL_VPORT_XOFFSET__SHIFT) & A2XX_PA_CL_VPORT_XOFFSET__MASK;
}
 
#define REG_A2XX_PA_CL_VPORT_YSCALE 0x00002111
#define A2XX_PA_CL_VPORT_YSCALE__MASK 0xffffffff
#define A2XX_PA_CL_VPORT_YSCALE__SHIFT 0
static inline uint32_t A2XX_PA_CL_VPORT_YSCALE(float val)
{
return ((fui(val)) << A2XX_PA_CL_VPORT_YSCALE__SHIFT) & A2XX_PA_CL_VPORT_YSCALE__MASK;
}
 
#define REG_A2XX_PA_CL_VPORT_YOFFSET 0x00002112
#define A2XX_PA_CL_VPORT_YOFFSET__MASK 0xffffffff
#define A2XX_PA_CL_VPORT_YOFFSET__SHIFT 0
static inline uint32_t A2XX_PA_CL_VPORT_YOFFSET(float val)
{
return ((fui(val)) << A2XX_PA_CL_VPORT_YOFFSET__SHIFT) & A2XX_PA_CL_VPORT_YOFFSET__MASK;
}
 
#define REG_A2XX_PA_CL_VPORT_ZSCALE 0x00002113
#define A2XX_PA_CL_VPORT_ZSCALE__MASK 0xffffffff
#define A2XX_PA_CL_VPORT_ZSCALE__SHIFT 0
static inline uint32_t A2XX_PA_CL_VPORT_ZSCALE(float val)
{
return ((fui(val)) << A2XX_PA_CL_VPORT_ZSCALE__SHIFT) & A2XX_PA_CL_VPORT_ZSCALE__MASK;
}
 
#define REG_A2XX_PA_CL_VPORT_ZOFFSET 0x00002114
#define A2XX_PA_CL_VPORT_ZOFFSET__MASK 0xffffffff
#define A2XX_PA_CL_VPORT_ZOFFSET__SHIFT 0
static inline uint32_t A2XX_PA_CL_VPORT_ZOFFSET(float val)
{
return ((fui(val)) << A2XX_PA_CL_VPORT_ZOFFSET__SHIFT) & A2XX_PA_CL_VPORT_ZOFFSET__MASK;
}
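 
/* Illustrative sketch, not part of the generated header: the viewport scale
* and offset registers hold plain 32-bit floats, so the helpers above rely
* on fui() (the float-to-uint bit cast used throughout this header) rather
* than shifting an integer value. A hypothetical half-width scale, with
* viewport_width being an assumed caller-side variable:
*
* uint32_t xscale = A2XX_PA_CL_VPORT_XSCALE(viewport_width * 0.5f);
*/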
 
#define REG_A2XX_SQ_PROGRAM_CNTL 0x00002180
#define A2XX_SQ_PROGRAM_CNTL_VS_REGS__MASK 0x000000ff
#define A2XX_SQ_PROGRAM_CNTL_VS_REGS__SHIFT 0
static inline uint32_t A2XX_SQ_PROGRAM_CNTL_VS_REGS(uint32_t val)
{
return ((val) << A2XX_SQ_PROGRAM_CNTL_VS_REGS__SHIFT) & A2XX_SQ_PROGRAM_CNTL_VS_REGS__MASK;
}
#define A2XX_SQ_PROGRAM_CNTL_PS_REGS__MASK 0x0000ff00
#define A2XX_SQ_PROGRAM_CNTL_PS_REGS__SHIFT 8
static inline uint32_t A2XX_SQ_PROGRAM_CNTL_PS_REGS(uint32_t val)
{
return ((val) << A2XX_SQ_PROGRAM_CNTL_PS_REGS__SHIFT) & A2XX_SQ_PROGRAM_CNTL_PS_REGS__MASK;
}
#define A2XX_SQ_PROGRAM_CNTL_VS_RESOURCE 0x00010000
#define A2XX_SQ_PROGRAM_CNTL_PS_RESOURCE 0x00020000
#define A2XX_SQ_PROGRAM_CNTL_PARAM_GEN 0x00040000
#define A2XX_SQ_PROGRAM_CNTL_GEN_INDEX_PIX 0x00080000
#define A2XX_SQ_PROGRAM_CNTL_VS_EXPORT_COUNT__MASK 0x00f00000
#define A2XX_SQ_PROGRAM_CNTL_VS_EXPORT_COUNT__SHIFT 20
static inline uint32_t A2XX_SQ_PROGRAM_CNTL_VS_EXPORT_COUNT(uint32_t val)
{
return ((val) << A2XX_SQ_PROGRAM_CNTL_VS_EXPORT_COUNT__SHIFT) & A2XX_SQ_PROGRAM_CNTL_VS_EXPORT_COUNT__MASK;
}
#define A2XX_SQ_PROGRAM_CNTL_VS_EXPORT_MODE__MASK 0x07000000
#define A2XX_SQ_PROGRAM_CNTL_VS_EXPORT_MODE__SHIFT 24
static inline uint32_t A2XX_SQ_PROGRAM_CNTL_VS_EXPORT_MODE(enum a2xx_sq_ps_vtx_mode val)
{
return ((val) << A2XX_SQ_PROGRAM_CNTL_VS_EXPORT_MODE__SHIFT) & A2XX_SQ_PROGRAM_CNTL_VS_EXPORT_MODE__MASK;
}
#define A2XX_SQ_PROGRAM_CNTL_PS_EXPORT_MODE__MASK 0x78000000
#define A2XX_SQ_PROGRAM_CNTL_PS_EXPORT_MODE__SHIFT 27
static inline uint32_t A2XX_SQ_PROGRAM_CNTL_PS_EXPORT_MODE(uint32_t val)
{
return ((val) << A2XX_SQ_PROGRAM_CNTL_PS_EXPORT_MODE__SHIFT) & A2XX_SQ_PROGRAM_CNTL_PS_EXPORT_MODE__MASK;
}
#define A2XX_SQ_PROGRAM_CNTL_GEN_INDEX_VTX 0x80000000
 
#define REG_A2XX_SQ_CONTEXT_MISC 0x00002181
#define A2XX_SQ_CONTEXT_MISC_INST_PRED_OPTIMIZE 0x00000001
#define A2XX_SQ_CONTEXT_MISC_SC_OUTPUT_SCREEN_XY 0x00000002
#define A2XX_SQ_CONTEXT_MISC_SC_SAMPLE_CNTL__MASK 0x0000000c
#define A2XX_SQ_CONTEXT_MISC_SC_SAMPLE_CNTL__SHIFT 2
static inline uint32_t A2XX_SQ_CONTEXT_MISC_SC_SAMPLE_CNTL(enum a2xx_sq_sample_cntl val)
{
return ((val) << A2XX_SQ_CONTEXT_MISC_SC_SAMPLE_CNTL__SHIFT) & A2XX_SQ_CONTEXT_MISC_SC_SAMPLE_CNTL__MASK;
}
#define A2XX_SQ_CONTEXT_MISC_PARAM_GEN_POS__MASK 0x0000ff00
#define A2XX_SQ_CONTEXT_MISC_PARAM_GEN_POS__SHIFT 8
static inline uint32_t A2XX_SQ_CONTEXT_MISC_PARAM_GEN_POS(uint32_t val)
{
return ((val) << A2XX_SQ_CONTEXT_MISC_PARAM_GEN_POS__SHIFT) & A2XX_SQ_CONTEXT_MISC_PARAM_GEN_POS__MASK;
}
#define A2XX_SQ_CONTEXT_MISC_PERFCOUNTER_REF 0x00010000
#define A2XX_SQ_CONTEXT_MISC_YEILD_OPTIMIZE 0x00020000
#define A2XX_SQ_CONTEXT_MISC_TX_CACHE_SEL 0x00040000
 
#define REG_A2XX_SQ_INTERPOLATOR_CNTL 0x00002182
 
#define REG_A2XX_SQ_WRAPPING_0 0x00002183
 
#define REG_A2XX_SQ_WRAPPING_1 0x00002184
 
#define REG_A2XX_SQ_PS_PROGRAM 0x000021f6
 
#define REG_A2XX_SQ_VS_PROGRAM 0x000021f7
 
#define REG_A2XX_VGT_EVENT_INITIATOR 0x000021f9
 
#define REG_A2XX_VGT_DRAW_INITIATOR 0x000021fc
#define A2XX_VGT_DRAW_INITIATOR_PRIM_TYPE__MASK 0x0000003f
#define A2XX_VGT_DRAW_INITIATOR_PRIM_TYPE__SHIFT 0
static inline uint32_t A2XX_VGT_DRAW_INITIATOR_PRIM_TYPE(enum pc_di_primtype val)
{
return ((val) << A2XX_VGT_DRAW_INITIATOR_PRIM_TYPE__SHIFT) & A2XX_VGT_DRAW_INITIATOR_PRIM_TYPE__MASK;
}
#define A2XX_VGT_DRAW_INITIATOR_SOURCE_SELECT__MASK 0x000000c0
#define A2XX_VGT_DRAW_INITIATOR_SOURCE_SELECT__SHIFT 6
static inline uint32_t A2XX_VGT_DRAW_INITIATOR_SOURCE_SELECT(enum pc_di_src_sel val)
{
return ((val) << A2XX_VGT_DRAW_INITIATOR_SOURCE_SELECT__SHIFT) & A2XX_VGT_DRAW_INITIATOR_SOURCE_SELECT__MASK;
}
#define A2XX_VGT_DRAW_INITIATOR_VIS_CULL__MASK 0x00000600
#define A2XX_VGT_DRAW_INITIATOR_VIS_CULL__SHIFT 9
static inline uint32_t A2XX_VGT_DRAW_INITIATOR_VIS_CULL(enum pc_di_vis_cull_mode val)
{
return ((val) << A2XX_VGT_DRAW_INITIATOR_VIS_CULL__SHIFT) & A2XX_VGT_DRAW_INITIATOR_VIS_CULL__MASK;
}
#define A2XX_VGT_DRAW_INITIATOR_INDEX_SIZE__MASK 0x00000800
#define A2XX_VGT_DRAW_INITIATOR_INDEX_SIZE__SHIFT 11
static inline uint32_t A2XX_VGT_DRAW_INITIATOR_INDEX_SIZE(enum pc_di_index_size val)
{
return ((val) << A2XX_VGT_DRAW_INITIATOR_INDEX_SIZE__SHIFT) & A2XX_VGT_DRAW_INITIATOR_INDEX_SIZE__MASK;
}
#define A2XX_VGT_DRAW_INITIATOR_NOT_EOP 0x00001000
#define A2XX_VGT_DRAW_INITIATOR_SMALL_INDEX 0x00002000
#define A2XX_VGT_DRAW_INITIATOR_PRE_DRAW_INITIATOR_ENABLE 0x00004000
#define A2XX_VGT_DRAW_INITIATOR_NUM_INSTANCES__MASK 0xff000000
#define A2XX_VGT_DRAW_INITIATOR_NUM_INSTANCES__SHIFT 24
static inline uint32_t A2XX_VGT_DRAW_INITIATOR_NUM_INSTANCES(uint32_t val)
{
return ((val) << A2XX_VGT_DRAW_INITIATOR_NUM_INSTANCES__SHIFT) & A2XX_VGT_DRAW_INITIATOR_NUM_INSTANCES__MASK;
}
 
#define REG_A2XX_VGT_IMMED_DATA 0x000021fd
 
#define REG_A2XX_RB_DEPTHCONTROL 0x00002200
#define A2XX_RB_DEPTHCONTROL_STENCIL_ENABLE 0x00000001
#define A2XX_RB_DEPTHCONTROL_Z_ENABLE 0x00000002
#define A2XX_RB_DEPTHCONTROL_Z_WRITE_ENABLE 0x00000004
#define A2XX_RB_DEPTHCONTROL_EARLY_Z_ENABLE 0x00000008
#define A2XX_RB_DEPTHCONTROL_ZFUNC__MASK 0x00000070
#define A2XX_RB_DEPTHCONTROL_ZFUNC__SHIFT 4
static inline uint32_t A2XX_RB_DEPTHCONTROL_ZFUNC(enum adreno_compare_func val)
{
return ((val) << A2XX_RB_DEPTHCONTROL_ZFUNC__SHIFT) & A2XX_RB_DEPTHCONTROL_ZFUNC__MASK;
}
#define A2XX_RB_DEPTHCONTROL_BACKFACE_ENABLE 0x00000080
#define A2XX_RB_DEPTHCONTROL_STENCILFUNC__MASK 0x00000700
#define A2XX_RB_DEPTHCONTROL_STENCILFUNC__SHIFT 8
static inline uint32_t A2XX_RB_DEPTHCONTROL_STENCILFUNC(enum adreno_compare_func val)
{
return ((val) << A2XX_RB_DEPTHCONTROL_STENCILFUNC__SHIFT) & A2XX_RB_DEPTHCONTROL_STENCILFUNC__MASK;
}
#define A2XX_RB_DEPTHCONTROL_STENCILFAIL__MASK 0x00003800
#define A2XX_RB_DEPTHCONTROL_STENCILFAIL__SHIFT 11
static inline uint32_t A2XX_RB_DEPTHCONTROL_STENCILFAIL(enum adreno_stencil_op val)
{
return ((val) << A2XX_RB_DEPTHCONTROL_STENCILFAIL__SHIFT) & A2XX_RB_DEPTHCONTROL_STENCILFAIL__MASK;
}
#define A2XX_RB_DEPTHCONTROL_STENCILZPASS__MASK 0x0001c000
#define A2XX_RB_DEPTHCONTROL_STENCILZPASS__SHIFT 14
static inline uint32_t A2XX_RB_DEPTHCONTROL_STENCILZPASS(enum adreno_stencil_op val)
{
return ((val) << A2XX_RB_DEPTHCONTROL_STENCILZPASS__SHIFT) & A2XX_RB_DEPTHCONTROL_STENCILZPASS__MASK;
}
#define A2XX_RB_DEPTHCONTROL_STENCILZFAIL__MASK 0x000e0000
#define A2XX_RB_DEPTHCONTROL_STENCILZFAIL__SHIFT 17
static inline uint32_t A2XX_RB_DEPTHCONTROL_STENCILZFAIL(enum adreno_stencil_op val)
{
return ((val) << A2XX_RB_DEPTHCONTROL_STENCILZFAIL__SHIFT) & A2XX_RB_DEPTHCONTROL_STENCILZFAIL__MASK;
}
#define A2XX_RB_DEPTHCONTROL_STENCILFUNC_BF__MASK 0x00700000
#define A2XX_RB_DEPTHCONTROL_STENCILFUNC_BF__SHIFT 20
static inline uint32_t A2XX_RB_DEPTHCONTROL_STENCILFUNC_BF(enum adreno_compare_func val)
{
return ((val) << A2XX_RB_DEPTHCONTROL_STENCILFUNC_BF__SHIFT) & A2XX_RB_DEPTHCONTROL_STENCILFUNC_BF__MASK;
}
#define A2XX_RB_DEPTHCONTROL_STENCILFAIL_BF__MASK 0x03800000
#define A2XX_RB_DEPTHCONTROL_STENCILFAIL_BF__SHIFT 23
static inline uint32_t A2XX_RB_DEPTHCONTROL_STENCILFAIL_BF(enum adreno_stencil_op val)
{
return ((val) << A2XX_RB_DEPTHCONTROL_STENCILFAIL_BF__SHIFT) & A2XX_RB_DEPTHCONTROL_STENCILFAIL_BF__MASK;
}
#define A2XX_RB_DEPTHCONTROL_STENCILZPASS_BF__MASK 0x1c000000
#define A2XX_RB_DEPTHCONTROL_STENCILZPASS_BF__SHIFT 26
static inline uint32_t A2XX_RB_DEPTHCONTROL_STENCILZPASS_BF(enum adreno_stencil_op val)
{
return ((val) << A2XX_RB_DEPTHCONTROL_STENCILZPASS_BF__SHIFT) & A2XX_RB_DEPTHCONTROL_STENCILZPASS_BF__MASK;
}
#define A2XX_RB_DEPTHCONTROL_STENCILZFAIL_BF__MASK 0xe0000000
#define A2XX_RB_DEPTHCONTROL_STENCILZFAIL_BF__SHIFT 29
static inline uint32_t A2XX_RB_DEPTHCONTROL_STENCILZFAIL_BF(enum adreno_stencil_op val)
{
return ((val) << A2XX_RB_DEPTHCONTROL_STENCILZFAIL_BF__SHIFT) & A2XX_RB_DEPTHCONTROL_STENCILZFAIL_BF__MASK;
}
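 
/* Illustrative sketch, not part of the generated header: the field helpers
* above are meant to be OR'd together into a single register word. A
* hypothetical depth-test setup, assuming the FUNC_LESS enumerant from the
* common adreno compare-func enum, could be built as:
*/
static inline uint32_t a2xx_example_rb_depthcontrol(void)
{
return A2XX_RB_DEPTHCONTROL_Z_ENABLE |
A2XX_RB_DEPTHCONTROL_Z_WRITE_ENABLE |
A2XX_RB_DEPTHCONTROL_ZFUNC(FUNC_LESS);
}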
 
#define REG_A2XX_RB_BLEND_CONTROL 0x00002201
#define A2XX_RB_BLEND_CONTROL_COLOR_SRCBLEND__MASK 0x0000001f
#define A2XX_RB_BLEND_CONTROL_COLOR_SRCBLEND__SHIFT 0
static inline uint32_t A2XX_RB_BLEND_CONTROL_COLOR_SRCBLEND(enum adreno_rb_blend_factor val)
{
return ((val) << A2XX_RB_BLEND_CONTROL_COLOR_SRCBLEND__SHIFT) & A2XX_RB_BLEND_CONTROL_COLOR_SRCBLEND__MASK;
}
#define A2XX_RB_BLEND_CONTROL_COLOR_COMB_FCN__MASK 0x000000e0
#define A2XX_RB_BLEND_CONTROL_COLOR_COMB_FCN__SHIFT 5
static inline uint32_t A2XX_RB_BLEND_CONTROL_COLOR_COMB_FCN(enum a2xx_rb_blend_opcode val)
{
return ((val) << A2XX_RB_BLEND_CONTROL_COLOR_COMB_FCN__SHIFT) & A2XX_RB_BLEND_CONTROL_COLOR_COMB_FCN__MASK;
}
#define A2XX_RB_BLEND_CONTROL_COLOR_DESTBLEND__MASK 0x00001f00
#define A2XX_RB_BLEND_CONTROL_COLOR_DESTBLEND__SHIFT 8
static inline uint32_t A2XX_RB_BLEND_CONTROL_COLOR_DESTBLEND(enum adreno_rb_blend_factor val)
{
return ((val) << A2XX_RB_BLEND_CONTROL_COLOR_DESTBLEND__SHIFT) & A2XX_RB_BLEND_CONTROL_COLOR_DESTBLEND__MASK;
}
#define A2XX_RB_BLEND_CONTROL_ALPHA_SRCBLEND__MASK 0x001f0000
#define A2XX_RB_BLEND_CONTROL_ALPHA_SRCBLEND__SHIFT 16
static inline uint32_t A2XX_RB_BLEND_CONTROL_ALPHA_SRCBLEND(enum adreno_rb_blend_factor val)
{
return ((val) << A2XX_RB_BLEND_CONTROL_ALPHA_SRCBLEND__SHIFT) & A2XX_RB_BLEND_CONTROL_ALPHA_SRCBLEND__MASK;
}
#define A2XX_RB_BLEND_CONTROL_ALPHA_COMB_FCN__MASK 0x00e00000
#define A2XX_RB_BLEND_CONTROL_ALPHA_COMB_FCN__SHIFT 21
static inline uint32_t A2XX_RB_BLEND_CONTROL_ALPHA_COMB_FCN(enum a2xx_rb_blend_opcode val)
{
return ((val) << A2XX_RB_BLEND_CONTROL_ALPHA_COMB_FCN__SHIFT) & A2XX_RB_BLEND_CONTROL_ALPHA_COMB_FCN__MASK;
}
#define A2XX_RB_BLEND_CONTROL_ALPHA_DESTBLEND__MASK 0x1f000000
#define A2XX_RB_BLEND_CONTROL_ALPHA_DESTBLEND__SHIFT 24
static inline uint32_t A2XX_RB_BLEND_CONTROL_ALPHA_DESTBLEND(enum adreno_rb_blend_factor val)
{
return ((val) << A2XX_RB_BLEND_CONTROL_ALPHA_DESTBLEND__SHIFT) & A2XX_RB_BLEND_CONTROL_ALPHA_DESTBLEND__MASK;
}
#define A2XX_RB_BLEND_CONTROL_BLEND_FORCE_ENABLE 0x20000000
#define A2XX_RB_BLEND_CONTROL_BLEND_FORCE 0x40000000
 
#define REG_A2XX_RB_COLORCONTROL 0x00002202
#define A2XX_RB_COLORCONTROL_ALPHA_FUNC__MASK 0x00000007
#define A2XX_RB_COLORCONTROL_ALPHA_FUNC__SHIFT 0
static inline uint32_t A2XX_RB_COLORCONTROL_ALPHA_FUNC(enum adreno_compare_func val)
{
return ((val) << A2XX_RB_COLORCONTROL_ALPHA_FUNC__SHIFT) & A2XX_RB_COLORCONTROL_ALPHA_FUNC__MASK;
}
#define A2XX_RB_COLORCONTROL_ALPHA_TEST_ENABLE 0x00000008
#define A2XX_RB_COLORCONTROL_ALPHA_TO_MASK_ENABLE 0x00000010
#define A2XX_RB_COLORCONTROL_BLEND_DISABLE 0x00000020
#define A2XX_RB_COLORCONTROL_VOB_ENABLE 0x00000040
#define A2XX_RB_COLORCONTROL_VS_EXPORTS_FOG 0x00000080
#define A2XX_RB_COLORCONTROL_ROP_CODE__MASK 0x00000f00
#define A2XX_RB_COLORCONTROL_ROP_CODE__SHIFT 8
static inline uint32_t A2XX_RB_COLORCONTROL_ROP_CODE(uint32_t val)
{
return ((val) << A2XX_RB_COLORCONTROL_ROP_CODE__SHIFT) & A2XX_RB_COLORCONTROL_ROP_CODE__MASK;
}
#define A2XX_RB_COLORCONTROL_DITHER_MODE__MASK 0x00003000
#define A2XX_RB_COLORCONTROL_DITHER_MODE__SHIFT 12
static inline uint32_t A2XX_RB_COLORCONTROL_DITHER_MODE(enum adreno_rb_dither_mode val)
{
return ((val) << A2XX_RB_COLORCONTROL_DITHER_MODE__SHIFT) & A2XX_RB_COLORCONTROL_DITHER_MODE__MASK;
}
#define A2XX_RB_COLORCONTROL_DITHER_TYPE__MASK 0x0000c000
#define A2XX_RB_COLORCONTROL_DITHER_TYPE__SHIFT 14
static inline uint32_t A2XX_RB_COLORCONTROL_DITHER_TYPE(enum a2xx_rb_dither_type val)
{
return ((val) << A2XX_RB_COLORCONTROL_DITHER_TYPE__SHIFT) & A2XX_RB_COLORCONTROL_DITHER_TYPE__MASK;
}
#define A2XX_RB_COLORCONTROL_PIXEL_FOG 0x00010000
#define A2XX_RB_COLORCONTROL_ALPHA_TO_MASK_OFFSET0__MASK 0x03000000
#define A2XX_RB_COLORCONTROL_ALPHA_TO_MASK_OFFSET0__SHIFT 24
static inline uint32_t A2XX_RB_COLORCONTROL_ALPHA_TO_MASK_OFFSET0(uint32_t val)
{
return ((val) << A2XX_RB_COLORCONTROL_ALPHA_TO_MASK_OFFSET0__SHIFT) & A2XX_RB_COLORCONTROL_ALPHA_TO_MASK_OFFSET0__MASK;
}
#define A2XX_RB_COLORCONTROL_ALPHA_TO_MASK_OFFSET1__MASK 0x0c000000
#define A2XX_RB_COLORCONTROL_ALPHA_TO_MASK_OFFSET1__SHIFT 26
static inline uint32_t A2XX_RB_COLORCONTROL_ALPHA_TO_MASK_OFFSET1(uint32_t val)
{
return ((val) << A2XX_RB_COLORCONTROL_ALPHA_TO_MASK_OFFSET1__SHIFT) & A2XX_RB_COLORCONTROL_ALPHA_TO_MASK_OFFSET1__MASK;
}
#define A2XX_RB_COLORCONTROL_ALPHA_TO_MASK_OFFSET2__MASK 0x30000000
#define A2XX_RB_COLORCONTROL_ALPHA_TO_MASK_OFFSET2__SHIFT 28
static inline uint32_t A2XX_RB_COLORCONTROL_ALPHA_TO_MASK_OFFSET2(uint32_t val)
{
return ((val) << A2XX_RB_COLORCONTROL_ALPHA_TO_MASK_OFFSET2__SHIFT) & A2XX_RB_COLORCONTROL_ALPHA_TO_MASK_OFFSET2__MASK;
}
#define A2XX_RB_COLORCONTROL_ALPHA_TO_MASK_OFFSET3__MASK 0xc0000000
#define A2XX_RB_COLORCONTROL_ALPHA_TO_MASK_OFFSET3__SHIFT 30
static inline uint32_t A2XX_RB_COLORCONTROL_ALPHA_TO_MASK_OFFSET3(uint32_t val)
{
return ((val) << A2XX_RB_COLORCONTROL_ALPHA_TO_MASK_OFFSET3__SHIFT) & A2XX_RB_COLORCONTROL_ALPHA_TO_MASK_OFFSET3__MASK;
}
 
#define REG_A2XX_VGT_CURRENT_BIN_ID_MAX 0x00002203
#define A2XX_VGT_CURRENT_BIN_ID_MAX_COLUMN__MASK 0x00000007
#define A2XX_VGT_CURRENT_BIN_ID_MAX_COLUMN__SHIFT 0
static inline uint32_t A2XX_VGT_CURRENT_BIN_ID_MAX_COLUMN(uint32_t val)
{
return ((val) << A2XX_VGT_CURRENT_BIN_ID_MAX_COLUMN__SHIFT) & A2XX_VGT_CURRENT_BIN_ID_MAX_COLUMN__MASK;
}
#define A2XX_VGT_CURRENT_BIN_ID_MAX_ROW__MASK 0x00000038
#define A2XX_VGT_CURRENT_BIN_ID_MAX_ROW__SHIFT 3
static inline uint32_t A2XX_VGT_CURRENT_BIN_ID_MAX_ROW(uint32_t val)
{
return ((val) << A2XX_VGT_CURRENT_BIN_ID_MAX_ROW__SHIFT) & A2XX_VGT_CURRENT_BIN_ID_MAX_ROW__MASK;
}
#define A2XX_VGT_CURRENT_BIN_ID_MAX_GUARD_BAND_MASK__MASK 0x000001c0
#define A2XX_VGT_CURRENT_BIN_ID_MAX_GUARD_BAND_MASK__SHIFT 6
static inline uint32_t A2XX_VGT_CURRENT_BIN_ID_MAX_GUARD_BAND_MASK(uint32_t val)
{
return ((val) << A2XX_VGT_CURRENT_BIN_ID_MAX_GUARD_BAND_MASK__SHIFT) & A2XX_VGT_CURRENT_BIN_ID_MAX_GUARD_BAND_MASK__MASK;
}
 
#define REG_A2XX_PA_CL_CLIP_CNTL 0x00002204
#define A2XX_PA_CL_CLIP_CNTL_CLIP_DISABLE 0x00010000
#define A2XX_PA_CL_CLIP_CNTL_BOUNDARY_EDGE_FLAG_ENA 0x00040000
#define A2XX_PA_CL_CLIP_CNTL_DX_CLIP_SPACE_DEF__MASK 0x00080000
#define A2XX_PA_CL_CLIP_CNTL_DX_CLIP_SPACE_DEF__SHIFT 19
static inline uint32_t A2XX_PA_CL_CLIP_CNTL_DX_CLIP_SPACE_DEF(enum a2xx_dx_clip_space val)
{
return ((val) << A2XX_PA_CL_CLIP_CNTL_DX_CLIP_SPACE_DEF__SHIFT) & A2XX_PA_CL_CLIP_CNTL_DX_CLIP_SPACE_DEF__MASK;
}
#define A2XX_PA_CL_CLIP_CNTL_DIS_CLIP_ERR_DETECT 0x00100000
#define A2XX_PA_CL_CLIP_CNTL_VTX_KILL_OR 0x00200000
#define A2XX_PA_CL_CLIP_CNTL_XY_NAN_RETAIN 0x00400000
#define A2XX_PA_CL_CLIP_CNTL_Z_NAN_RETAIN 0x00800000
#define A2XX_PA_CL_CLIP_CNTL_W_NAN_RETAIN 0x01000000
 
#define REG_A2XX_PA_SU_SC_MODE_CNTL 0x00002205
#define A2XX_PA_SU_SC_MODE_CNTL_CULL_FRONT 0x00000001
#define A2XX_PA_SU_SC_MODE_CNTL_CULL_BACK 0x00000002
#define A2XX_PA_SU_SC_MODE_CNTL_FACE 0x00000004
#define A2XX_PA_SU_SC_MODE_CNTL_POLYMODE__MASK 0x00000018
#define A2XX_PA_SU_SC_MODE_CNTL_POLYMODE__SHIFT 3
static inline uint32_t A2XX_PA_SU_SC_MODE_CNTL_POLYMODE(enum a2xx_pa_su_sc_polymode val)
{
return ((val) << A2XX_PA_SU_SC_MODE_CNTL_POLYMODE__SHIFT) & A2XX_PA_SU_SC_MODE_CNTL_POLYMODE__MASK;
}
#define A2XX_PA_SU_SC_MODE_CNTL_FRONT_PTYPE__MASK 0x000000e0
#define A2XX_PA_SU_SC_MODE_CNTL_FRONT_PTYPE__SHIFT 5
static inline uint32_t A2XX_PA_SU_SC_MODE_CNTL_FRONT_PTYPE(enum adreno_pa_su_sc_draw val)
{
return ((val) << A2XX_PA_SU_SC_MODE_CNTL_FRONT_PTYPE__SHIFT) & A2XX_PA_SU_SC_MODE_CNTL_FRONT_PTYPE__MASK;
}
#define A2XX_PA_SU_SC_MODE_CNTL_BACK_PTYPE__MASK 0x00000700
#define A2XX_PA_SU_SC_MODE_CNTL_BACK_PTYPE__SHIFT 8
static inline uint32_t A2XX_PA_SU_SC_MODE_CNTL_BACK_PTYPE(enum adreno_pa_su_sc_draw val)
{
return ((val) << A2XX_PA_SU_SC_MODE_CNTL_BACK_PTYPE__SHIFT) & A2XX_PA_SU_SC_MODE_CNTL_BACK_PTYPE__MASK;
}
#define A2XX_PA_SU_SC_MODE_CNTL_POLY_OFFSET_FRONT_ENABLE 0x00000800
#define A2XX_PA_SU_SC_MODE_CNTL_POLY_OFFSET_BACK_ENABLE 0x00001000
#define A2XX_PA_SU_SC_MODE_CNTL_POLY_OFFSET_PARA_ENABLE 0x00002000
#define A2XX_PA_SU_SC_MODE_CNTL_MSAA_ENABLE 0x00008000
#define A2XX_PA_SU_SC_MODE_CNTL_VTX_WINDOW_OFFSET_ENABLE 0x00010000
#define A2XX_PA_SU_SC_MODE_CNTL_LINE_STIPPLE_ENABLE 0x00040000
#define A2XX_PA_SU_SC_MODE_CNTL_PROVOKING_VTX_LAST 0x00080000
#define A2XX_PA_SU_SC_MODE_CNTL_PERSP_CORR_DIS 0x00100000
#define A2XX_PA_SU_SC_MODE_CNTL_MULTI_PRIM_IB_ENA 0x00200000
#define A2XX_PA_SU_SC_MODE_CNTL_QUAD_ORDER_ENABLE 0x00800000
#define A2XX_PA_SU_SC_MODE_CNTL_WAIT_RB_IDLE_ALL_TRI 0x02000000
#define A2XX_PA_SU_SC_MODE_CNTL_WAIT_RB_IDLE_FIRST_TRI_NEW_STATE 0x04000000
#define A2XX_PA_SU_SC_MODE_CNTL_CLAMPED_FACENESS 0x10000000
#define A2XX_PA_SU_SC_MODE_CNTL_ZERO_AREA_FACENESS 0x20000000
#define A2XX_PA_SU_SC_MODE_CNTL_FACE_KILL_ENABLE 0x40000000
#define A2XX_PA_SU_SC_MODE_CNTL_FACE_WRITE_ENABLE 0x80000000
 
#define REG_A2XX_PA_CL_VTE_CNTL 0x00002206
#define A2XX_PA_CL_VTE_CNTL_VPORT_X_SCALE_ENA 0x00000001
#define A2XX_PA_CL_VTE_CNTL_VPORT_X_OFFSET_ENA 0x00000002
#define A2XX_PA_CL_VTE_CNTL_VPORT_Y_SCALE_ENA 0x00000004
#define A2XX_PA_CL_VTE_CNTL_VPORT_Y_OFFSET_ENA 0x00000008
#define A2XX_PA_CL_VTE_CNTL_VPORT_Z_SCALE_ENA 0x00000010
#define A2XX_PA_CL_VTE_CNTL_VPORT_Z_OFFSET_ENA 0x00000020
#define A2XX_PA_CL_VTE_CNTL_VTX_XY_FMT 0x00000100
#define A2XX_PA_CL_VTE_CNTL_VTX_Z_FMT 0x00000200
#define A2XX_PA_CL_VTE_CNTL_VTX_W0_FMT 0x00000400
#define A2XX_PA_CL_VTE_CNTL_PERFCOUNTER_REF 0x00000800
 
#define REG_A2XX_VGT_CURRENT_BIN_ID_MIN 0x00002207
#define A2XX_VGT_CURRENT_BIN_ID_MIN_COLUMN__MASK 0x00000007
#define A2XX_VGT_CURRENT_BIN_ID_MIN_COLUMN__SHIFT 0
static inline uint32_t A2XX_VGT_CURRENT_BIN_ID_MIN_COLUMN(uint32_t val)
{
return ((val) << A2XX_VGT_CURRENT_BIN_ID_MIN_COLUMN__SHIFT) & A2XX_VGT_CURRENT_BIN_ID_MIN_COLUMN__MASK;
}
#define A2XX_VGT_CURRENT_BIN_ID_MIN_ROW__MASK 0x00000038
#define A2XX_VGT_CURRENT_BIN_ID_MIN_ROW__SHIFT 3
static inline uint32_t A2XX_VGT_CURRENT_BIN_ID_MIN_ROW(uint32_t val)
{
return ((val) << A2XX_VGT_CURRENT_BIN_ID_MIN_ROW__SHIFT) & A2XX_VGT_CURRENT_BIN_ID_MIN_ROW__MASK;
}
#define A2XX_VGT_CURRENT_BIN_ID_MIN_GUARD_BAND_MASK__MASK 0x000001c0
#define A2XX_VGT_CURRENT_BIN_ID_MIN_GUARD_BAND_MASK__SHIFT 6
static inline uint32_t A2XX_VGT_CURRENT_BIN_ID_MIN_GUARD_BAND_MASK(uint32_t val)
{
return ((val) << A2XX_VGT_CURRENT_BIN_ID_MIN_GUARD_BAND_MASK__SHIFT) & A2XX_VGT_CURRENT_BIN_ID_MIN_GUARD_BAND_MASK__MASK;
}
 
#define REG_A2XX_RB_MODECONTROL 0x00002208
#define A2XX_RB_MODECONTROL_EDRAM_MODE__MASK 0x00000007
#define A2XX_RB_MODECONTROL_EDRAM_MODE__SHIFT 0
static inline uint32_t A2XX_RB_MODECONTROL_EDRAM_MODE(enum a2xx_rb_edram_mode val)
{
return ((val) << A2XX_RB_MODECONTROL_EDRAM_MODE__SHIFT) & A2XX_RB_MODECONTROL_EDRAM_MODE__MASK;
}
 
#define REG_A2XX_A220_RB_LRZ_VSC_CONTROL 0x00002209
 
#define REG_A2XX_RB_SAMPLE_POS 0x0000220a
 
#define REG_A2XX_CLEAR_COLOR 0x0000220b
#define A2XX_CLEAR_COLOR_RED__MASK 0x000000ff
#define A2XX_CLEAR_COLOR_RED__SHIFT 0
static inline uint32_t A2XX_CLEAR_COLOR_RED(uint32_t val)
{
return ((val) << A2XX_CLEAR_COLOR_RED__SHIFT) & A2XX_CLEAR_COLOR_RED__MASK;
}
#define A2XX_CLEAR_COLOR_GREEN__MASK 0x0000ff00
#define A2XX_CLEAR_COLOR_GREEN__SHIFT 8
static inline uint32_t A2XX_CLEAR_COLOR_GREEN(uint32_t val)
{
return ((val) << A2XX_CLEAR_COLOR_GREEN__SHIFT) & A2XX_CLEAR_COLOR_GREEN__MASK;
}
#define A2XX_CLEAR_COLOR_BLUE__MASK 0x00ff0000
#define A2XX_CLEAR_COLOR_BLUE__SHIFT 16
static inline uint32_t A2XX_CLEAR_COLOR_BLUE(uint32_t val)
{
return ((val) << A2XX_CLEAR_COLOR_BLUE__SHIFT) & A2XX_CLEAR_COLOR_BLUE__MASK;
}
#define A2XX_CLEAR_COLOR_ALPHA__MASK 0xff000000
#define A2XX_CLEAR_COLOR_ALPHA__SHIFT 24
static inline uint32_t A2XX_CLEAR_COLOR_ALPHA(uint32_t val)
{
return ((val) << A2XX_CLEAR_COLOR_ALPHA__SHIFT) & A2XX_CLEAR_COLOR_ALPHA__MASK;
}
 
#define REG_A2XX_A220_GRAS_CONTROL 0x00002210
 
#define REG_A2XX_PA_SU_POINT_SIZE 0x00002280
#define A2XX_PA_SU_POINT_SIZE_HEIGHT__MASK 0x0000ffff
#define A2XX_PA_SU_POINT_SIZE_HEIGHT__SHIFT 0
static inline uint32_t A2XX_PA_SU_POINT_SIZE_HEIGHT(float val)
{
return ((((uint32_t)(val * 16.0))) << A2XX_PA_SU_POINT_SIZE_HEIGHT__SHIFT) & A2XX_PA_SU_POINT_SIZE_HEIGHT__MASK;
}
#define A2XX_PA_SU_POINT_SIZE_WIDTH__MASK 0xffff0000
#define A2XX_PA_SU_POINT_SIZE_WIDTH__SHIFT 16
static inline uint32_t A2XX_PA_SU_POINT_SIZE_WIDTH(float val)
{
return ((((uint32_t)(val * 16.0))) << A2XX_PA_SU_POINT_SIZE_WIDTH__SHIFT) & A2XX_PA_SU_POINT_SIZE_WIDTH__MASK;
}
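 
/* Illustrative sketch, not part of the generated header: point and line
* sizes are packed as unsigned 12.4 fixed point, i.e. the float is
* multiplied by 16 and truncated, so 4.0f ends up as 0x40 in each field:
*/
static inline uint32_t a2xx_example_point_size(float size)
{
return A2XX_PA_SU_POINT_SIZE_WIDTH(size) |
A2XX_PA_SU_POINT_SIZE_HEIGHT(size);
}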
 
#define REG_A2XX_PA_SU_POINT_MINMAX 0x00002281
#define A2XX_PA_SU_POINT_MINMAX_MIN__MASK 0x0000ffff
#define A2XX_PA_SU_POINT_MINMAX_MIN__SHIFT 0
static inline uint32_t A2XX_PA_SU_POINT_MINMAX_MIN(float val)
{
return ((((uint32_t)(val * 16.0))) << A2XX_PA_SU_POINT_MINMAX_MIN__SHIFT) & A2XX_PA_SU_POINT_MINMAX_MIN__MASK;
}
#define A2XX_PA_SU_POINT_MINMAX_MAX__MASK 0xffff0000
#define A2XX_PA_SU_POINT_MINMAX_MAX__SHIFT 16
static inline uint32_t A2XX_PA_SU_POINT_MINMAX_MAX(float val)
{
return ((((uint32_t)(val * 16.0))) << A2XX_PA_SU_POINT_MINMAX_MAX__SHIFT) & A2XX_PA_SU_POINT_MINMAX_MAX__MASK;
}
 
#define REG_A2XX_PA_SU_LINE_CNTL 0x00002282
#define A2XX_PA_SU_LINE_CNTL_WIDTH__MASK 0x0000ffff
#define A2XX_PA_SU_LINE_CNTL_WIDTH__SHIFT 0
static inline uint32_t A2XX_PA_SU_LINE_CNTL_WIDTH(float val)
{
return ((((uint32_t)(val * 16.0))) << A2XX_PA_SU_LINE_CNTL_WIDTH__SHIFT) & A2XX_PA_SU_LINE_CNTL_WIDTH__MASK;
}
 
#define REG_A2XX_PA_SC_LINE_STIPPLE 0x00002283
#define A2XX_PA_SC_LINE_STIPPLE_LINE_PATTERN__MASK 0x0000ffff
#define A2XX_PA_SC_LINE_STIPPLE_LINE_PATTERN__SHIFT 0
static inline uint32_t A2XX_PA_SC_LINE_STIPPLE_LINE_PATTERN(uint32_t val)
{
return ((val) << A2XX_PA_SC_LINE_STIPPLE_LINE_PATTERN__SHIFT) & A2XX_PA_SC_LINE_STIPPLE_LINE_PATTERN__MASK;
}
#define A2XX_PA_SC_LINE_STIPPLE_REPEAT_COUNT__MASK 0x00ff0000
#define A2XX_PA_SC_LINE_STIPPLE_REPEAT_COUNT__SHIFT 16
static inline uint32_t A2XX_PA_SC_LINE_STIPPLE_REPEAT_COUNT(uint32_t val)
{
return ((val) << A2XX_PA_SC_LINE_STIPPLE_REPEAT_COUNT__SHIFT) & A2XX_PA_SC_LINE_STIPPLE_REPEAT_COUNT__MASK;
}
#define A2XX_PA_SC_LINE_STIPPLE_PATTERN_BIT_ORDER__MASK 0x10000000
#define A2XX_PA_SC_LINE_STIPPLE_PATTERN_BIT_ORDER__SHIFT 28
static inline uint32_t A2XX_PA_SC_LINE_STIPPLE_PATTERN_BIT_ORDER(enum a2xx_pa_sc_pattern_bit_order val)
{
return ((val) << A2XX_PA_SC_LINE_STIPPLE_PATTERN_BIT_ORDER__SHIFT) & A2XX_PA_SC_LINE_STIPPLE_PATTERN_BIT_ORDER__MASK;
}
#define A2XX_PA_SC_LINE_STIPPLE_AUTO_RESET_CNTL__MASK 0x60000000
#define A2XX_PA_SC_LINE_STIPPLE_AUTO_RESET_CNTL__SHIFT 29
static inline uint32_t A2XX_PA_SC_LINE_STIPPLE_AUTO_RESET_CNTL(enum a2xx_pa_sc_auto_reset_cntl val)
{
return ((val) << A2XX_PA_SC_LINE_STIPPLE_AUTO_RESET_CNTL__SHIFT) & A2XX_PA_SC_LINE_STIPPLE_AUTO_RESET_CNTL__MASK;
}
 
#define REG_A2XX_PA_SC_VIZ_QUERY 0x00002293
 
#define REG_A2XX_VGT_ENHANCE 0x00002294
 
#define REG_A2XX_PA_SC_LINE_CNTL 0x00002300
#define A2XX_PA_SC_LINE_CNTL_BRES_CNTL__MASK 0x0000ffff
#define A2XX_PA_SC_LINE_CNTL_BRES_CNTL__SHIFT 0
static inline uint32_t A2XX_PA_SC_LINE_CNTL_BRES_CNTL(uint32_t val)
{
return ((val) << A2XX_PA_SC_LINE_CNTL_BRES_CNTL__SHIFT) & A2XX_PA_SC_LINE_CNTL_BRES_CNTL__MASK;
}
#define A2XX_PA_SC_LINE_CNTL_USE_BRES_CNTL 0x00000100
#define A2XX_PA_SC_LINE_CNTL_EXPAND_LINE_WIDTH 0x00000200
#define A2XX_PA_SC_LINE_CNTL_LAST_PIXEL 0x00000400
 
#define REG_A2XX_PA_SC_AA_CONFIG 0x00002301
 
#define REG_A2XX_PA_SU_VTX_CNTL 0x00002302
#define A2XX_PA_SU_VTX_CNTL_PIX_CENTER__MASK 0x00000001
#define A2XX_PA_SU_VTX_CNTL_PIX_CENTER__SHIFT 0
static inline uint32_t A2XX_PA_SU_VTX_CNTL_PIX_CENTER(enum a2xx_pa_pixcenter val)
{
return ((val) << A2XX_PA_SU_VTX_CNTL_PIX_CENTER__SHIFT) & A2XX_PA_SU_VTX_CNTL_PIX_CENTER__MASK;
}
#define A2XX_PA_SU_VTX_CNTL_ROUND_MODE__MASK 0x00000006
#define A2XX_PA_SU_VTX_CNTL_ROUND_MODE__SHIFT 1
static inline uint32_t A2XX_PA_SU_VTX_CNTL_ROUND_MODE(enum a2xx_pa_roundmode val)
{
return ((val) << A2XX_PA_SU_VTX_CNTL_ROUND_MODE__SHIFT) & A2XX_PA_SU_VTX_CNTL_ROUND_MODE__MASK;
}
#define A2XX_PA_SU_VTX_CNTL_QUANT_MODE__MASK 0x00000380
#define A2XX_PA_SU_VTX_CNTL_QUANT_MODE__SHIFT 7
static inline uint32_t A2XX_PA_SU_VTX_CNTL_QUANT_MODE(enum a2xx_pa_quantmode val)
{
return ((val) << A2XX_PA_SU_VTX_CNTL_QUANT_MODE__SHIFT) & A2XX_PA_SU_VTX_CNTL_QUANT_MODE__MASK;
}
 
#define REG_A2XX_PA_CL_GB_VERT_CLIP_ADJ 0x00002303
#define A2XX_PA_CL_GB_VERT_CLIP_ADJ__MASK 0xffffffff
#define A2XX_PA_CL_GB_VERT_CLIP_ADJ__SHIFT 0
static inline uint32_t A2XX_PA_CL_GB_VERT_CLIP_ADJ(float val)
{
return ((fui(val)) << A2XX_PA_CL_GB_VERT_CLIP_ADJ__SHIFT) & A2XX_PA_CL_GB_VERT_CLIP_ADJ__MASK;
}
 
#define REG_A2XX_PA_CL_GB_VERT_DISC_ADJ 0x00002304
#define A2XX_PA_CL_GB_VERT_DISC_ADJ__MASK 0xffffffff
#define A2XX_PA_CL_GB_VERT_DISC_ADJ__SHIFT 0
static inline uint32_t A2XX_PA_CL_GB_VERT_DISC_ADJ(float val)
{
return ((fui(val)) << A2XX_PA_CL_GB_VERT_DISC_ADJ__SHIFT) & A2XX_PA_CL_GB_VERT_DISC_ADJ__MASK;
}
 
#define REG_A2XX_PA_CL_GB_HORZ_CLIP_ADJ 0x00002305
#define A2XX_PA_CL_GB_HORZ_CLIP_ADJ__MASK 0xffffffff
#define A2XX_PA_CL_GB_HORZ_CLIP_ADJ__SHIFT 0
static inline uint32_t A2XX_PA_CL_GB_HORZ_CLIP_ADJ(float val)
{
return ((fui(val)) << A2XX_PA_CL_GB_HORZ_CLIP_ADJ__SHIFT) & A2XX_PA_CL_GB_HORZ_CLIP_ADJ__MASK;
}
 
#define REG_A2XX_PA_CL_GB_HORZ_DISC_ADJ 0x00002306
#define A2XX_PA_CL_GB_HORZ_DISC_ADJ__MASK 0xffffffff
#define A2XX_PA_CL_GB_HORZ_DISC_ADJ__SHIFT 0
static inline uint32_t A2XX_PA_CL_GB_HORZ_DISC_ADJ(float val)
{
return ((fui(val)) << A2XX_PA_CL_GB_HORZ_DISC_ADJ__SHIFT) & A2XX_PA_CL_GB_HORZ_DISC_ADJ__MASK;
}
 
#define REG_A2XX_SQ_VS_CONST 0x00002307
#define A2XX_SQ_VS_CONST_BASE__MASK 0x000001ff
#define A2XX_SQ_VS_CONST_BASE__SHIFT 0
static inline uint32_t A2XX_SQ_VS_CONST_BASE(uint32_t val)
{
return ((val) << A2XX_SQ_VS_CONST_BASE__SHIFT) & A2XX_SQ_VS_CONST_BASE__MASK;
}
#define A2XX_SQ_VS_CONST_SIZE__MASK 0x001ff000
#define A2XX_SQ_VS_CONST_SIZE__SHIFT 12
static inline uint32_t A2XX_SQ_VS_CONST_SIZE(uint32_t val)
{
return ((val) << A2XX_SQ_VS_CONST_SIZE__SHIFT) & A2XX_SQ_VS_CONST_SIZE__MASK;
}
 
#define REG_A2XX_SQ_PS_CONST 0x00002308
#define A2XX_SQ_PS_CONST_BASE__MASK 0x000001ff
#define A2XX_SQ_PS_CONST_BASE__SHIFT 0
static inline uint32_t A2XX_SQ_PS_CONST_BASE(uint32_t val)
{
return ((val) << A2XX_SQ_PS_CONST_BASE__SHIFT) & A2XX_SQ_PS_CONST_BASE__MASK;
}
#define A2XX_SQ_PS_CONST_SIZE__MASK 0x001ff000
#define A2XX_SQ_PS_CONST_SIZE__SHIFT 12
static inline uint32_t A2XX_SQ_PS_CONST_SIZE(uint32_t val)
{
return ((val) << A2XX_SQ_PS_CONST_SIZE__SHIFT) & A2XX_SQ_PS_CONST_SIZE__MASK;
}
 
#define REG_A2XX_SQ_DEBUG_MISC_0 0x00002309
 
#define REG_A2XX_SQ_DEBUG_MISC_1 0x0000230a
 
#define REG_A2XX_PA_SC_AA_MASK 0x00002312
 
#define REG_A2XX_VGT_VERTEX_REUSE_BLOCK_CNTL 0x00002316
 
#define REG_A2XX_VGT_OUT_DEALLOC_CNTL 0x00002317
 
#define REG_A2XX_RB_COPY_CONTROL 0x00002318
#define A2XX_RB_COPY_CONTROL_COPY_SAMPLE_SELECT__MASK 0x00000007
#define A2XX_RB_COPY_CONTROL_COPY_SAMPLE_SELECT__SHIFT 0
static inline uint32_t A2XX_RB_COPY_CONTROL_COPY_SAMPLE_SELECT(enum a2xx_rb_copy_sample_select val)
{
return ((val) << A2XX_RB_COPY_CONTROL_COPY_SAMPLE_SELECT__SHIFT) & A2XX_RB_COPY_CONTROL_COPY_SAMPLE_SELECT__MASK;
}
#define A2XX_RB_COPY_CONTROL_DEPTH_CLEAR_ENABLE 0x00000008
#define A2XX_RB_COPY_CONTROL_CLEAR_MASK__MASK 0x000000f0
#define A2XX_RB_COPY_CONTROL_CLEAR_MASK__SHIFT 4
static inline uint32_t A2XX_RB_COPY_CONTROL_CLEAR_MASK(uint32_t val)
{
return ((val) << A2XX_RB_COPY_CONTROL_CLEAR_MASK__SHIFT) & A2XX_RB_COPY_CONTROL_CLEAR_MASK__MASK;
}
 
#define REG_A2XX_RB_COPY_DEST_BASE 0x00002319
 
#define REG_A2XX_RB_COPY_DEST_PITCH 0x0000231a
#define A2XX_RB_COPY_DEST_PITCH__MASK 0xffffffff
#define A2XX_RB_COPY_DEST_PITCH__SHIFT 0
static inline uint32_t A2XX_RB_COPY_DEST_PITCH(uint32_t val)
{
return ((val >> 5) << A2XX_RB_COPY_DEST_PITCH__SHIFT) & A2XX_RB_COPY_DEST_PITCH__MASK;
}
 
#define REG_A2XX_RB_COPY_DEST_INFO 0x0000231b
#define A2XX_RB_COPY_DEST_INFO_DEST_ENDIAN__MASK 0x00000007
#define A2XX_RB_COPY_DEST_INFO_DEST_ENDIAN__SHIFT 0
static inline uint32_t A2XX_RB_COPY_DEST_INFO_DEST_ENDIAN(enum adreno_rb_surface_endian val)
{
return ((val) << A2XX_RB_COPY_DEST_INFO_DEST_ENDIAN__SHIFT) & A2XX_RB_COPY_DEST_INFO_DEST_ENDIAN__MASK;
}
#define A2XX_RB_COPY_DEST_INFO_LINEAR 0x00000008
#define A2XX_RB_COPY_DEST_INFO_FORMAT__MASK 0x000000f0
#define A2XX_RB_COPY_DEST_INFO_FORMAT__SHIFT 4
static inline uint32_t A2XX_RB_COPY_DEST_INFO_FORMAT(enum a2xx_colorformatx val)
{
return ((val) << A2XX_RB_COPY_DEST_INFO_FORMAT__SHIFT) & A2XX_RB_COPY_DEST_INFO_FORMAT__MASK;
}
#define A2XX_RB_COPY_DEST_INFO_SWAP__MASK 0x00000300
#define A2XX_RB_COPY_DEST_INFO_SWAP__SHIFT 8
static inline uint32_t A2XX_RB_COPY_DEST_INFO_SWAP(uint32_t val)
{
return ((val) << A2XX_RB_COPY_DEST_INFO_SWAP__SHIFT) & A2XX_RB_COPY_DEST_INFO_SWAP__MASK;
}
#define A2XX_RB_COPY_DEST_INFO_DITHER_MODE__MASK 0x00000c00
#define A2XX_RB_COPY_DEST_INFO_DITHER_MODE__SHIFT 10
static inline uint32_t A2XX_RB_COPY_DEST_INFO_DITHER_MODE(enum adreno_rb_dither_mode val)
{
return ((val) << A2XX_RB_COPY_DEST_INFO_DITHER_MODE__SHIFT) & A2XX_RB_COPY_DEST_INFO_DITHER_MODE__MASK;
}
#define A2XX_RB_COPY_DEST_INFO_DITHER_TYPE__MASK 0x00003000
#define A2XX_RB_COPY_DEST_INFO_DITHER_TYPE__SHIFT 12
static inline uint32_t A2XX_RB_COPY_DEST_INFO_DITHER_TYPE(enum a2xx_rb_dither_type val)
{
return ((val) << A2XX_RB_COPY_DEST_INFO_DITHER_TYPE__SHIFT) & A2XX_RB_COPY_DEST_INFO_DITHER_TYPE__MASK;
}
#define A2XX_RB_COPY_DEST_INFO_WRITE_RED 0x00004000
#define A2XX_RB_COPY_DEST_INFO_WRITE_GREEN 0x00008000
#define A2XX_RB_COPY_DEST_INFO_WRITE_BLUE 0x00010000
#define A2XX_RB_COPY_DEST_INFO_WRITE_ALPHA 0x00020000
 
#define REG_A2XX_RB_COPY_DEST_OFFSET 0x0000231c
#define A2XX_RB_COPY_DEST_OFFSET_X__MASK 0x00001fff
#define A2XX_RB_COPY_DEST_OFFSET_X__SHIFT 0
static inline uint32_t A2XX_RB_COPY_DEST_OFFSET_X(uint32_t val)
{
return ((val) << A2XX_RB_COPY_DEST_OFFSET_X__SHIFT) & A2XX_RB_COPY_DEST_OFFSET_X__MASK;
}
#define A2XX_RB_COPY_DEST_OFFSET_Y__MASK 0x03ffe000
#define A2XX_RB_COPY_DEST_OFFSET_Y__SHIFT 13
static inline uint32_t A2XX_RB_COPY_DEST_OFFSET_Y(uint32_t val)
{
return ((val) << A2XX_RB_COPY_DEST_OFFSET_Y__SHIFT) & A2XX_RB_COPY_DEST_OFFSET_Y__MASK;
}
 
#define REG_A2XX_RB_DEPTH_CLEAR 0x0000231d
 
#define REG_A2XX_RB_SAMPLE_COUNT_CTL 0x00002324
 
#define REG_A2XX_RB_COLOR_DEST_MASK 0x00002326
 
#define REG_A2XX_A225_GRAS_UCP0X 0x00002340
 
#define REG_A2XX_A225_GRAS_UCP5W 0x00002357
 
#define REG_A2XX_A225_GRAS_UCP_ENABLED 0x00002360
 
#define REG_A2XX_PA_SU_POLY_OFFSET_FRONT_SCALE 0x00002380
 
#define REG_A2XX_PA_SU_POLY_OFFSET_BACK_OFFSET 0x00002383
 
#define REG_A2XX_SQ_CONSTANT_0 0x00004000
 
#define REG_A2XX_SQ_FETCH_0 0x00004800
 
#define REG_A2XX_SQ_CF_BOOLEANS 0x00004900
 
#define REG_A2XX_SQ_CF_LOOP 0x00004908
 
#define REG_A2XX_COHER_SIZE_PM4 0x00000a29
 
#define REG_A2XX_COHER_BASE_PM4 0x00000a2a
 
#define REG_A2XX_COHER_STATUS_PM4 0x00000a2b
 
#define REG_A2XX_SQ_TEX_0 0x00000000
#define A2XX_SQ_TEX_0_CLAMP_X__MASK 0x00001c00
#define A2XX_SQ_TEX_0_CLAMP_X__SHIFT 10
static inline uint32_t A2XX_SQ_TEX_0_CLAMP_X(enum sq_tex_clamp val)
{
return ((val) << A2XX_SQ_TEX_0_CLAMP_X__SHIFT) & A2XX_SQ_TEX_0_CLAMP_X__MASK;
}
#define A2XX_SQ_TEX_0_CLAMP_Y__MASK 0x0000e000
#define A2XX_SQ_TEX_0_CLAMP_Y__SHIFT 13
static inline uint32_t A2XX_SQ_TEX_0_CLAMP_Y(enum sq_tex_clamp val)
{
return ((val) << A2XX_SQ_TEX_0_CLAMP_Y__SHIFT) & A2XX_SQ_TEX_0_CLAMP_Y__MASK;
}
#define A2XX_SQ_TEX_0_CLAMP_Z__MASK 0x00070000
#define A2XX_SQ_TEX_0_CLAMP_Z__SHIFT 16
static inline uint32_t A2XX_SQ_TEX_0_CLAMP_Z(enum sq_tex_clamp val)
{
return ((val) << A2XX_SQ_TEX_0_CLAMP_Z__SHIFT) & A2XX_SQ_TEX_0_CLAMP_Z__MASK;
}
#define A2XX_SQ_TEX_0_PITCH__MASK 0xffc00000
#define A2XX_SQ_TEX_0_PITCH__SHIFT 22
static inline uint32_t A2XX_SQ_TEX_0_PITCH(uint32_t val)
{
return ((val >> 5) << A2XX_SQ_TEX_0_PITCH__SHIFT) & A2XX_SQ_TEX_0_PITCH__MASK;
}
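 
/* Illustrative note, not part of the generated header: fields with a
* built-in right shift, such as A2XX_SQ_TEX_0_PITCH above (val >> 5), store
* only the quotient, so callers are expected to pass a byte pitch that is
* already a multiple of 32. A hypothetical sketch, assuming the SQ_TEX_WRAP
* enumerant for the clamp fields and pitch_bytes as a caller-side value:
*
* uint32_t tex0 = A2XX_SQ_TEX_0_PITCH(pitch_bytes) |
* A2XX_SQ_TEX_0_CLAMP_X(SQ_TEX_WRAP);
*/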
 
#define REG_A2XX_SQ_TEX_1 0x00000001
 
#define REG_A2XX_SQ_TEX_2 0x00000002
#define A2XX_SQ_TEX_2_WIDTH__MASK 0x00001fff
#define A2XX_SQ_TEX_2_WIDTH__SHIFT 0
static inline uint32_t A2XX_SQ_TEX_2_WIDTH(uint32_t val)
{
return ((val) << A2XX_SQ_TEX_2_WIDTH__SHIFT) & A2XX_SQ_TEX_2_WIDTH__MASK;
}
#define A2XX_SQ_TEX_2_HEIGHT__MASK 0x03ffe000
#define A2XX_SQ_TEX_2_HEIGHT__SHIFT 13
static inline uint32_t A2XX_SQ_TEX_2_HEIGHT(uint32_t val)
{
return ((val) << A2XX_SQ_TEX_2_HEIGHT__SHIFT) & A2XX_SQ_TEX_2_HEIGHT__MASK;
}
 
#define REG_A2XX_SQ_TEX_3 0x00000003
#define A2XX_SQ_TEX_3_SWIZ_X__MASK 0x0000000e
#define A2XX_SQ_TEX_3_SWIZ_X__SHIFT 1
static inline uint32_t A2XX_SQ_TEX_3_SWIZ_X(enum sq_tex_swiz val)
{
return ((val) << A2XX_SQ_TEX_3_SWIZ_X__SHIFT) & A2XX_SQ_TEX_3_SWIZ_X__MASK;
}
#define A2XX_SQ_TEX_3_SWIZ_Y__MASK 0x00000070
#define A2XX_SQ_TEX_3_SWIZ_Y__SHIFT 4
static inline uint32_t A2XX_SQ_TEX_3_SWIZ_Y(enum sq_tex_swiz val)
{
return ((val) << A2XX_SQ_TEX_3_SWIZ_Y__SHIFT) & A2XX_SQ_TEX_3_SWIZ_Y__MASK;
}
#define A2XX_SQ_TEX_3_SWIZ_Z__MASK 0x00000380
#define A2XX_SQ_TEX_3_SWIZ_Z__SHIFT 7
static inline uint32_t A2XX_SQ_TEX_3_SWIZ_Z(enum sq_tex_swiz val)
{
return ((val) << A2XX_SQ_TEX_3_SWIZ_Z__SHIFT) & A2XX_SQ_TEX_3_SWIZ_Z__MASK;
}
#define A2XX_SQ_TEX_3_SWIZ_W__MASK 0x00001c00
#define A2XX_SQ_TEX_3_SWIZ_W__SHIFT 10
static inline uint32_t A2XX_SQ_TEX_3_SWIZ_W(enum sq_tex_swiz val)
{
return ((val) << A2XX_SQ_TEX_3_SWIZ_W__SHIFT) & A2XX_SQ_TEX_3_SWIZ_W__MASK;
}
#define A2XX_SQ_TEX_3_XY_MAG_FILTER__MASK 0x00180000
#define A2XX_SQ_TEX_3_XY_MAG_FILTER__SHIFT 19
static inline uint32_t A2XX_SQ_TEX_3_XY_MAG_FILTER(enum sq_tex_filter val)
{
return ((val) << A2XX_SQ_TEX_3_XY_MAG_FILTER__SHIFT) & A2XX_SQ_TEX_3_XY_MAG_FILTER__MASK;
}
#define A2XX_SQ_TEX_3_XY_MIN_FILTER__MASK 0x00600000
#define A2XX_SQ_TEX_3_XY_MIN_FILTER__SHIFT 21
static inline uint32_t A2XX_SQ_TEX_3_XY_MIN_FILTER(enum sq_tex_filter val)
{
return ((val) << A2XX_SQ_TEX_3_XY_MIN_FILTER__SHIFT) & A2XX_SQ_TEX_3_XY_MIN_FILTER__MASK;
}
 
 
#endif /* A2XX_XML */
/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/freedreno/a2xx/disasm-a2xx.c
0,0 → 1,632
/*
* Copyright (c) 2012 Rob Clark <robdclark@gmail.com>
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
 
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <unistd.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <string.h>
 
#include "disasm.h"
#include "instr-a2xx.h"
 
static const char *levels[] = {
"\t",
"\t\t",
"\t\t\t",
"\t\t\t\t",
"\t\t\t\t\t",
"\t\t\t\t\t\t",
"\t\t\t\t\t\t\t",
"\t\t\t\t\t\t\t\t",
"\t\t\t\t\t\t\t\t\t",
"x",
"x",
"x",
"x",
"x",
"x",
};
 
static enum debug_t debug;
 
/*
* ALU instructions:
*/
 
static const char chan_names[] = {
'x', 'y', 'z', 'w',
/* these only apply to FETCH dst's: */
'0', '1', '?', '_',
};
 
static void print_srcreg(uint32_t num, uint32_t type,
uint32_t swiz, uint32_t negate, uint32_t abs)
{
if (negate)
printf("-");
if (abs)
printf("|");
printf("%c%u", type ? 'R' : 'C', num);
if (swiz) {
int i;
printf(".");
for (i = 0; i < 4; i++) {
printf("%c", chan_names[(swiz + i) & 0x3]);
swiz >>= 2;
}
}
if (abs)
printf("|");
}
 
static void print_dstreg(uint32_t num, uint32_t mask, uint32_t dst_exp)
{
printf("%s%u", dst_exp ? "export" : "R", num);
if (mask != 0xf) {
int i;
printf(".");
for (i = 0; i < 4; i++) {
printf("%c", (mask & 0x1) ? chan_names[i] : '_');
mask >>= 1;
}
}
}
 
static void print_export_comment(uint32_t num, enum shader_t type)
{
const char *name = NULL;
switch (type) {
case SHADER_VERTEX:
switch (num) {
case 62: name = "gl_Position"; break;
case 63: name = "gl_PointSize"; break;
}
break;
case SHADER_FRAGMENT:
switch (num) {
case 0: name = "gl_FragColor"; break;
}
break;
}
/* if we had a symbol table here, we could look
* up the name of the varying..
*/
if (name) {
printf("\t; %s", name);
}
}
 
struct {
uint32_t num_srcs;
const char *name;
} vector_instructions[0x20] = {
#define INSTR(opc, num_srcs) [opc] = { num_srcs, #opc }
INSTR(ADDv, 2),
INSTR(MULv, 2),
INSTR(MAXv, 2),
INSTR(MINv, 2),
INSTR(SETEv, 2),
INSTR(SETGTv, 2),
INSTR(SETGTEv, 2),
INSTR(SETNEv, 2),
INSTR(FRACv, 1),
INSTR(TRUNCv, 1),
INSTR(FLOORv, 1),
INSTR(MULADDv, 3),
INSTR(CNDEv, 3),
INSTR(CNDGTEv, 3),
INSTR(CNDGTv, 3),
INSTR(DOT4v, 2),
INSTR(DOT3v, 2),
INSTR(DOT2ADDv, 3), // ???
INSTR(CUBEv, 2),
INSTR(MAX4v, 1),
INSTR(PRED_SETE_PUSHv, 2),
INSTR(PRED_SETNE_PUSHv, 2),
INSTR(PRED_SETGT_PUSHv, 2),
INSTR(PRED_SETGTE_PUSHv, 2),
INSTR(KILLEv, 2),
INSTR(KILLGTv, 2),
INSTR(KILLGTEv, 2),
INSTR(KILLNEv, 2),
INSTR(DSTv, 2),
INSTR(MOVAv, 1),
}, scalar_instructions[0x40] = {
INSTR(ADDs, 1),
INSTR(ADD_PREVs, 1),
INSTR(MULs, 1),
INSTR(MUL_PREVs, 1),
INSTR(MUL_PREV2s, 1),
INSTR(MAXs, 1),
INSTR(MINs, 1),
INSTR(SETEs, 1),
INSTR(SETGTs, 1),
INSTR(SETGTEs, 1),
INSTR(SETNEs, 1),
INSTR(FRACs, 1),
INSTR(TRUNCs, 1),
INSTR(FLOORs, 1),
INSTR(EXP_IEEE, 1),
INSTR(LOG_CLAMP, 1),
INSTR(LOG_IEEE, 1),
INSTR(RECIP_CLAMP, 1),
INSTR(RECIP_FF, 1),
INSTR(RECIP_IEEE, 1),
INSTR(RECIPSQ_CLAMP, 1),
INSTR(RECIPSQ_FF, 1),
INSTR(RECIPSQ_IEEE, 1),
INSTR(MOVAs, 1),
INSTR(MOVA_FLOORs, 1),
INSTR(SUBs, 1),
INSTR(SUB_PREVs, 1),
INSTR(PRED_SETEs, 1),
INSTR(PRED_SETNEs, 1),
INSTR(PRED_SETGTs, 1),
INSTR(PRED_SETGTEs, 1),
INSTR(PRED_SET_INVs, 1),
INSTR(PRED_SET_POPs, 1),
INSTR(PRED_SET_CLRs, 1),
INSTR(PRED_SET_RESTOREs, 1),
INSTR(KILLEs, 1),
INSTR(KILLGTs, 1),
INSTR(KILLGTEs, 1),
INSTR(KILLNEs, 1),
INSTR(KILLONEs, 1),
INSTR(SQRT_IEEE, 1),
INSTR(MUL_CONST_0, 1),
INSTR(MUL_CONST_1, 1),
INSTR(ADD_CONST_0, 1),
INSTR(ADD_CONST_1, 1),
INSTR(SUB_CONST_0, 1),
INSTR(SUB_CONST_1, 1),
INSTR(SIN, 1),
INSTR(COS, 1),
INSTR(RETAIN_PREV, 1),
#undef INSTR
};
 
static int disasm_alu(uint32_t *dwords, uint32_t alu_off,
int level, int sync, enum shader_t type)
{
instr_alu_t *alu = (instr_alu_t *)dwords;
 
printf("%s", levels[level]);
if (debug & PRINT_RAW) {
printf("%02x: %08x %08x %08x\t", alu_off,
dwords[0], dwords[1], dwords[2]);
}
 
printf(" %sALU:\t", sync ? "(S)" : " ");
 
printf("%s", vector_instructions[alu->vector_opc].name);
 
if (alu->pred_select & 0x2) {
/* seems to work similarly to conditional execution in the ARM
* instruction set, so let's use a similar syntax for now:
*/
printf((alu->pred_select & 0x1) ? "EQ" : "NE");
}
 
printf("\t");
 
print_dstreg(alu->vector_dest, alu->vector_write_mask, alu->export_data);
printf(" = ");
if (vector_instructions[alu->vector_opc].num_srcs == 3) {
print_srcreg(alu->src3_reg, alu->src3_sel, alu->src3_swiz,
alu->src3_reg_negate, alu->src3_reg_abs);
printf(", ");
}
print_srcreg(alu->src1_reg, alu->src1_sel, alu->src1_swiz,
alu->src1_reg_negate, alu->src1_reg_abs);
if (vector_instructions[alu->vector_opc].num_srcs > 1) {
printf(", ");
print_srcreg(alu->src2_reg, alu->src2_sel, alu->src2_swiz,
alu->src2_reg_negate, alu->src2_reg_abs);
}
 
if (alu->vector_clamp)
printf(" CLAMP");
 
if (alu->export_data)
print_export_comment(alu->vector_dest, type);
 
printf("\n");
 
if (alu->scalar_write_mask || !alu->vector_write_mask) {
/* 2nd optional scalar op: */
 
printf("%s", levels[level]);
if (debug & PRINT_RAW)
printf(" \t");
 
if (scalar_instructions[alu->scalar_opc].name) {
printf("\t \t%s\t", scalar_instructions[alu->scalar_opc].name);
} else {
printf("\t \tOP(%u)\t", alu->scalar_opc);
}
 
print_dstreg(alu->scalar_dest, alu->scalar_write_mask, alu->export_data);
printf(" = ");
print_srcreg(alu->src3_reg, alu->src3_sel, alu->src3_swiz,
alu->src3_reg_negate, alu->src3_reg_abs);
// TODO ADD/MUL must have another src?!?
if (alu->scalar_clamp)
printf(" CLAMP");
if (alu->export_data)
print_export_comment(alu->scalar_dest, type);
printf("\n");
}
 
return 0;
}
 
 
/*
* FETCH instructions:
*/
 
struct {
const char *name;
} fetch_types[0xff] = {
#define TYPE(id) [id] = { #id }
TYPE(FMT_1_REVERSE),
TYPE(FMT_32_FLOAT),
TYPE(FMT_32_32_FLOAT),
TYPE(FMT_32_32_32_FLOAT),
TYPE(FMT_32_32_32_32_FLOAT),
TYPE(FMT_16),
TYPE(FMT_16_16),
TYPE(FMT_16_16_16_16),
TYPE(FMT_8),
TYPE(FMT_8_8),
TYPE(FMT_8_8_8_8),
TYPE(FMT_32),
TYPE(FMT_32_32),
TYPE(FMT_32_32_32_32),
#undef TYPE
};
 
static void print_fetch_dst(uint32_t dst_reg, uint32_t dst_swiz)
{
int i;
printf("\tR%u.", dst_reg);
for (i = 0; i < 4; i++) {
printf("%c", chan_names[dst_swiz & 0x7]);
dst_swiz >>= 3;
}
}
 
static void print_fetch_vtx(instr_fetch_t *fetch)
{
instr_fetch_vtx_t *vtx = &fetch->vtx;
 
if (vtx->pred_select) {
/* seems to work similarly to conditional execution in the ARM instruction
* set, so let's use a similar syntax for now:
*/
printf(vtx->pred_condition ? "EQ" : "NE");
}
 
print_fetch_dst(vtx->dst_reg, vtx->dst_swiz);
printf(" = R%u.", vtx->src_reg);
printf("%c", chan_names[vtx->src_swiz & 0x3]);
if (fetch_types[vtx->format].name) {
printf(" %s", fetch_types[vtx->format].name);
} else {
printf(" TYPE(0x%x)", vtx->format);
}
printf(" %s", vtx->format_comp_all ? "SIGNED" : "UNSIGNED");
if (!vtx->num_format_all)
printf(" NORMALIZED");
printf(" STRIDE(%u)", vtx->stride);
if (vtx->offset)
printf(" OFFSET(%u)", vtx->offset);
printf(" CONST(%u, %u)", vtx->const_index, vtx->const_index_sel);
if (0) {
// XXX
printf(" src_reg_am=%u", vtx->src_reg_am);
printf(" dst_reg_am=%u", vtx->dst_reg_am);
printf(" num_format_all=%u", vtx->num_format_all);
printf(" signed_rf_mode_all=%u", vtx->signed_rf_mode_all);
printf(" exp_adjust_all=%u", vtx->exp_adjust_all);
}
}
 
static void print_fetch_tex(instr_fetch_t *fetch)
{
static const char *filter[] = {
[TEX_FILTER_POINT] = "POINT",
[TEX_FILTER_LINEAR] = "LINEAR",
[TEX_FILTER_BASEMAP] = "BASEMAP",
};
static const char *aniso_filter[] = {
[ANISO_FILTER_DISABLED] = "DISABLED",
[ANISO_FILTER_MAX_1_1] = "MAX_1_1",
[ANISO_FILTER_MAX_2_1] = "MAX_2_1",
[ANISO_FILTER_MAX_4_1] = "MAX_4_1",
[ANISO_FILTER_MAX_8_1] = "MAX_8_1",
[ANISO_FILTER_MAX_16_1] = "MAX_16_1",
};
static const char *arbitrary_filter[] = {
[ARBITRARY_FILTER_2X4_SYM] = "2x4_SYM",
[ARBITRARY_FILTER_2X4_ASYM] = "2x4_ASYM",
[ARBITRARY_FILTER_4X2_SYM] = "4x2_SYM",
[ARBITRARY_FILTER_4X2_ASYM] = "4x2_ASYM",
[ARBITRARY_FILTER_4X4_SYM] = "4x4_SYM",
[ARBITRARY_FILTER_4X4_ASYM] = "4x4_ASYM",
};
static const char *sample_loc[] = {
[SAMPLE_CENTROID] = "CENTROID",
[SAMPLE_CENTER] = "CENTER",
};
instr_fetch_tex_t *tex = &fetch->tex;
uint32_t src_swiz = tex->src_swiz;
int i;
 
if (tex->pred_select) {
/* seems to work similarly to conditional execution in the ARM instruction
* set, so let's use a similar syntax for now:
*/
printf(tex->pred_condition ? "EQ" : "NE");
}
 
print_fetch_dst(tex->dst_reg, tex->dst_swiz);
printf(" = R%u.", tex->src_reg);
for (i = 0; i < 3; i++) {
printf("%c", chan_names[src_swiz & 0x3]);
src_swiz >>= 2;
}
printf(" CONST(%u)", tex->const_idx);
if (tex->fetch_valid_only)
printf(" VALID_ONLY");
if (tex->tx_coord_denorm)
printf(" DENORM");
if (tex->mag_filter != TEX_FILTER_USE_FETCH_CONST)
printf(" MAG(%s)", filter[tex->mag_filter]);
if (tex->min_filter != TEX_FILTER_USE_FETCH_CONST)
printf(" MIN(%s)", filter[tex->min_filter]);
if (tex->mip_filter != TEX_FILTER_USE_FETCH_CONST)
printf(" MIP(%s)", filter[tex->mip_filter]);
if (tex->aniso_filter != ANISO_FILTER_USE_FETCH_CONST)
printf(" ANISO(%s)", aniso_filter[tex->aniso_filter]);
if (tex->arbitrary_filter != ARBITRARY_FILTER_USE_FETCH_CONST)
printf(" ARBITRARY(%s)", arbitrary_filter[tex->arbitrary_filter]);
if (tex->vol_mag_filter != TEX_FILTER_USE_FETCH_CONST)
printf(" VOL_MAG(%s)", filter[tex->vol_mag_filter]);
if (tex->vol_min_filter != TEX_FILTER_USE_FETCH_CONST)
printf(" VOL_MIN(%s)", filter[tex->vol_min_filter]);
if (!tex->use_comp_lod) {
printf(" LOD(%u)", tex->use_comp_lod);
printf(" LOD_BIAS(%u)", tex->lod_bias);
}
if (tex->use_reg_gradients)
printf(" USE_REG_GRADIENTS");
printf(" LOCATION(%s)", sample_loc[tex->sample_location]);
if (tex->offset_x || tex->offset_y || tex->offset_z)
printf(" OFFSET(%u,%u,%u)", tex->offset_x, tex->offset_y, tex->offset_z);
}
 
struct {
const char *name;
void (*fxn)(instr_fetch_t *cf);
} fetch_instructions[] = {
#define INSTR(opc, name, fxn) [opc] = { name, fxn }
INSTR(VTX_FETCH, "VERTEX", print_fetch_vtx),
INSTR(TEX_FETCH, "SAMPLE", print_fetch_tex),
INSTR(TEX_GET_BORDER_COLOR_FRAC, "?", print_fetch_tex),
INSTR(TEX_GET_COMP_TEX_LOD, "?", print_fetch_tex),
INSTR(TEX_GET_GRADIENTS, "?", print_fetch_tex),
INSTR(TEX_GET_WEIGHTS, "?", print_fetch_tex),
INSTR(TEX_SET_TEX_LOD, "SET_TEX_LOD", print_fetch_tex),
INSTR(TEX_SET_GRADIENTS_H, "?", print_fetch_tex),
INSTR(TEX_SET_GRADIENTS_V, "?", print_fetch_tex),
INSTR(TEX_RESERVED_4, "?", print_fetch_tex),
#undef INSTR
};
 
static int disasm_fetch(uint32_t *dwords, uint32_t alu_off, int level, int sync)
{
instr_fetch_t *fetch = (instr_fetch_t *)dwords;
 
printf("%s", levels[level]);
if (debug & PRINT_RAW) {
printf("%02x: %08x %08x %08x\t", alu_off,
dwords[0], dwords[1], dwords[2]);
}
 
printf(" %sFETCH:\t", sync ? "(S)" : " ");
printf("%s", fetch_instructions[fetch->opc].name);
fetch_instructions[fetch->opc].fxn(fetch);
printf("\n");
 
return 0;
}
 
/*
* CF instructions:
*/
 
static int cf_exec(instr_cf_t *cf)
{
return (cf->opc == EXEC) ||
(cf->opc == EXEC_END) ||
(cf->opc == COND_EXEC) ||
(cf->opc == COND_EXEC_END) ||
(cf->opc == COND_PRED_EXEC) ||
(cf->opc == COND_PRED_EXEC_END) ||
(cf->opc == COND_EXEC_PRED_CLEAN) ||
(cf->opc == COND_EXEC_PRED_CLEAN_END);
}
 
static int cf_cond_exec(instr_cf_t *cf)
{
return (cf->opc == COND_EXEC) ||
(cf->opc == COND_EXEC_END) ||
(cf->opc == COND_PRED_EXEC) ||
(cf->opc == COND_PRED_EXEC_END) ||
(cf->opc == COND_EXEC_PRED_CLEAN) ||
(cf->opc == COND_EXEC_PRED_CLEAN_END);
}
 
static void print_cf_nop(instr_cf_t *cf)
{
}
 
static void print_cf_exec(instr_cf_t *cf)
{
printf(" ADDR(0x%x) CNT(0x%x)", cf->exec.address, cf->exec.count);
if (cf->exec.yeild)
printf(" YIELD");
if (cf->exec.vc)
printf(" VC(0x%x)", cf->exec.vc);
if (cf->exec.bool_addr)
printf(" BOOL_ADDR(0x%x)", cf->exec.bool_addr);
if (cf->exec.address_mode == ABSOLUTE_ADDR)
printf(" ABSOLUTE_ADDR");
if (cf_cond_exec(cf))
printf(" COND(%d)", cf->exec.condition);
}
 
static void print_cf_loop(instr_cf_t *cf)
{
printf(" ADDR(0x%x) LOOP_ID(%d)", cf->loop.address, cf->loop.loop_id);
if (cf->loop.address_mode == ABSOLUTE_ADDR)
printf(" ABSOLUTE_ADDR");
}
 
static void print_cf_jmp_call(instr_cf_t *cf)
{
printf(" ADDR(0x%x) DIR(%d)", cf->jmp_call.address, cf->jmp_call.direction);
if (cf->jmp_call.force_call)
printf(" FORCE_CALL");
if (cf->jmp_call.predicated_jmp)
printf(" COND(%d)", cf->jmp_call.condition);
if (cf->jmp_call.bool_addr)
printf(" BOOL_ADDR(0x%x)", cf->jmp_call.bool_addr);
if (cf->jmp_call.address_mode == ABSOLUTE_ADDR)
printf(" ABSOLUTE_ADDR");
}
 
static void print_cf_alloc(instr_cf_t *cf)
{
static const char *bufname[] = {
[SQ_NO_ALLOC] = "NO ALLOC",
[SQ_POSITION] = "POSITION",
[SQ_PARAMETER_PIXEL] = "PARAM/PIXEL",
[SQ_MEMORY] = "MEMORY",
};
printf(" %s SIZE(0x%x)", bufname[cf->alloc.buffer_select], cf->alloc.size);
if (cf->alloc.no_serial)
printf(" NO_SERIAL");
if (cf->alloc.alloc_mode) // ???
printf(" ALLOC_MODE");
}
 
struct {
const char *name;
void (*fxn)(instr_cf_t *cf);
} cf_instructions[] = {
#define INSTR(opc, fxn) [opc] = { #opc, fxn }
INSTR(NOP, print_cf_nop),
INSTR(EXEC, print_cf_exec),
INSTR(EXEC_END, print_cf_exec),
INSTR(COND_EXEC, print_cf_exec),
INSTR(COND_EXEC_END, print_cf_exec),
INSTR(COND_PRED_EXEC, print_cf_exec),
INSTR(COND_PRED_EXEC_END, print_cf_exec),
INSTR(LOOP_START, print_cf_loop),
INSTR(LOOP_END, print_cf_loop),
INSTR(COND_CALL, print_cf_jmp_call),
INSTR(RETURN, print_cf_jmp_call),
INSTR(COND_JMP, print_cf_jmp_call),
INSTR(ALLOC, print_cf_alloc),
INSTR(COND_EXEC_PRED_CLEAN, print_cf_exec),
INSTR(COND_EXEC_PRED_CLEAN_END, print_cf_exec),
INSTR(MARK_VS_FETCH_DONE, print_cf_nop), // ??
#undef INSTR
};
 
static void print_cf(instr_cf_t *cf, int level)
{
printf("%s", levels[level]);
if (debug & PRINT_RAW) {
uint16_t *words = (uint16_t *)cf;
printf(" %04x %04x %04x \t",
words[0], words[1], words[2]);
}
printf("%s", cf_instructions[cf->opc].name);
cf_instructions[cf->opc].fxn(cf);
printf("\n");
}
 
/*
* The adreno shader microcode consists of two parts:
* 1) A CF (control-flow) program, at the header of the compiled shader,
* which refers to ALU/FETCH instructions that follow it by address.
* 2) ALU and FETCH instructions
*/
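/*
* Rough layout sketch (as implied by disasm_a2xx() below, not taken from
* any documentation): each CF instruction is 48 bits, so two of them pack
* into one 3-dword slot, while every ALU/FETCH instruction occupies a full
* 3-dword slot. The EXEC ADDR field counts 3-dword slots, so the first
* EXEC found also bounds the CF block: e.g. with ADDR(0x3) the CF block
* spans dwords 0..8 (up to 2*3 = 6 CF instructions) and the first
* ALU/FETCH instruction starts at dword 3*3 = 9.
*/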
 
int disasm_a2xx(uint32_t *dwords, int sizedwords, int level, enum shader_t type)
{
instr_cf_t *cfs = (instr_cf_t *)dwords;
int idx, max_idx;
 
for (idx = 0; ; idx++) {
instr_cf_t *cf = &cfs[idx];
if (cf_exec(cf)) {
max_idx = 2 * cf->exec.address;
break;
}
}
 
for (idx = 0; idx < max_idx; idx++) {
instr_cf_t *cf = &cfs[idx];
 
print_cf(cf, level);
 
if (cf_exec(cf)) {
uint32_t sequence = cf->exec.serialize;
uint32_t i;
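/* exec.serialize packs two bits per instruction in the clause: the low
* bit selects FETCH (1) vs ALU (0) and the high bit sets the sync flag
* (printed as "(S)" above):
*/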
for (i = 0; i < cf->exec.count; i++) {
uint32_t alu_off = (cf->exec.address + i);
if (sequence & 0x1) {
disasm_fetch(dwords + alu_off * 3, alu_off, level, sequence & 0x2);
} else {
disasm_alu(dwords + alu_off * 3, alu_off, level, sequence & 0x2, type);
}
sequence >>= 2;
}
}
}
 
return 0;
}
 
void disasm_set_debug(enum debug_t d)
{
debug = d;
}
/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/freedreno/a2xx/fd2_blend.c
0,0 → 1,107
/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */
 
/*
* Copyright (C) 2012-2013 Rob Clark <robclark@freedesktop.org>
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Authors:
* Rob Clark <robclark@freedesktop.org>
*/
 
#include "pipe/p_state.h"
#include "util/u_string.h"
#include "util/u_memory.h"
 
#include "fd2_blend.h"
#include "fd2_context.h"
#include "fd2_util.h"
 
 
static enum a2xx_rb_blend_opcode
blend_func(unsigned func)
{
switch (func) {
case PIPE_BLEND_ADD:
return BLEND_DST_PLUS_SRC;
case PIPE_BLEND_MIN:
return BLEND_MIN_DST_SRC;
case PIPE_BLEND_MAX:
return BLEND_MAX_DST_SRC;
case PIPE_BLEND_SUBTRACT:
return BLEND_SRC_MINUS_DST;
case PIPE_BLEND_REVERSE_SUBTRACT:
return BLEND_DST_MINUS_SRC;
default:
DBG("invalid blend func: %x", func);
return 0;
}
}
 
void *
fd2_blend_state_create(struct pipe_context *pctx,
const struct pipe_blend_state *cso)
{
const struct pipe_rt_blend_state *rt = &cso->rt[0];
struct fd2_blend_stateobj *so;
 
if (cso->logicop_enable) {
DBG("Unsupported! logicop");
return NULL;
}
 
if (cso->independent_blend_enable) {
DBG("Unsupported! independent blend state");
return NULL;
}
 
so = CALLOC_STRUCT(fd2_blend_stateobj);
if (!so)
return NULL;
 
so->base = *cso;
 
so->rb_colorcontrol = A2XX_RB_COLORCONTROL_ROP_CODE(12);
 
so->rb_blendcontrol =
A2XX_RB_BLEND_CONTROL_COLOR_SRCBLEND(fd_blend_factor(rt->rgb_src_factor)) |
A2XX_RB_BLEND_CONTROL_COLOR_COMB_FCN(blend_func(rt->rgb_func)) |
A2XX_RB_BLEND_CONTROL_COLOR_DESTBLEND(fd_blend_factor(rt->rgb_dst_factor)) |
A2XX_RB_BLEND_CONTROL_ALPHA_SRCBLEND(fd_blend_factor(rt->alpha_src_factor)) |
A2XX_RB_BLEND_CONTROL_ALPHA_COMB_FCN(blend_func(rt->alpha_func)) |
A2XX_RB_BLEND_CONTROL_ALPHA_DESTBLEND(fd_blend_factor(rt->alpha_dst_factor));
 
if (rt->colormask & PIPE_MASK_R)
so->rb_colormask |= A2XX_RB_COLOR_MASK_WRITE_RED;
if (rt->colormask & PIPE_MASK_G)
so->rb_colormask |= A2XX_RB_COLOR_MASK_WRITE_GREEN;
if (rt->colormask & PIPE_MASK_B)
so->rb_colormask |= A2XX_RB_COLOR_MASK_WRITE_BLUE;
if (rt->colormask & PIPE_MASK_A)
so->rb_colormask |= A2XX_RB_COLOR_MASK_WRITE_ALPHA;
 
if (!rt->blend_enable)
so->rb_colorcontrol |= A2XX_RB_COLORCONTROL_BLEND_DISABLE;
 
if (cso->dither)
so->rb_colorcontrol |= A2XX_RB_COLORCONTROL_DITHER_MODE(DITHER_ALWAYS);
 
return so;
}
/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/freedreno/a2xx/fd2_blend.h
0,0 → 1,51
/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */
 
/*
* Copyright (C) 2012-2013 Rob Clark <robclark@freedesktop.org>
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Authors:
* Rob Clark <robclark@freedesktop.org>
*/
 
#ifndef FD2_BLEND_H_
#define FD2_BLEND_H_
 
#include "pipe/p_state.h"
#include "pipe/p_context.h"
 
struct fd2_blend_stateobj {
struct pipe_blend_state base;
uint32_t rb_blendcontrol;
uint32_t rb_colorcontrol; /* must be OR'd w/ zsa->rb_colorcontrol */
uint32_t rb_colormask;
};
 
static INLINE struct fd2_blend_stateobj *
fd2_blend_stateobj(struct pipe_blend_state *blend)
{
return (struct fd2_blend_stateobj *)blend;
}
 
void * fd2_blend_state_create(struct pipe_context *pctx,
const struct pipe_blend_state *cso);
 
#endif /* FD2_BLEND_H_ */
/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/freedreno/a2xx/fd2_compiler.c
0,0 → 1,1195
/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */
 
/*
* Copyright (C) 2012 Rob Clark <robclark@freedesktop.org>
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Authors:
* Rob Clark <robclark@freedesktop.org>
*/
 
#include "pipe/p_state.h"
#include "util/u_string.h"
#include "util/u_memory.h"
#include "util/u_inlines.h"
#include "tgsi/tgsi_parse.h"
#include "tgsi/tgsi_ureg.h"
#include "tgsi/tgsi_info.h"
#include "tgsi/tgsi_strings.h"
#include "tgsi/tgsi_dump.h"
 
#include "fd2_compiler.h"
#include "fd2_program.h"
#include "fd2_util.h"
 
#include "instr-a2xx.h"
#include "ir-a2xx.h"
 
struct fd2_compile_context {
struct fd_program_stateobj *prog;
struct fd2_shader_stateobj *so;
 
struct tgsi_parse_context parser;
unsigned type;
 
/* predicate stack: */
int pred_depth;
enum ir2_pred pred_stack[8];
 
/* Internal-Temporary and Predicate register assignment:
*
* Some TGSI instructions which translate into multiple actual
* instructions need one or more temporary registers, which are not
* assigned from TGSI perspective (ie. not TGSI_FILE_TEMPORARY).
* And some instructions (texture fetch) cannot write directly to
* output registers. We could be more clever and re-use dst or a
* src register in some cases. But for now don't try to be clever.
* Eventually we should implement an optimization pass that re-
* juggles the register usage and gets rid of unneeded temporaries.
*
* The predicate register must be valid across multiple TGSI
* instructions, but internal temporaries need not be. For this reason,
* once the predicate register is requested, until it is no longer
* needed, it gets the first register slot after the TGSI
* assigned temporaries (ie. num_regs[TGSI_FILE_TEMPORARY]), and the
* internal temporaries get the register slots above this.
*/
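/* For example (illustrative, not from the original source): if the TGSI
* shader declares temporaries 0..3, a live predicate occupies TGSI temp
* slot 4 and internal temporaries start at slot 5 (or slot 4 when no
* predicate is active); get_temp_gpr() then maps these TGSI indices past
* the input registers to obtain the hardware GPR number.
*/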
 
int pred_reg;
int num_internal_temps;
 
uint8_t num_regs[TGSI_FILE_COUNT];
 
/* maps input register idx to prog->export_linkage idx: */
uint8_t input_export_idx[64];
 
/* maps output register idx to prog->export_linkage idx: */
uint8_t output_export_idx[64];
 
/* idx/slot for last compiler generated immediate */
unsigned immediate_idx;
 
// TODO we can skip emitting exports in the VS that the FS doesn't need..
// and perhaps get rid of num_param..
unsigned num_position, num_param;
unsigned position, psize;
 
uint64_t need_sync;
 
/* current exec CF instruction */
struct ir2_cf *cf;
};
 
static int
semantic_idx(struct tgsi_declaration_semantic *semantic)
{
int idx = semantic->Name;
if (idx == TGSI_SEMANTIC_GENERIC)
idx = TGSI_SEMANTIC_COUNT + semantic->Index;
return idx;
}
 
/* assign/get the input/export register # for given semantic idx as
* returned by semantic_idx():
*/
static int
export_linkage(struct fd2_compile_context *ctx, int idx)
{
struct fd_program_stateobj *prog = ctx->prog;
 
/* if first time we've seen this export, assign the next available slot: */
if (prog->export_linkage[idx] == 0xff)
prog->export_linkage[idx] = prog->num_exports++;
 
return prog->export_linkage[idx];
}
 
static unsigned
compile_init(struct fd2_compile_context *ctx, struct fd_program_stateobj *prog,
struct fd2_shader_stateobj *so)
{
unsigned ret;
 
ctx->prog = prog;
ctx->so = so;
ctx->cf = NULL;
ctx->pred_depth = 0;
 
ret = tgsi_parse_init(&ctx->parser, so->tokens);
if (ret != TGSI_PARSE_OK)
return ret;
 
ctx->type = ctx->parser.FullHeader.Processor.Processor;
ctx->position = ~0;
ctx->psize = ~0;
ctx->num_position = 0;
ctx->num_param = 0;
ctx->need_sync = 0;
ctx->immediate_idx = 0;
ctx->pred_reg = -1;
ctx->num_internal_temps = 0;
 
memset(ctx->num_regs, 0, sizeof(ctx->num_regs));
memset(ctx->input_export_idx, 0, sizeof(ctx->input_export_idx));
memset(ctx->output_export_idx, 0, sizeof(ctx->output_export_idx));
 
/* do first pass to extract declarations: */
while (!tgsi_parse_end_of_tokens(&ctx->parser)) {
tgsi_parse_token(&ctx->parser);
 
switch (ctx->parser.FullToken.Token.Type) {
case TGSI_TOKEN_TYPE_DECLARATION: {
struct tgsi_full_declaration *decl =
&ctx->parser.FullToken.FullDeclaration;
if (decl->Declaration.File == TGSI_FILE_OUTPUT) {
unsigned name = decl->Semantic.Name;
 
assert(decl->Declaration.Semantic); // TODO is this ever not true?
 
ctx->output_export_idx[decl->Range.First] =
semantic_idx(&decl->Semantic);
 
if (ctx->type == TGSI_PROCESSOR_VERTEX) {
switch (name) {
case TGSI_SEMANTIC_POSITION:
ctx->position = ctx->num_regs[TGSI_FILE_OUTPUT];
ctx->num_position++;
break;
case TGSI_SEMANTIC_PSIZE:
ctx->psize = ctx->num_regs[TGSI_FILE_OUTPUT];
ctx->num_position++;
break;
case TGSI_SEMANTIC_COLOR:
case TGSI_SEMANTIC_GENERIC:
ctx->num_param++;
break;
default:
DBG("unknown VS semantic name: %s",
tgsi_semantic_names[name]);
assert(0);
}
} else {
switch (name) {
case TGSI_SEMANTIC_COLOR:
case TGSI_SEMANTIC_GENERIC:
ctx->num_param++;
break;
default:
DBG("unknown PS semantic name: %s",
tgsi_semantic_names[name]);
assert(0);
}
}
} else if (decl->Declaration.File == TGSI_FILE_INPUT) {
ctx->input_export_idx[decl->Range.First] =
semantic_idx(&decl->Semantic);
}
ctx->num_regs[decl->Declaration.File] =
MAX2(ctx->num_regs[decl->Declaration.File], decl->Range.Last + 1);
break;
}
case TGSI_TOKEN_TYPE_IMMEDIATE: {
struct tgsi_full_immediate *imm =
&ctx->parser.FullToken.FullImmediate;
unsigned n = ctx->so->num_immediates++;
memcpy(ctx->so->immediates[n].val, imm->u, 16);
break;
}
default:
break;
}
}
 
/* TGSI-generated immediates are always entire vec4's; the ones we
* generate internally are not:
*/
ctx->immediate_idx = ctx->so->num_immediates * 4;
 
ctx->so->first_immediate = ctx->num_regs[TGSI_FILE_CONSTANT];
 
tgsi_parse_free(&ctx->parser);
 
return tgsi_parse_init(&ctx->parser, so->tokens);
}
 
static void
compile_free(struct fd2_compile_context *ctx)
{
tgsi_parse_free(&ctx->parser);
}
 
static struct ir2_cf *
next_exec_cf(struct fd2_compile_context *ctx)
{
struct ir2_cf *cf = ctx->cf;
if (!cf || cf->exec.instrs_count >= ARRAY_SIZE(ctx->cf->exec.instrs))
ctx->cf = cf = ir2_cf_create(ctx->so->ir, EXEC);
return cf;
}
 
static void
compile_vtx_fetch(struct fd2_compile_context *ctx)
{
struct ir2_instruction **vfetch_instrs = ctx->so->vfetch_instrs;
int i;
for (i = 0; i < ctx->num_regs[TGSI_FILE_INPUT]; i++) {
struct ir2_instruction *instr = ir2_instr_create(
next_exec_cf(ctx), IR2_FETCH);
instr->fetch.opc = VTX_FETCH;
 
ctx->need_sync |= 1 << (i+1);
 
ir2_reg_create(instr, i+1, "xyzw", 0);
ir2_reg_create(instr, 0, "x", 0);
 
if (i == 0)
instr->sync = true;
 
vfetch_instrs[i] = instr;
}
ctx->so->num_vfetch_instrs = i;
ctx->cf = NULL;
}
 
/*
* For vertex shaders (VS):
* --- ------ -------------
*
* Inputs: R1-R(num_input)
* Constants: C0-C(num_const-1)
* Immediates: C(num_const)-C(num_const+num_imm-1)
* Outputs: export0-export(n) and export62, export63
* n is # of outputs minus gl_Position (export62) and gl_PointSize (export63)
* Temps: R(num_input+1)-R(num_input+num_temps)
*
* R0 could be clobbered after the vertex fetch instructions.. so we
* could use it for one of the temporaries.
*
* TODO: maybe the vertex fetch part could fetch first input into R0 as
* the last vtx fetch instruction, which would let us use the same
* register layout in either case.. although this is not what the blob
* compiler does.
*
*
* For frag shaders (PS):
* --- ---- -------------
*
* Inputs: R0-R(num_input-1)
* Constants: same as VS
* Immediates: same as VS
* Outputs: export0-export(num_outputs)
* Temps: R(num_input)-R(num_input+num_temps-1)
*
* In either case, immediates are appended after the constants
* (uniforms).
*
*/
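/* Worked example (illustrative only): a vertex shader with 2 inputs,
* 3 constants, 1 immediate vec4 and 2 temporaries would use R1-R2 for
* inputs, C0-C2 for constants, C3 for the immediate, and R3-R4 for
* temporaries (R0 is read by the VTX fetch instructions, see
* compile_vtx_fetch()).
*/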
 
static unsigned
get_temp_gpr(struct fd2_compile_context *ctx, int idx)
{
unsigned num = idx + ctx->num_regs[TGSI_FILE_INPUT];
if (ctx->type == TGSI_PROCESSOR_VERTEX)
num++;
return num;
}
 
static struct ir2_register *
add_dst_reg(struct fd2_compile_context *ctx, struct ir2_instruction *alu,
const struct tgsi_dst_register *dst)
{
unsigned flags = 0, num = 0;
char swiz[5];
 
switch (dst->File) {
case TGSI_FILE_OUTPUT:
flags |= IR2_REG_EXPORT;
if (ctx->type == TGSI_PROCESSOR_VERTEX) {
if (dst->Index == ctx->position) {
num = 62;
} else if (dst->Index == ctx->psize) {
num = 63;
} else {
num = export_linkage(ctx,
ctx->output_export_idx[dst->Index]);
}
} else {
num = dst->Index;
}
break;
case TGSI_FILE_TEMPORARY:
num = get_temp_gpr(ctx, dst->Index);
break;
default:
DBG("unsupported dst register file: %s",
tgsi_file_name(dst->File));
assert(0);
break;
}
 
swiz[0] = (dst->WriteMask & TGSI_WRITEMASK_X) ? 'x' : '_';
swiz[1] = (dst->WriteMask & TGSI_WRITEMASK_Y) ? 'y' : '_';
swiz[2] = (dst->WriteMask & TGSI_WRITEMASK_Z) ? 'z' : '_';
swiz[3] = (dst->WriteMask & TGSI_WRITEMASK_W) ? 'w' : '_';
swiz[4] = '\0';
 
return ir2_reg_create(alu, num, swiz, flags);
}
 
static struct ir2_register *
add_src_reg(struct fd2_compile_context *ctx, struct ir2_instruction *alu,
const struct tgsi_src_register *src)
{
static const char swiz_vals[] = {
'x', 'y', 'z', 'w',
};
char swiz[5];
unsigned flags = 0, num = 0;
 
switch (src->File) {
case TGSI_FILE_CONSTANT:
num = src->Index;
flags |= IR2_REG_CONST;
break;
case TGSI_FILE_INPUT:
if (ctx->type == TGSI_PROCESSOR_VERTEX) {
num = src->Index + 1;
} else {
num = export_linkage(ctx,
ctx->input_export_idx[src->Index]);
}
break;
case TGSI_FILE_TEMPORARY:
num = get_temp_gpr(ctx, src->Index);
break;
case TGSI_FILE_IMMEDIATE:
num = src->Index + ctx->num_regs[TGSI_FILE_CONSTANT];
flags |= IR2_REG_CONST;
break;
default:
DBG("unsupported src register file: %s",
tgsi_file_name(src->File));
assert(0);
break;
}
 
if (src->Absolute)
flags |= IR2_REG_ABS;
if (src->Negate)
flags |= IR2_REG_NEGATE;
 
swiz[0] = swiz_vals[src->SwizzleX];
swiz[1] = swiz_vals[src->SwizzleY];
swiz[2] = swiz_vals[src->SwizzleZ];
swiz[3] = swiz_vals[src->SwizzleW];
swiz[4] = '\0';
 
if ((ctx->need_sync & ((uint64_t)1 << num)) &&
!(flags & IR2_REG_CONST)) {
alu->sync = true;
ctx->need_sync &= ~((uint64_t)1 << num);
}
 
return ir2_reg_create(alu, num, swiz, flags);
}
 
static void
add_vector_clamp(struct tgsi_full_instruction *inst, struct ir2_instruction *alu)
{
switch (inst->Instruction.Saturate) {
case TGSI_SAT_NONE:
break;
case TGSI_SAT_ZERO_ONE:
alu->alu.vector_clamp = true;
break;
case TGSI_SAT_MINUS_PLUS_ONE:
DBG("unsupported saturate");
assert(0);
break;
}
}
 
static void
add_scalar_clamp(struct tgsi_full_instruction *inst, struct ir2_instruction *alu)
{
switch (inst->Instruction.Saturate) {
case TGSI_SAT_NONE:
break;
case TGSI_SAT_ZERO_ONE:
alu->alu.scalar_clamp = true;
break;
case TGSI_SAT_MINUS_PLUS_ONE:
DBG("unsupported saturate");
assert(0);
break;
}
}
 
static void
add_regs_vector_1(struct fd2_compile_context *ctx,
struct tgsi_full_instruction *inst, struct ir2_instruction *alu)
{
assert(inst->Instruction.NumSrcRegs == 1);
assert(inst->Instruction.NumDstRegs == 1);
 
add_dst_reg(ctx, alu, &inst->Dst[0].Register);
add_src_reg(ctx, alu, &inst->Src[0].Register);
add_src_reg(ctx, alu, &inst->Src[0].Register);
add_vector_clamp(inst, alu);
}
 
static void
add_regs_vector_2(struct fd2_compile_context *ctx,
struct tgsi_full_instruction *inst, struct ir2_instruction *alu)
{
assert(inst->Instruction.NumSrcRegs == 2);
assert(inst->Instruction.NumDstRegs == 1);
 
add_dst_reg(ctx, alu, &inst->Dst[0].Register);
add_src_reg(ctx, alu, &inst->Src[0].Register);
add_src_reg(ctx, alu, &inst->Src[1].Register);
add_vector_clamp(inst, alu);
}
 
static void
add_regs_vector_3(struct fd2_compile_context *ctx,
struct tgsi_full_instruction *inst, struct ir2_instruction *alu)
{
assert(inst->Instruction.NumSrcRegs == 3);
assert(inst->Instruction.NumDstRegs == 1);
 
add_dst_reg(ctx, alu, &inst->Dst[0].Register);
/* maybe the syntax should be re-arranged some day, but what the
* assembler/disassembler and ir.c expect is:
* MULADDv Rdst = Rsrc2 + Rsrc0 * Rsrc1
*/
add_src_reg(ctx, alu, &inst->Src[2].Register);
add_src_reg(ctx, alu, &inst->Src[0].Register);
add_src_reg(ctx, alu, &inst->Src[1].Register);
add_vector_clamp(inst, alu);
}
 
static void
add_regs_dummy_vector(struct ir2_instruction *alu)
{
/* create dummy, non-written vector dst/src regs
* for unused vector instr slot:
*/
ir2_reg_create(alu, 0, "____", 0); /* vector dst */
ir2_reg_create(alu, 0, NULL, 0); /* vector src1 */
ir2_reg_create(alu, 0, NULL, 0); /* vector src2 */
}
 
static void
add_regs_scalar_1(struct fd2_compile_context *ctx,
struct tgsi_full_instruction *inst, struct ir2_instruction *alu)
{
assert(inst->Instruction.NumSrcRegs == 1);
assert(inst->Instruction.NumDstRegs == 1);
 
add_regs_dummy_vector(alu);
 
add_dst_reg(ctx, alu, &inst->Dst[0].Register);
add_src_reg(ctx, alu, &inst->Src[0].Register);
add_scalar_clamp(inst, alu);
}
 
/*
* Helpers for TGSI instructions that don't map to a single shader instr:
*/
 
static void
src_from_dst(struct tgsi_src_register *src, struct tgsi_dst_register *dst)
{
src->File = dst->File;
src->Indirect = dst->Indirect;
src->Dimension = dst->Dimension;
src->Index = dst->Index;
src->Absolute = 0;
src->Negate = 0;
src->SwizzleX = TGSI_SWIZZLE_X;
src->SwizzleY = TGSI_SWIZZLE_Y;
src->SwizzleZ = TGSI_SWIZZLE_Z;
src->SwizzleW = TGSI_SWIZZLE_W;
}
 
/* Get internal-temp src/dst to use for a sequence of instructions
* generated by a single TGSI op.
*/
static void
get_internal_temp(struct fd2_compile_context *ctx,
struct tgsi_dst_register *tmp_dst,
struct tgsi_src_register *tmp_src)
{
int n;
 
tmp_dst->File = TGSI_FILE_TEMPORARY;
tmp_dst->WriteMask = TGSI_WRITEMASK_XYZW;
tmp_dst->Indirect = 0;
tmp_dst->Dimension = 0;
 
/* assign next temporary: */
n = ctx->num_internal_temps++;
if (ctx->pred_reg != -1)
n++;
 
tmp_dst->Index = ctx->num_regs[TGSI_FILE_TEMPORARY] + n;
 
src_from_dst(tmp_src, tmp_dst);
}
 
static void
get_predicate(struct fd2_compile_context *ctx, struct tgsi_dst_register *dst,
struct tgsi_src_register *src)
{
assert(ctx->pred_reg != -1);
 
dst->File = TGSI_FILE_TEMPORARY;
dst->WriteMask = TGSI_WRITEMASK_W;
dst->Indirect = 0;
dst->Dimension = 0;
dst->Index = get_temp_gpr(ctx, ctx->pred_reg);
 
if (src) {
src_from_dst(src, dst);
src->SwizzleX = TGSI_SWIZZLE_W;
src->SwizzleY = TGSI_SWIZZLE_W;
src->SwizzleZ = TGSI_SWIZZLE_W;
src->SwizzleW = TGSI_SWIZZLE_W;
}
}
 
static void
push_predicate(struct fd2_compile_context *ctx, struct tgsi_src_register *src)
{
struct ir2_instruction *alu;
struct tgsi_dst_register pred_dst;
 
/* NOTE blob compiler seems to always put PRED_* instrs in a CF by
* themselves:
*/
ctx->cf = NULL;
 
if (ctx->pred_depth == 0) {
/* assign predicate register: */
ctx->pred_reg = ctx->num_regs[TGSI_FILE_TEMPORARY];
 
get_predicate(ctx, &pred_dst, NULL);
 
alu = ir2_instr_create_alu(next_exec_cf(ctx), ~0, PRED_SETNEs);
add_regs_dummy_vector(alu);
add_dst_reg(ctx, alu, &pred_dst);
add_src_reg(ctx, alu, src);
} else {
struct tgsi_src_register pred_src;
 
get_predicate(ctx, &pred_dst, &pred_src);
 
alu = ir2_instr_create_alu(next_exec_cf(ctx), MULv, ~0);
add_dst_reg(ctx, alu, &pred_dst);
add_src_reg(ctx, alu, &pred_src);
add_src_reg(ctx, alu, src);
 
// XXX need to make PRED_SETE_PUSHv IR2_PRED_NONE.. but need to make
// sure src reg is valid if it was calculated with a predicate
// condition..
alu->pred = IR2_PRED_NONE;
}
 
/* save previous pred state to restore in pop_predicate(): */
ctx->pred_stack[ctx->pred_depth++] = ctx->so->ir->pred;
 
ctx->cf = NULL;
}
 
static void
pop_predicate(struct fd2_compile_context *ctx)
{
/* NOTE blob compiler seems to always put PRED_* instrs in a CF by
* themselves:
*/
ctx->cf = NULL;
 
/* restore previous predicate state: */
ctx->so->ir->pred = ctx->pred_stack[--ctx->pred_depth];
 
if (ctx->pred_depth != 0) {
struct ir2_instruction *alu;
struct tgsi_dst_register pred_dst;
struct tgsi_src_register pred_src;
 
get_predicate(ctx, &pred_dst, &pred_src);
 
alu = ir2_instr_create_alu(next_exec_cf(ctx), ~0, PRED_SET_POPs);
add_regs_dummy_vector(alu);
add_dst_reg(ctx, alu, &pred_dst);
add_src_reg(ctx, alu, &pred_src);
alu->pred = IR2_PRED_NONE;
} else {
/* predicate register no longer needed: */
ctx->pred_reg = -1;
}
 
ctx->cf = NULL;
}
 
static void
get_immediate(struct fd2_compile_context *ctx,
struct tgsi_src_register *reg, uint32_t val)
{
unsigned neg, swiz, idx, i;
/* actually maps 1:1 currently.. not sure if that is safe to rely on: */
static const unsigned swiz2tgsi[] = {
TGSI_SWIZZLE_X, TGSI_SWIZZLE_Y, TGSI_SWIZZLE_Z, TGSI_SWIZZLE_W,
};
 
for (i = 0; i < ctx->immediate_idx; i++) {
swiz = i % 4;
idx = i / 4;
 
if (ctx->so->immediates[idx].val[swiz] == val) {
neg = 0;
break;
}
 
if (ctx->so->immediates[idx].val[swiz] == -val) {
neg = 1;
break;
}
}
 
if (i == ctx->immediate_idx) {
/* need to generate a new immediate: */
swiz = i % 4;
idx = i / 4;
neg = 0;
ctx->so->immediates[idx].val[swiz] = val;
ctx->so->num_immediates = idx + 1;
ctx->immediate_idx++;
}
 
reg->File = TGSI_FILE_IMMEDIATE;
reg->Indirect = 0;
reg->Dimension = 0;
reg->Index = idx;
reg->Absolute = 0;
reg->Negate = neg;
reg->SwizzleX = swiz2tgsi[swiz];
reg->SwizzleY = swiz2tgsi[swiz];
reg->SwizzleZ = swiz2tgsi[swiz];
reg->SwizzleW = swiz2tgsi[swiz];
}
 
/* POW(a,b) = EXP2(b * LOG2(a)) */
static void
translate_pow(struct fd2_compile_context *ctx,
struct tgsi_full_instruction *inst)
{
struct tgsi_dst_register tmp_dst;
struct tgsi_src_register tmp_src;
struct ir2_instruction *alu;
 
get_internal_temp(ctx, &tmp_dst, &tmp_src);
 
alu = ir2_instr_create_alu(next_exec_cf(ctx), ~0, LOG_CLAMP);
add_regs_dummy_vector(alu);
add_dst_reg(ctx, alu, &tmp_dst);
add_src_reg(ctx, alu, &inst->Src[0].Register);
 
alu = ir2_instr_create_alu(next_exec_cf(ctx), MULv, ~0);
add_dst_reg(ctx, alu, &tmp_dst);
add_src_reg(ctx, alu, &tmp_src);
add_src_reg(ctx, alu, &inst->Src[1].Register);
 
/* NOTE: some of the instructions, like EXP_IEEE, seem hard-
* coded to take their input from the w component.
*/
switch(inst->Dst[0].Register.WriteMask) {
case TGSI_WRITEMASK_X:
tmp_src.SwizzleW = TGSI_SWIZZLE_X;
break;
case TGSI_WRITEMASK_Y:
tmp_src.SwizzleW = TGSI_SWIZZLE_Y;
break;
case TGSI_WRITEMASK_Z:
tmp_src.SwizzleW = TGSI_SWIZZLE_Z;
break;
case TGSI_WRITEMASK_W:
tmp_src.SwizzleW = TGSI_SWIZZLE_W;
break;
default:
DBG("invalid writemask!");
assert(0);
break;
}
 
alu = ir2_instr_create_alu(next_exec_cf(ctx), ~0, EXP_IEEE);
add_regs_dummy_vector(alu);
add_dst_reg(ctx, alu, &inst->Dst[0].Register);
add_src_reg(ctx, alu, &tmp_src);
add_scalar_clamp(inst, alu);
}
 
static void
translate_tex(struct fd2_compile_context *ctx,
struct tgsi_full_instruction *inst, unsigned opc)
{
struct ir2_instruction *instr;
struct ir2_register *reg;
struct tgsi_dst_register tmp_dst;
struct tgsi_src_register tmp_src;
const struct tgsi_src_register *coord;
bool using_temp = (inst->Dst[0].Register.File == TGSI_FILE_OUTPUT) ||
(inst->Instruction.Saturate != TGSI_SAT_NONE);
int idx;
 
if (using_temp || (opc == TGSI_OPCODE_TXP))
get_internal_temp(ctx, &tmp_dst, &tmp_src);
 
if (opc == TGSI_OPCODE_TXP) {
static const char *swiz[] = {
[TGSI_SWIZZLE_X] = "xxxx",
[TGSI_SWIZZLE_Y] = "yyyy",
[TGSI_SWIZZLE_Z] = "zzzz",
[TGSI_SWIZZLE_W] = "wwww",
};
 
/* TXP - Projective Texture Lookup:
*
* coord.x = src0.x / src0.w
* coord.y = src0.y / src0.w
* coord.z = src0.z / src0.w
* coord.w = src0.w
* bias = 0.0
*
* dst = texture_sample(unit, coord, bias)
*/
instr = ir2_instr_create_alu(next_exec_cf(ctx), MAXv, RECIP_IEEE);
 
/* MAXv: */
add_dst_reg(ctx, instr, &tmp_dst)->swizzle = "___w";
add_src_reg(ctx, instr, &inst->Src[0].Register);
add_src_reg(ctx, instr, &inst->Src[0].Register);
 
/* RECIP_IEEE: */
add_dst_reg(ctx, instr, &tmp_dst)->swizzle = "x___";
add_src_reg(ctx, instr, &inst->Src[0].Register)->swizzle =
swiz[inst->Src[0].Register.SwizzleW];
 
instr = ir2_instr_create_alu(next_exec_cf(ctx), MULv, ~0);
add_dst_reg(ctx, instr, &tmp_dst)->swizzle = "xyz_";
add_src_reg(ctx, instr, &tmp_src)->swizzle = "xxxx";
add_src_reg(ctx, instr, &inst->Src[0].Register);
 
coord = &tmp_src;
} else {
coord = &inst->Src[0].Register;
}
 
instr = ir2_instr_create(next_exec_cf(ctx), IR2_FETCH);
instr->fetch.opc = TEX_FETCH;
instr->fetch.is_cube = (inst->Texture.Texture == TGSI_TEXTURE_3D);
assert(inst->Texture.NumOffsets <= 1); // TODO what to do in other cases?
 
/* save off the tex fetch to be patched later with correct const_idx: */
idx = ctx->so->num_tfetch_instrs++;
ctx->so->tfetch_instrs[idx].samp_id = inst->Src[1].Register.Index;
ctx->so->tfetch_instrs[idx].instr = instr;
 
add_dst_reg(ctx, instr, using_temp ? &tmp_dst : &inst->Dst[0].Register);
reg = add_src_reg(ctx, instr, coord);
 
/* blob compiler always sets 3rd component to same as 1st for 2d: */
if (inst->Texture.Texture == TGSI_TEXTURE_2D)
reg->swizzle[2] = reg->swizzle[0];
 
/* dst register needs to be marked for sync: */
ctx->need_sync |= 1 << instr->regs[0]->num;
 
/* TODO we need some way to know if the tex fetch needs to sync on alu pipe.. */
instr->sync = true;
 
if (using_temp) {
/* texture fetch can't write directly to an export, so if tgsi
* is telling us the dst register is in the output file, we load
* the texture into a temp and then use an ALU instruction to move
* it to the output
*/
instr = ir2_instr_create_alu(next_exec_cf(ctx), MAXv, ~0);
 
add_dst_reg(ctx, instr, &inst->Dst[0].Register);
add_src_reg(ctx, instr, &tmp_src);
add_src_reg(ctx, instr, &tmp_src);
add_vector_clamp(inst, instr);
}
}
 
/* SGE(a,b) = GTE((b - a), 1.0, 0.0) */
/* SLT(a,b) = GTE((b - a), 0.0, 1.0) */
static void
translate_sge_slt(struct fd2_compile_context *ctx,
struct tgsi_full_instruction *inst, unsigned opc)
{
struct ir2_instruction *instr;
struct tgsi_dst_register tmp_dst;
struct tgsi_src_register tmp_src;
struct tgsi_src_register tmp_const;
float c0, c1;
 
switch (opc) {
default:
assert(0);
case TGSI_OPCODE_SGE:
c0 = 1.0;
c1 = 0.0;
break;
case TGSI_OPCODE_SLT:
c0 = 0.0;
c1 = 1.0;
break;
}
 
get_internal_temp(ctx, &tmp_dst, &tmp_src);
 
instr = ir2_instr_create_alu(next_exec_cf(ctx), ADDv, ~0);
add_dst_reg(ctx, instr, &tmp_dst);
add_src_reg(ctx, instr, &inst->Src[0].Register)->flags |= IR2_REG_NEGATE;
add_src_reg(ctx, instr, &inst->Src[1].Register);
 
instr = ir2_instr_create_alu(next_exec_cf(ctx), CNDGTEv, ~0);
add_dst_reg(ctx, instr, &inst->Dst[0].Register);
/* maybe the syntax should be re-arranged some day, but what the
* assembler/disassembler and ir.c expect is:
* MULADDv Rdst = Rsrc2 + Rsrc0 * Rsrc1
*/
get_immediate(ctx, &tmp_const, fui(c0));
add_src_reg(ctx, instr, &tmp_const);
add_src_reg(ctx, instr, &tmp_src);
get_immediate(ctx, &tmp_const, fui(c1));
add_src_reg(ctx, instr, &tmp_const);
}
 
/* LRP(a,b,c) = (a * b) + ((1 - a) * c) */
static void
translate_lrp(struct fd2_compile_context *ctx,
struct tgsi_full_instruction *inst,
unsigned opc)
{
struct ir2_instruction *instr;
struct tgsi_dst_register tmp_dst1, tmp_dst2;
struct tgsi_src_register tmp_src1, tmp_src2;
struct tgsi_src_register tmp_const;
 
get_internal_temp(ctx, &tmp_dst1, &tmp_src1);
get_internal_temp(ctx, &tmp_dst2, &tmp_src2);
 
get_immediate(ctx, &tmp_const, fui(1.0));
 
/* tmp1 = (a * b) */
instr = ir2_instr_create_alu(next_exec_cf(ctx), MULv, ~0);
add_dst_reg(ctx, instr, &tmp_dst1);
add_src_reg(ctx, instr, &inst->Src[0].Register);
add_src_reg(ctx, instr, &inst->Src[1].Register);
 
/* tmp2 = (1 - a) */
instr = ir2_instr_create_alu(next_exec_cf(ctx), ADDv, ~0);
add_dst_reg(ctx, instr, &tmp_dst2);
add_src_reg(ctx, instr, &tmp_const);
add_src_reg(ctx, instr, &inst->Src[0].Register)->flags |= IR2_REG_NEGATE;
 
/* tmp2 = tmp2 * c */
instr = ir2_instr_create_alu(next_exec_cf(ctx), MULv, ~0);
add_dst_reg(ctx, instr, &tmp_dst2);
add_src_reg(ctx, instr, &tmp_src2);
add_src_reg(ctx, instr, &inst->Src[2].Register);
 
/* dst = tmp1 + tmp2 */
instr = ir2_instr_create_alu(next_exec_cf(ctx), ADDv, ~0);
add_dst_reg(ctx, instr, &inst->Dst[0].Register);
add_src_reg(ctx, instr, &tmp_src1);
add_src_reg(ctx, instr, &tmp_src2);
}
 
static void
translate_trig(struct fd2_compile_context *ctx,
struct tgsi_full_instruction *inst,
unsigned opc)
{
struct ir2_instruction *instr;
struct tgsi_dst_register tmp_dst;
struct tgsi_src_register tmp_src;
struct tgsi_src_register tmp_const;
instr_scalar_opc_t op;
 
switch (opc) {
default:
assert(0);
case TGSI_OPCODE_SIN:
op = SIN;
break;
case TGSI_OPCODE_COS:
op = COS;
break;
}
 
get_internal_temp(ctx, &tmp_dst, &tmp_src);
 
tmp_dst.WriteMask = TGSI_WRITEMASK_X;
tmp_src.SwizzleX = tmp_src.SwizzleY =
tmp_src.SwizzleZ = tmp_src.SwizzleW = TGSI_SWIZZLE_X;
 
/* maybe the syntax should be re-arranged some day, but what the
* assembler/disassembler and ir.c expect is:
* MULADDv Rdst = Rsrc2 + Rsrc0 * Rsrc1
*/
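/* Range-reduce the angle before the SIN/COS scalar op (the hardware op
* appears to expect input in [-PI, PI]):
*   t = (frac(x * 1/(2*PI) + 0.5) * 2*PI) - PI
* which is what the 0.159155, 6.283185 and -3.141593 immediates below
* encode.
*/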
instr = ir2_instr_create_alu(next_exec_cf(ctx), MULADDv, ~0);
add_dst_reg(ctx, instr, &tmp_dst);
get_immediate(ctx, &tmp_const, fui(0.5));
add_src_reg(ctx, instr, &tmp_const);
add_src_reg(ctx, instr, &inst->Src[0].Register);
get_immediate(ctx, &tmp_const, fui(0.159155));
add_src_reg(ctx, instr, &tmp_const);
 
instr = ir2_instr_create_alu(next_exec_cf(ctx), FRACv, ~0);
add_dst_reg(ctx, instr, &tmp_dst);
add_src_reg(ctx, instr, &tmp_src);
add_src_reg(ctx, instr, &tmp_src);
 
instr = ir2_instr_create_alu(next_exec_cf(ctx), MULADDv, ~0);
add_dst_reg(ctx, instr, &tmp_dst);
get_immediate(ctx, &tmp_const, fui(-3.141593));
add_src_reg(ctx, instr, &tmp_const);
add_src_reg(ctx, instr, &tmp_src);
get_immediate(ctx, &tmp_const, fui(6.283185));
add_src_reg(ctx, instr, &tmp_const);
 
instr = ir2_instr_create_alu(next_exec_cf(ctx), ~0, op);
add_regs_dummy_vector(instr);
add_dst_reg(ctx, instr, &inst->Dst[0].Register);
add_src_reg(ctx, instr, &tmp_src);
}
 
/*
* Main part of compiler/translator:
*/
 
static void
translate_instruction(struct fd2_compile_context *ctx,
struct tgsi_full_instruction *inst)
{
unsigned opc = inst->Instruction.Opcode;
struct ir2_instruction *instr;
static struct ir2_cf *cf;
 
if (opc == TGSI_OPCODE_END)
return;
 
if (inst->Dst[0].Register.File == TGSI_FILE_OUTPUT) {
unsigned num = inst->Dst[0].Register.Index;
/* seems like we need to ensure that position vs param/pixel
* exports don't end up in the same EXEC clause.. easy way
* to do this is to force a new EXEC clause on the first appearance
* of a position or param/pixel export.
*/
if ((num == ctx->position) || (num == ctx->psize)) {
if (ctx->num_position > 0) {
ctx->cf = NULL;
ir2_cf_create_alloc(ctx->so->ir, SQ_POSITION,
ctx->num_position - 1);
ctx->num_position = 0;
}
} else {
if (ctx->num_param > 0) {
ctx->cf = NULL;
ir2_cf_create_alloc(ctx->so->ir, SQ_PARAMETER_PIXEL,
ctx->num_param - 1);
ctx->num_param = 0;
}
}
}
 
cf = next_exec_cf(ctx);
 
/* TODO turn this into a table: */
switch (opc) {
case TGSI_OPCODE_MOV:
instr = ir2_instr_create_alu(cf, MAXv, ~0);
add_regs_vector_1(ctx, inst, instr);
break;
case TGSI_OPCODE_RCP:
instr = ir2_instr_create_alu(cf, ~0, RECIP_IEEE);
add_regs_scalar_1(ctx, inst, instr);
break;
case TGSI_OPCODE_RSQ:
instr = ir2_instr_create_alu(cf, ~0, RECIPSQ_IEEE);
add_regs_scalar_1(ctx, inst, instr);
break;
case TGSI_OPCODE_SQRT:
instr = ir2_instr_create_alu(cf, ~0, SQRT_IEEE);
add_regs_scalar_1(ctx, inst, instr);
break;
case TGSI_OPCODE_MUL:
instr = ir2_instr_create_alu(cf, MULv, ~0);
add_regs_vector_2(ctx, inst, instr);
break;
case TGSI_OPCODE_ADD:
instr = ir2_instr_create_alu(cf, ADDv, ~0);
add_regs_vector_2(ctx, inst, instr);
break;
case TGSI_OPCODE_DP3:
instr = ir2_instr_create_alu(cf, DOT3v, ~0);
add_regs_vector_2(ctx, inst, instr);
break;
case TGSI_OPCODE_DP4:
instr = ir2_instr_create_alu(cf, DOT4v, ~0);
add_regs_vector_2(ctx, inst, instr);
break;
case TGSI_OPCODE_MIN:
instr = ir2_instr_create_alu(cf, MINv, ~0);
add_regs_vector_2(ctx, inst, instr);
break;
case TGSI_OPCODE_MAX:
instr = ir2_instr_create_alu(cf, MAXv, ~0);
add_regs_vector_2(ctx, inst, instr);
break;
case TGSI_OPCODE_SLT:
case TGSI_OPCODE_SGE:
translate_sge_slt(ctx, inst, opc);
break;
case TGSI_OPCODE_MAD:
instr = ir2_instr_create_alu(cf, MULADDv, ~0);
add_regs_vector_3(ctx, inst, instr);
break;
case TGSI_OPCODE_LRP:
translate_lrp(ctx, inst, opc);
break;
case TGSI_OPCODE_FRC:
instr = ir2_instr_create_alu(cf, FRACv, ~0);
add_regs_vector_1(ctx, inst, instr);
break;
case TGSI_OPCODE_FLR:
instr = ir2_instr_create_alu(cf, FLOORv, ~0);
add_regs_vector_1(ctx, inst, instr);
break;
case TGSI_OPCODE_EX2:
instr = ir2_instr_create_alu(cf, ~0, EXP_IEEE);
add_regs_scalar_1(ctx, inst, instr);
break;
case TGSI_OPCODE_POW:
translate_pow(ctx, inst);
break;
case TGSI_OPCODE_ABS:
instr = ir2_instr_create_alu(cf, MAXv, ~0);
add_regs_vector_1(ctx, inst, instr);
instr->regs[1]->flags |= IR2_REG_NEGATE; /* src0 */
break;
case TGSI_OPCODE_COS:
case TGSI_OPCODE_SIN:
translate_trig(ctx, inst, opc);
break;
case TGSI_OPCODE_TEX:
case TGSI_OPCODE_TXP:
translate_tex(ctx, inst, opc);
break;
case TGSI_OPCODE_CMP:
instr = ir2_instr_create_alu(cf, CNDGTEv, ~0);
add_regs_vector_3(ctx, inst, instr);
// TODO this should be src0 if regs were in a sane order..
instr->regs[2]->flags ^= IR2_REG_NEGATE; /* src1 */
break;
case TGSI_OPCODE_IF:
push_predicate(ctx, &inst->Src[0].Register);
ctx->so->ir->pred = IR2_PRED_EQ;
break;
case TGSI_OPCODE_ELSE:
ctx->so->ir->pred = IR2_PRED_NE;
/* not sure if this is required in all cases, but blob compiler
* won't combine EQ and NE in same CF:
*/
ctx->cf = NULL;
break;
case TGSI_OPCODE_ENDIF:
pop_predicate(ctx);
break;
case TGSI_OPCODE_F2I:
instr = ir2_instr_create_alu(cf, TRUNCv, ~0);
add_regs_vector_1(ctx, inst, instr);
break;
default:
DBG("unknown TGSI opc: %s", tgsi_get_opcode_name(opc));
tgsi_dump(ctx->so->tokens, 0);
assert(0);
break;
}
 
/* internal temporaries are only valid for the duration of a single
* TGSI instruction:
*/
ctx->num_internal_temps = 0;
}
 
static void
compile_instructions(struct fd2_compile_context *ctx)
{
while (!tgsi_parse_end_of_tokens(&ctx->parser)) {
tgsi_parse_token(&ctx->parser);
 
switch (ctx->parser.FullToken.Token.Type) {
case TGSI_TOKEN_TYPE_INSTRUCTION:
translate_instruction(ctx,
&ctx->parser.FullToken.FullInstruction);
break;
default:
break;
}
}
 
ctx->cf->cf_type = EXEC_END;
}
 
int
fd2_compile_shader(struct fd_program_stateobj *prog,
struct fd2_shader_stateobj *so)
{
struct fd2_compile_context ctx;
 
ir2_shader_destroy(so->ir);
so->ir = ir2_shader_create();
so->num_vfetch_instrs = so->num_tfetch_instrs = so->num_immediates = 0;
 
if (compile_init(&ctx, prog, so) != TGSI_PARSE_OK)
return -1;
 
if (ctx.type == TGSI_PROCESSOR_VERTEX) {
compile_vtx_fetch(&ctx);
} else if (ctx.type == TGSI_PROCESSOR_FRAGMENT) {
prog->num_exports = 0;
memset(prog->export_linkage, 0xff,
sizeof(prog->export_linkage));
}
 
compile_instructions(&ctx);
 
compile_free(&ctx);
 
return 0;
}
 
/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/freedreno/a2xx/fd2_compiler.h
0,0 → 1,38
/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */
 
/*
* Copyright (C) 2012 Rob Clark <robclark@freedesktop.org>
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Authors:
* Rob Clark <robclark@freedesktop.org>
*/
 
#ifndef FD2_COMPILER_H_
#define FD2_COMPILER_H_
 
#include "fd2_program.h"
#include "fd2_util.h"
 
int fd2_compile_shader(struct fd_program_stateobj *prog,
struct fd2_shader_stateobj *so);
 
#endif /* FD2_COMPILER_H_ */
/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/freedreno/a2xx/fd2_context.c
0,0 → 1,125
/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */
 
/*
* Copyright (C) 2013 Rob Clark <robclark@freedesktop.org>
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Authors:
* Rob Clark <robclark@freedesktop.org>
*/
 
 
#include "fd2_context.h"
#include "fd2_blend.h"
#include "fd2_draw.h"
#include "fd2_emit.h"
#include "fd2_gmem.h"
#include "fd2_program.h"
#include "fd2_rasterizer.h"
#include "fd2_texture.h"
#include "fd2_zsa.h"
 
static void
fd2_context_destroy(struct pipe_context *pctx)
{
fd_context_destroy(pctx);
}
 
static struct pipe_resource *
create_solid_vertexbuf(struct pipe_context *pctx)
{
static const float init_shader_const[] = {
/* for clear/gmem2mem: */
-1.000000, +1.000000, +1.000000, +1.100000,
+1.000000, +1.000000, -1.000000, -1.100000,
+1.000000, +1.100000, -1.100000, +1.000000,
/* for mem2gmem: (vertices) */
-1.000000, +1.000000, +1.000000, +1.000000,
+1.000000, +1.000000, -1.000000, -1.000000,
+1.000000, +1.000000, -1.000000, +1.000000,
/* for mem2gmem: (tex coords) */
+0.000000, +0.000000, +1.000000, +0.000000,
+0.000000, +1.000000, +1.000000, +1.000000,
};
struct pipe_resource *prsc = pipe_buffer_create(pctx->screen,
PIPE_BIND_CUSTOM, PIPE_USAGE_IMMUTABLE, sizeof(init_shader_const));
pipe_buffer_write(pctx, prsc, 0,
sizeof(init_shader_const), init_shader_const);
return prsc;
}
 
static const uint8_t a22x_primtypes[PIPE_PRIM_MAX] = {
[PIPE_PRIM_POINTS] = DI_PT_POINTLIST_A2XX,
[PIPE_PRIM_LINES] = DI_PT_LINELIST,
[PIPE_PRIM_LINE_STRIP] = DI_PT_LINESTRIP,
[PIPE_PRIM_LINE_LOOP] = DI_PT_LINELOOP,
[PIPE_PRIM_TRIANGLES] = DI_PT_TRILIST,
[PIPE_PRIM_TRIANGLE_STRIP] = DI_PT_TRISTRIP,
[PIPE_PRIM_TRIANGLE_FAN] = DI_PT_TRIFAN,
};
 
static const uint8_t a20x_primtypes[PIPE_PRIM_MAX] = {
[PIPE_PRIM_POINTS] = DI_PT_POINTLIST_A2XX,
[PIPE_PRIM_LINES] = DI_PT_LINELIST,
[PIPE_PRIM_LINE_STRIP] = DI_PT_LINESTRIP,
[PIPE_PRIM_TRIANGLES] = DI_PT_TRILIST,
[PIPE_PRIM_TRIANGLE_STRIP] = DI_PT_TRISTRIP,
[PIPE_PRIM_TRIANGLE_FAN] = DI_PT_TRIFAN,
};
 
struct pipe_context *
fd2_context_create(struct pipe_screen *pscreen, void *priv)
{
struct fd_screen *screen = fd_screen(pscreen);
struct fd2_context *fd2_ctx = CALLOC_STRUCT(fd2_context);
struct pipe_context *pctx;
 
if (!fd2_ctx)
return NULL;
 
pctx = &fd2_ctx->base.base;
 
fd2_ctx->base.dev = fd_device_ref(screen->dev);
fd2_ctx->base.screen = fd_screen(pscreen);
 
pctx->destroy = fd2_context_destroy;
pctx->create_blend_state = fd2_blend_state_create;
pctx->create_rasterizer_state = fd2_rasterizer_state_create;
pctx->create_depth_stencil_alpha_state = fd2_zsa_state_create;
 
fd2_draw_init(pctx);
fd2_gmem_init(pctx);
fd2_texture_init(pctx);
fd2_prog_init(pctx);
 
pctx = fd_context_init(&fd2_ctx->base, pscreen,
(screen->gpu_id >= 220) ? a22x_primtypes : a20x_primtypes,
priv);
if (!pctx)
return NULL;
 
/* construct vertex state used for solid ops (clear, and gmem<->mem) */
fd2_ctx->solid_vertexbuf = create_solid_vertexbuf(pctx);
 
fd2_emit_setup(&fd2_ctx->base);
 
return pctx;
}
/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/freedreno/a2xx/fd2_context.h
0,0 → 1,52
/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */
 
/*
* Copyright (C) 2013 Rob Clark <robclark@freedesktop.org>
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Authors:
* Rob Clark <robclark@freedesktop.org>
*/
 
#ifndef FD2_CONTEXT_H_
#define FD2_CONTEXT_H_
 
#include "freedreno_context.h"
 
struct fd2_context {
struct fd_context base;
 
/* vertex buf used for clear/gmem->mem vertices, and mem->gmem
* vertices and tex coords:
*/
struct pipe_resource *solid_vertexbuf;
};
 
static INLINE struct fd2_context *
fd2_context(struct fd_context *ctx)
{
return (struct fd2_context *)ctx;
}
 
struct pipe_context *
fd2_context_create(struct pipe_screen *pscreen, void *priv);
 
#endif /* FD2_CONTEXT_H_ */
/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/freedreno/a2xx/fd2_draw.c
0,0 → 1,285
/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */
 
/*
* Copyright (C) 2012-2013 Rob Clark <robclark@freedesktop.org>
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Authors:
* Rob Clark <robclark@freedesktop.org>
*/
 
#include "pipe/p_state.h"
#include "util/u_string.h"
#include "util/u_memory.h"
#include "util/u_prim.h"
 
#include "freedreno_state.h"
#include "freedreno_resource.h"
 
#include "fd2_draw.h"
#include "fd2_context.h"
#include "fd2_emit.h"
#include "fd2_program.h"
#include "fd2_util.h"
#include "fd2_zsa.h"
 
 
static void
emit_cacheflush(struct fd_ringbuffer *ring)
{
unsigned i;
 
for (i = 0; i < 12; i++) {
OUT_PKT3(ring, CP_EVENT_WRITE, 1);
OUT_RING(ring, CACHE_FLUSH);
}
}
 
static void
emit_vertexbufs(struct fd_context *ctx)
{
struct fd_vertex_stateobj *vtx = ctx->vtx.vtx;
struct fd_vertexbuf_stateobj *vertexbuf = &ctx->vtx.vertexbuf;
struct fd2_vertex_buf bufs[PIPE_MAX_ATTRIBS];
unsigned i;
 
if (!vtx->num_elements)
return;
 
for (i = 0; i < vtx->num_elements; i++) {
struct pipe_vertex_element *elem = &vtx->pipe[i];
struct pipe_vertex_buffer *vb =
&vertexbuf->vb[elem->vertex_buffer_index];
bufs[i].offset = vb->buffer_offset;
bufs[i].size = fd_bo_size(fd_resource(vb->buffer)->bo);
bufs[i].prsc = vb->buffer;
}
 
// NOTE I believe the 0x78 (or 0x9c in solid_vp) relates to the
// CONST(20,0) (or CONST(26,0) in solid_vp)
 
fd2_emit_vertex_bufs(ctx->ring, 0x78, bufs, vtx->num_elements);
}
 
static void
fd2_draw_vbo(struct fd_context *ctx, const struct pipe_draw_info *info)
{
struct fd_ringbuffer *ring = ctx->ring;
 
if (ctx->dirty & FD_DIRTY_VTXBUF)
emit_vertexbufs(ctx);
 
fd2_emit_state(ctx, ctx->dirty);
 
OUT_PKT3(ring, CP_SET_CONSTANT, 2);
OUT_RING(ring, CP_REG(REG_A2XX_VGT_INDX_OFFSET));
OUT_RING(ring, info->start);
 
OUT_PKT3(ring, CP_SET_CONSTANT, 2);
OUT_RING(ring, CP_REG(REG_A2XX_VGT_VERTEX_REUSE_BLOCK_CNTL));
OUT_RING(ring, 0x0000003b);
 
OUT_PKT0(ring, REG_A2XX_TC_CNTL_STATUS, 1);
OUT_RING(ring, A2XX_TC_CNTL_STATUS_L2_INVALIDATE);
 
OUT_WFI (ring);
 
OUT_PKT3(ring, CP_SET_CONSTANT, 3);
OUT_RING(ring, CP_REG(REG_A2XX_VGT_MAX_VTX_INDX));
OUT_RING(ring, info->max_index); /* VGT_MAX_VTX_INDX */
OUT_RING(ring, info->min_index); /* VGT_MIN_VTX_INDX */
 
fd_draw_emit(ctx, ring, ctx->primtypes[info->mode],
IGNORE_VISIBILITY, info);
 
OUT_PKT3(ring, CP_SET_CONSTANT, 2);
OUT_RING(ring, CP_REG(REG_A2XX_UNKNOWN_2010));
OUT_RING(ring, 0x00000000);
 
emit_cacheflush(ring);
}
 
 
static void
fd2_clear(struct fd_context *ctx, unsigned buffers,
const union pipe_color_union *color, double depth, unsigned stencil)
{
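/* the clear is implemented as an ordinary draw: a full-framebuffer
 * RECTLIST using the solid shader program, with the depth/stencil/color
 * write state below set up according to which buffers are being cleared:
 */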
struct fd2_context *fd2_ctx = fd2_context(ctx);
struct fd_ringbuffer *ring = ctx->ring;
struct pipe_framebuffer_state *fb = &ctx->framebuffer;
uint32_t reg, colr = 0;
 
if ((buffers & PIPE_CLEAR_COLOR) && fb->nr_cbufs)
colr = pack_rgba(fb->cbufs[0]->format, color->f);
 
/* emit generic state now: */
fd2_emit_state(ctx, ctx->dirty &
(FD_DIRTY_BLEND | FD_DIRTY_VIEWPORT |
FD_DIRTY_FRAMEBUFFER | FD_DIRTY_SCISSOR));
 
fd2_emit_vertex_bufs(ring, 0x9c, (struct fd2_vertex_buf[]) {
{ .prsc = fd2_ctx->solid_vertexbuf, .size = 48 },
}, 1);
 
OUT_PKT3(ring, CP_SET_CONSTANT, 2);
OUT_RING(ring, CP_REG(REG_A2XX_VGT_INDX_OFFSET));
OUT_RING(ring, 0);
 
OUT_PKT3(ring, CP_SET_CONSTANT, 2);
OUT_RING(ring, CP_REG(REG_A2XX_VGT_VERTEX_REUSE_BLOCK_CNTL));
OUT_RING(ring, 0x0000028f);
 
fd2_program_emit(ring, &ctx->solid_prog);
 
OUT_PKT0(ring, REG_A2XX_TC_CNTL_STATUS, 1);
OUT_RING(ring, A2XX_TC_CNTL_STATUS_L2_INVALIDATE);
 
OUT_PKT3(ring, CP_SET_CONSTANT, 2);
OUT_RING(ring, CP_REG(REG_A2XX_CLEAR_COLOR));
OUT_RING(ring, colr);
 
OUT_PKT3(ring, CP_SET_CONSTANT, 2);
OUT_RING(ring, CP_REG(REG_A2XX_A220_RB_LRZ_VSC_CONTROL));
OUT_RING(ring, 0x00000084);
 
OUT_PKT3(ring, CP_SET_CONSTANT, 2);
OUT_RING(ring, CP_REG(REG_A2XX_RB_COPY_CONTROL));
reg = 0;
if (buffers & (PIPE_CLEAR_DEPTH | PIPE_CLEAR_STENCIL)) {
reg |= A2XX_RB_COPY_CONTROL_DEPTH_CLEAR_ENABLE;
switch (fd_pipe2depth(fb->zsbuf->format)) {
case DEPTHX_24_8:
if (buffers & PIPE_CLEAR_DEPTH)
reg |= A2XX_RB_COPY_CONTROL_CLEAR_MASK(0xe);
if (buffers & PIPE_CLEAR_STENCIL)
reg |= A2XX_RB_COPY_CONTROL_CLEAR_MASK(0x1);
break;
case DEPTHX_16:
if (buffers & PIPE_CLEAR_DEPTH)
reg |= A2XX_RB_COPY_CONTROL_CLEAR_MASK(0xf);
break;
default:
debug_assert(0);
break;
}
}
OUT_RING(ring, reg);
 
OUT_PKT3(ring, CP_SET_CONSTANT, 2);
OUT_RING(ring, CP_REG(REG_A2XX_RB_DEPTH_CLEAR));
reg = 0;
if (buffers & (PIPE_CLEAR_DEPTH | PIPE_CLEAR_STENCIL)) {
switch (fd_pipe2depth(fb->zsbuf->format)) {
case DEPTHX_24_8:
reg = (((uint32_t)(0xffffff * depth)) << 8) |
(stencil & 0xff);
break;
case DEPTHX_16:
reg = (uint32_t)(0xffffffff * depth);
break;
default:
debug_assert(0);
break;
}
}
OUT_RING(ring, reg);
 
OUT_PKT3(ring, CP_SET_CONSTANT, 2);
OUT_RING(ring, CP_REG(REG_A2XX_RB_DEPTHCONTROL));
reg = 0;
if (buffers & PIPE_CLEAR_DEPTH) {
reg |= A2XX_RB_DEPTHCONTROL_ZFUNC(FUNC_ALWAYS) |
A2XX_RB_DEPTHCONTROL_Z_ENABLE |
A2XX_RB_DEPTHCONTROL_Z_WRITE_ENABLE |
A2XX_RB_DEPTHCONTROL_EARLY_Z_ENABLE;
}
if (buffers & PIPE_CLEAR_STENCIL) {
reg |= A2XX_RB_DEPTHCONTROL_STENCILFUNC(FUNC_ALWAYS) |
A2XX_RB_DEPTHCONTROL_STENCIL_ENABLE |
A2XX_RB_DEPTHCONTROL_STENCILZPASS(STENCIL_REPLACE);
}
OUT_RING(ring, reg);
 
OUT_PKT3(ring, CP_SET_CONSTANT, 3);
OUT_RING(ring, CP_REG(REG_A2XX_RB_STENCILREFMASK_BF));
OUT_RING(ring, 0xff000000 | A2XX_RB_STENCILREFMASK_BF_STENCILWRITEMASK(0xff));
OUT_RING(ring, 0xff000000 | A2XX_RB_STENCILREFMASK_STENCILWRITEMASK(0xff));
 
OUT_PKT3(ring, CP_SET_CONSTANT, 2);
OUT_RING(ring, CP_REG(REG_A2XX_RB_COLORCONTROL));
OUT_RING(ring, A2XX_RB_COLORCONTROL_ALPHA_FUNC(FUNC_ALWAYS) |
A2XX_RB_COLORCONTROL_BLEND_DISABLE |
A2XX_RB_COLORCONTROL_ROP_CODE(12) |
A2XX_RB_COLORCONTROL_DITHER_MODE(DITHER_DISABLE) |
A2XX_RB_COLORCONTROL_DITHER_TYPE(DITHER_PIXEL));
 
OUT_PKT3(ring, CP_SET_CONSTANT, 3);
OUT_RING(ring, CP_REG(REG_A2XX_PA_CL_CLIP_CNTL));
OUT_RING(ring, 0x00000000); /* PA_CL_CLIP_CNTL */
OUT_RING(ring, A2XX_PA_SU_SC_MODE_CNTL_PROVOKING_VTX_LAST | /* PA_SU_SC_MODE_CNTL */
A2XX_PA_SU_SC_MODE_CNTL_FRONT_PTYPE(PC_DRAW_TRIANGLES) |
A2XX_PA_SU_SC_MODE_CNTL_BACK_PTYPE(PC_DRAW_TRIANGLES));
 
OUT_PKT3(ring, CP_SET_CONSTANT, 2);
OUT_RING(ring, CP_REG(REG_A2XX_PA_SC_AA_MASK));
OUT_RING(ring, 0x0000ffff);
 
OUT_PKT3(ring, CP_SET_CONSTANT, 3);
OUT_RING(ring, CP_REG(REG_A2XX_PA_SC_WINDOW_SCISSOR_TL));
OUT_RING(ring, xy2d(0,0)); /* PA_SC_WINDOW_SCISSOR_TL */
OUT_RING(ring, xy2d(fb->width, /* PA_SC_WINDOW_SCISSOR_BR */
fb->height));
 
OUT_PKT3(ring, CP_SET_CONSTANT, 2);
OUT_RING(ring, CP_REG(REG_A2XX_RB_COLOR_MASK));
if (buffers & PIPE_CLEAR_COLOR) {
OUT_RING(ring, A2XX_RB_COLOR_MASK_WRITE_RED |
A2XX_RB_COLOR_MASK_WRITE_GREEN |
A2XX_RB_COLOR_MASK_WRITE_BLUE |
A2XX_RB_COLOR_MASK_WRITE_ALPHA);
} else {
OUT_RING(ring, 0x0);
}
 
OUT_PKT3(ring, CP_SET_CONSTANT, 3);
OUT_RING(ring, CP_REG(REG_A2XX_VGT_MAX_VTX_INDX));
OUT_RING(ring, 3); /* VGT_MAX_VTX_INDX */
OUT_RING(ring, 0); /* VGT_MIN_VTX_INDX */
 
fd_draw(ctx, ring, DI_PT_RECTLIST, IGNORE_VISIBILITY,
DI_SRC_SEL_AUTO_INDEX, 3, 0, INDEX_SIZE_IGN, 0, 0, NULL);
 
OUT_PKT3(ring, CP_SET_CONSTANT, 2);
OUT_RING(ring, CP_REG(REG_A2XX_A220_RB_LRZ_VSC_CONTROL));
OUT_RING(ring, 0x00000000);
 
OUT_PKT3(ring, CP_SET_CONSTANT, 2);
OUT_RING(ring, CP_REG(REG_A2XX_RB_COPY_CONTROL));
OUT_RING(ring, 0x00000000);
}
 
void
fd2_draw_init(struct pipe_context *pctx)
{
struct fd_context *ctx = fd_context(pctx);
ctx->draw_vbo = fd2_draw_vbo;
ctx->clear = fd2_clear;
}
/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/freedreno/a2xx/fd2_draw.h
0,0 → 1,38
/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */
 
/*
* Copyright (C) 2012-2013 Rob Clark <robclark@freedesktop.org>
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Authors:
* Rob Clark <robclark@freedesktop.org>
*/
 
#ifndef FD2_DRAW_H_
#define FD2_DRAW_H_
 
#include "pipe/p_context.h"
 
#include "freedreno_draw.h"
 
void fd2_draw_init(struct pipe_context *pctx);
 
#endif /* FD2_DRAW_H_ */
/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/freedreno/a2xx/fd2_emit.c
0,0 → 1,448
/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */
 
/*
* Copyright (C) 2012-2013 Rob Clark <robclark@freedesktop.org>
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Authors:
* Rob Clark <robclark@freedesktop.org>
*/
 
#include "pipe/p_state.h"
#include "util/u_string.h"
#include "util/u_memory.h"
#include "util/u_helpers.h"
 
#include "freedreno_resource.h"
 
#include "fd2_emit.h"
#include "fd2_blend.h"
#include "fd2_context.h"
#include "fd2_program.h"
#include "fd2_rasterizer.h"
#include "fd2_texture.h"
#include "fd2_util.h"
#include "fd2_zsa.h"
 
/* NOTE: just define the position for const regs statically.. the blob
* driver doesn't seem to change these dynamically, and I can't really
* think of a good reason to do so..
*/
#define VS_CONST_BASE 0x20
#define PS_CONST_BASE 0x120
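/* (these bases are in units of vec4 const regs; the CP_SET_CONSTANT
 * offsets used below are in dwords, hence the '* 4' when they are
 * passed to emit_constants())
 */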
 
static void
emit_constants(struct fd_ringbuffer *ring, uint32_t base,
struct fd_constbuf_stateobj *constbuf,
struct fd2_shader_stateobj *shader)
{
uint32_t enabled_mask = constbuf->enabled_mask;
uint32_t start_base = base;
unsigned i;
 
// XXX TODO only emit dirty consts.. but we need to keep track of
// whether they are clobbered by a clear, gmem2mem, or mem2gmem..
constbuf->dirty_mask = enabled_mask;
 
/* emit user constants: */
while (enabled_mask) {
unsigned index = ffs(enabled_mask) - 1;
struct pipe_constant_buffer *cb = &constbuf->cb[index];
unsigned size = align(cb->buffer_size, 4) / 4; /* size in dwords */
 
// I expect that size should be a multiple of vec4's:
assert(size == align(size, 4));
 
/* hmm, sometimes we still seem to end up with consts bound,
* even if the shader isn't using them, which ends up overwriting
* the const regs used for immediates.. this is a hack to work
* around that:
*/
if (shader && ((base - start_base) >= (shader->first_immediate * 4)))
break;
 
if (constbuf->dirty_mask & (1 << index)) {
const uint32_t *dwords;
 
if (cb->user_buffer) {
dwords = cb->user_buffer;
} else {
struct fd_resource *rsc = fd_resource(cb->buffer);
dwords = fd_bo_map(rsc->bo);
}
 
dwords = (uint32_t *)(((uint8_t *)dwords) + cb->buffer_offset);
 
OUT_PKT3(ring, CP_SET_CONSTANT, size + 1);
OUT_RING(ring, base);
for (i = 0; i < size; i++)
OUT_RING(ring, *(dwords++));
 
constbuf->dirty_mask &= ~(1 << index);
}
 
base += size;
enabled_mask &= ~(1 << index);
}
 
/* emit shader immediates: */
if (shader) {
for (i = 0; i < shader->num_immediates; i++) {
OUT_PKT3(ring, CP_SET_CONSTANT, 5);
OUT_RING(ring, start_base + (4 * (shader->first_immediate + i)));
OUT_RING(ring, shader->immediates[i].val[0]);
OUT_RING(ring, shader->immediates[i].val[1]);
OUT_RING(ring, shader->immediates[i].val[2]);
OUT_RING(ring, shader->immediates[i].val[3]);
base += 4;
}
}
}
 
typedef uint32_t texmask;
 
static texmask
emit_texture(struct fd_ringbuffer *ring, struct fd_context *ctx,
struct fd_texture_stateobj *tex, unsigned samp_id, texmask emitted)
{
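/* each sampler/view pair takes a block of six consts; the 'emitted'
 * mask lets the caller skip a slot that has already been written
 * (e.g. the same texture referenced from both vertex and fragment
 * texture state):
 */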
unsigned const_idx = fd2_get_const_idx(ctx, tex, samp_id);
static const struct fd2_sampler_stateobj dummy_sampler = {};
const struct fd2_sampler_stateobj *sampler;
struct fd2_pipe_sampler_view *view;
 
if (emitted & (1 << const_idx))
return 0;
 
sampler = tex->samplers[samp_id] ?
fd2_sampler_stateobj(tex->samplers[samp_id]) :
&dummy_sampler;
view = fd2_pipe_sampler_view(tex->textures[samp_id]);
 
OUT_PKT3(ring, CP_SET_CONSTANT, 7);
OUT_RING(ring, 0x00010000 + (0x6 * const_idx));
 
OUT_RING(ring, sampler->tex0 | view->tex0);
OUT_RELOC(ring, fd_resource(view->base.texture)->bo, 0, view->fmt, 0);
OUT_RING(ring, view->tex2);
OUT_RING(ring, sampler->tex3 | view->tex3);
OUT_RING(ring, sampler->tex4);
OUT_RING(ring, sampler->tex5);
 
return (1 << const_idx);
}
 
static void
emit_textures(struct fd_ringbuffer *ring, struct fd_context *ctx)
{
texmask emitted = 0;
unsigned i;
 
for (i = 0; i < ctx->verttex.num_samplers; i++)
if (ctx->verttex.samplers[i])
emitted |= emit_texture(ring, ctx, &ctx->verttex, i, emitted);
 
for (i = 0; i < ctx->fragtex.num_samplers; i++)
if (ctx->fragtex.samplers[i])
emitted |= emit_texture(ring, ctx, &ctx->fragtex, i, emitted);
}
 
void
fd2_emit_vertex_bufs(struct fd_ringbuffer *ring, uint32_t val,
struct fd2_vertex_buf *vbufs, uint32_t n)
{
unsigned i;
 
OUT_PKT3(ring, CP_SET_CONSTANT, 1 + (2 * n));
OUT_RING(ring, (0x1 << 16) | (val & 0xffff));
for (i = 0; i < n; i++) {
struct fd_resource *rsc = fd_resource(vbufs[i].prsc);
OUT_RELOC(ring, rsc->bo, vbufs[i].offset, 3, 0);
OUT_RING (ring, vbufs[i].size);
}
}
 
void
fd2_emit_state(struct fd_context *ctx, uint32_t dirty)
{
struct fd2_blend_stateobj *blend = fd2_blend_stateobj(ctx->blend);
struct fd2_zsa_stateobj *zsa = fd2_zsa_stateobj(ctx->zsa);
struct fd_ringbuffer *ring = ctx->ring;
 
/* NOTE: we probably want to eventually refactor this so each state
* object handles emitting its own state.. although the mapping of
* state to registers is not always orthogonal, sometimes a single
* register contains bitfields coming from multiple state objects,
* so it is not clear yet what the best way to deal with that is.
*/
 
if (dirty & FD_DIRTY_SAMPLE_MASK) {
OUT_PKT3(ring, CP_SET_CONSTANT, 2);
OUT_RING(ring, CP_REG(REG_A2XX_PA_SC_AA_MASK));
OUT_RING(ring, ctx->sample_mask);
}
 
if (dirty & (FD_DIRTY_ZSA | FD_DIRTY_STENCIL_REF)) {
struct pipe_stencil_ref *sr = &ctx->stencil_ref;
 
OUT_PKT3(ring, CP_SET_CONSTANT, 2);
OUT_RING(ring, CP_REG(REG_A2XX_RB_DEPTHCONTROL));
OUT_RING(ring, zsa->rb_depthcontrol);
 
OUT_PKT3(ring, CP_SET_CONSTANT, 4);
OUT_RING(ring, CP_REG(REG_A2XX_RB_STENCILREFMASK_BF));
OUT_RING(ring, zsa->rb_stencilrefmask_bf |
A2XX_RB_STENCILREFMASK_STENCILREF(sr->ref_value[1]));
OUT_RING(ring, zsa->rb_stencilrefmask |
A2XX_RB_STENCILREFMASK_STENCILREF(sr->ref_value[0]));
OUT_RING(ring, zsa->rb_alpha_ref);
}
 
if (dirty & (FD_DIRTY_RASTERIZER | FD_DIRTY_FRAMEBUFFER)) {
struct fd2_rasterizer_stateobj *rasterizer =
fd2_rasterizer_stateobj(ctx->rasterizer);
OUT_PKT3(ring, CP_SET_CONSTANT, 3);
OUT_RING(ring, CP_REG(REG_A2XX_PA_CL_CLIP_CNTL));
OUT_RING(ring, rasterizer->pa_cl_clip_cntl);
OUT_RING(ring, rasterizer->pa_su_sc_mode_cntl |
A2XX_PA_SU_SC_MODE_CNTL_VTX_WINDOW_OFFSET_ENABLE);
 
OUT_PKT3(ring, CP_SET_CONSTANT, 5);
OUT_RING(ring, CP_REG(REG_A2XX_PA_SU_POINT_SIZE));
OUT_RING(ring, rasterizer->pa_su_point_size);
OUT_RING(ring, rasterizer->pa_su_point_minmax);
OUT_RING(ring, rasterizer->pa_su_line_cntl);
OUT_RING(ring, rasterizer->pa_sc_line_stipple);
 
OUT_PKT3(ring, CP_SET_CONSTANT, 6);
OUT_RING(ring, CP_REG(REG_A2XX_PA_SU_VTX_CNTL));
OUT_RING(ring, rasterizer->pa_su_vtx_cntl);
OUT_RING(ring, fui(1.0)); /* PA_CL_GB_VERT_CLIP_ADJ */
OUT_RING(ring, fui(1.0)); /* PA_CL_GB_VERT_DISC_ADJ */
OUT_RING(ring, fui(1.0)); /* PA_CL_GB_HORZ_CLIP_ADJ */
OUT_RING(ring, fui(1.0)); /* PA_CL_GB_HORZ_DISC_ADJ */
}
 
if (dirty & FD_DIRTY_SCISSOR) {
struct pipe_scissor_state *scissor = fd_context_get_scissor(ctx);
 
OUT_PKT3(ring, CP_SET_CONSTANT, 3);
OUT_RING(ring, CP_REG(REG_A2XX_PA_SC_WINDOW_SCISSOR_TL));
OUT_RING(ring, xy2d(scissor->minx, /* PA_SC_WINDOW_SCISSOR_TL */
scissor->miny));
OUT_RING(ring, xy2d(scissor->maxx, /* PA_SC_WINDOW_SCISSOR_BR */
scissor->maxy));
 
ctx->max_scissor.minx = MIN2(ctx->max_scissor.minx, scissor->minx);
ctx->max_scissor.miny = MIN2(ctx->max_scissor.miny, scissor->miny);
ctx->max_scissor.maxx = MAX2(ctx->max_scissor.maxx, scissor->maxx);
ctx->max_scissor.maxy = MAX2(ctx->max_scissor.maxy, scissor->maxy);
}
 
if (dirty & FD_DIRTY_VIEWPORT) {
OUT_PKT3(ring, CP_SET_CONSTANT, 7);
OUT_RING(ring, CP_REG(REG_A2XX_PA_CL_VPORT_XSCALE));
OUT_RING(ring, fui(ctx->viewport.scale[0])); /* PA_CL_VPORT_XSCALE */
OUT_RING(ring, fui(ctx->viewport.translate[0])); /* PA_CL_VPORT_XOFFSET */
OUT_RING(ring, fui(ctx->viewport.scale[1])); /* PA_CL_VPORT_YSCALE */
OUT_RING(ring, fui(ctx->viewport.translate[1])); /* PA_CL_VPORT_YOFFSET */
OUT_RING(ring, fui(ctx->viewport.scale[2])); /* PA_CL_VPORT_ZSCALE */
OUT_RING(ring, fui(ctx->viewport.translate[2])); /* PA_CL_VPORT_ZOFFSET */
 
OUT_PKT3(ring, CP_SET_CONSTANT, 2);
OUT_RING(ring, CP_REG(REG_A2XX_PA_CL_VTE_CNTL));
OUT_RING(ring, A2XX_PA_CL_VTE_CNTL_VTX_W0_FMT |
A2XX_PA_CL_VTE_CNTL_VPORT_X_SCALE_ENA |
A2XX_PA_CL_VTE_CNTL_VPORT_X_OFFSET_ENA |
A2XX_PA_CL_VTE_CNTL_VPORT_Y_SCALE_ENA |
A2XX_PA_CL_VTE_CNTL_VPORT_Y_OFFSET_ENA |
A2XX_PA_CL_VTE_CNTL_VPORT_Z_SCALE_ENA |
A2XX_PA_CL_VTE_CNTL_VPORT_Z_OFFSET_ENA);
}
 
if (dirty & (FD_DIRTY_PROG | FD_DIRTY_VTXSTATE | FD_DIRTY_TEXSTATE)) {
fd2_program_validate(ctx);
fd2_program_emit(ring, &ctx->prog);
}
 
if (dirty & (FD_DIRTY_PROG | FD_DIRTY_CONSTBUF)) {
emit_constants(ring, VS_CONST_BASE * 4,
&ctx->constbuf[PIPE_SHADER_VERTEX],
(dirty & FD_DIRTY_PROG) ? ctx->prog.vp : NULL);
emit_constants(ring, PS_CONST_BASE * 4,
&ctx->constbuf[PIPE_SHADER_FRAGMENT],
(dirty & FD_DIRTY_PROG) ? ctx->prog.fp : NULL);
}
 
if (dirty & (FD_DIRTY_BLEND | FD_DIRTY_ZSA)) {
OUT_PKT3(ring, CP_SET_CONSTANT, 2);
OUT_RING(ring, CP_REG(REG_A2XX_RB_COLORCONTROL));
OUT_RING(ring, zsa->rb_colorcontrol | blend->rb_colorcontrol);
}
 
if (dirty & FD_DIRTY_BLEND) {
OUT_PKT3(ring, CP_SET_CONSTANT, 2);
OUT_RING(ring, CP_REG(REG_A2XX_RB_BLEND_CONTROL));
OUT_RING(ring, blend->rb_blendcontrol);
 
OUT_PKT3(ring, CP_SET_CONSTANT, 2);
OUT_RING(ring, CP_REG(REG_A2XX_RB_COLOR_MASK));
OUT_RING(ring, blend->rb_colormask);
}
 
if (dirty & (FD_DIRTY_VERTTEX | FD_DIRTY_FRAGTEX | FD_DIRTY_PROG))
emit_textures(ring, ctx);
 
ctx->dirty &= ~dirty;
}
 
/* emit per-context initialization:
*/
void
fd2_emit_setup(struct fd_context *ctx)
{
struct fd_ringbuffer *ring = ctx->ring;
 
OUT_PKT0(ring, REG_A2XX_TP0_CHICKEN, 1);
OUT_RING(ring, 0x00000002);
 
OUT_PKT3(ring, CP_INVALIDATE_STATE, 1);
OUT_RING(ring, 0x00007fff);
 
OUT_PKT3(ring, CP_SET_CONSTANT, 2);
OUT_RING(ring, CP_REG(REG_A2XX_SQ_VS_CONST));
OUT_RING(ring, A2XX_SQ_VS_CONST_BASE(VS_CONST_BASE) |
A2XX_SQ_VS_CONST_SIZE(0x100));
 
OUT_PKT3(ring, CP_SET_CONSTANT, 2);
OUT_RING(ring, CP_REG(REG_A2XX_SQ_PS_CONST));
OUT_RING(ring, A2XX_SQ_PS_CONST_BASE(PS_CONST_BASE) |
A2XX_SQ_PS_CONST_SIZE(0xe0));
 
OUT_PKT3(ring, CP_SET_CONSTANT, 3);
OUT_RING(ring, CP_REG(REG_A2XX_VGT_MAX_VTX_INDX));
OUT_RING(ring, 0xffffffff); /* VGT_MAX_VTX_INDX */
OUT_RING(ring, 0x00000000); /* VGT_MIN_VTX_INDX */
 
OUT_PKT3(ring, CP_SET_CONSTANT, 2);
OUT_RING(ring, CP_REG(REG_A2XX_VGT_INDX_OFFSET));
OUT_RING(ring, 0x00000000);
 
OUT_PKT3(ring, CP_SET_CONSTANT, 2);
OUT_RING(ring, CP_REG(REG_A2XX_VGT_VERTEX_REUSE_BLOCK_CNTL));
OUT_RING(ring, 0x0000003b);
 
OUT_PKT3(ring, CP_SET_CONSTANT, 2);
OUT_RING(ring, CP_REG(REG_A2XX_SQ_CONTEXT_MISC));
OUT_RING(ring, A2XX_SQ_CONTEXT_MISC_SC_SAMPLE_CNTL(CENTERS_ONLY));
 
OUT_PKT3(ring, CP_SET_CONSTANT, 2);
OUT_RING(ring, CP_REG(REG_A2XX_SQ_INTERPOLATOR_CNTL));
OUT_RING(ring, 0xffffffff);
 
OUT_PKT3(ring, CP_SET_CONSTANT, 2);
OUT_RING(ring, CP_REG(REG_A2XX_PA_SC_AA_CONFIG));
OUT_RING(ring, 0x00000000);
 
OUT_PKT3(ring, CP_SET_CONSTANT, 2);
OUT_RING(ring, CP_REG(REG_A2XX_PA_SC_LINE_CNTL));
OUT_RING(ring, 0x00000000);
 
OUT_PKT3(ring, CP_SET_CONSTANT, 2);
OUT_RING(ring, CP_REG(REG_A2XX_PA_SC_WINDOW_OFFSET));
OUT_RING(ring, 0x00000000);
 
// XXX we change this dynamically for draw/clear.. vs gmem<->mem..
OUT_PKT3(ring, CP_SET_CONSTANT, 2);
OUT_RING(ring, CP_REG(REG_A2XX_RB_MODECONTROL));
OUT_RING(ring, A2XX_RB_MODECONTROL_EDRAM_MODE(COLOR_DEPTH));
 
OUT_PKT3(ring, CP_SET_CONSTANT, 2);
OUT_RING(ring, CP_REG(REG_A2XX_RB_SAMPLE_POS));
OUT_RING(ring, 0x88888888);
 
OUT_PKT3(ring, CP_SET_CONSTANT, 2);
OUT_RING(ring, CP_REG(REG_A2XX_RB_COLOR_DEST_MASK));
OUT_RING(ring, 0xffffffff);
 
OUT_PKT3(ring, CP_SET_CONSTANT, 2);
OUT_RING(ring, CP_REG(REG_A2XX_RB_COPY_DEST_INFO));
OUT_RING(ring, A2XX_RB_COPY_DEST_INFO_FORMAT(COLORX_4_4_4_4) |
A2XX_RB_COPY_DEST_INFO_WRITE_RED |
A2XX_RB_COPY_DEST_INFO_WRITE_GREEN |
A2XX_RB_COPY_DEST_INFO_WRITE_BLUE |
A2XX_RB_COPY_DEST_INFO_WRITE_ALPHA);
 
OUT_PKT3(ring, CP_SET_CONSTANT, 3);
OUT_RING(ring, CP_REG(REG_A2XX_SQ_WRAPPING_0));
OUT_RING(ring, 0x00000000); /* SQ_WRAPPING_0 */
OUT_RING(ring, 0x00000000); /* SQ_WRAPPING_1 */
 
OUT_PKT3(ring, CP_SET_DRAW_INIT_FLAGS, 1);
OUT_RING(ring, 0x00000000);
 
OUT_PKT3(ring, CP_WAIT_REG_EQ, 4);
OUT_RING(ring, 0x000005d0);
OUT_RING(ring, 0x00000000);
OUT_RING(ring, 0x5f601000);
OUT_RING(ring, 0x00000001);
 
OUT_PKT0(ring, REG_A2XX_SQ_INST_STORE_MANAGMENT, 1);
OUT_RING(ring, 0x00000180);
 
OUT_PKT3(ring, CP_INVALIDATE_STATE, 1);
OUT_RING(ring, 0x00000300);
 
OUT_PKT3(ring, CP_SET_SHADER_BASES, 1);
OUT_RING(ring, 0x80000180);
 
/* not sure what this form of CP_SET_CONSTANT is.. */
OUT_PKT3(ring, CP_SET_CONSTANT, 13);
OUT_RING(ring, 0x00000000);
OUT_RING(ring, 0x00000000);
OUT_RING(ring, 0x00000000);
OUT_RING(ring, 0x00000000);
OUT_RING(ring, 0x00000000);
OUT_RING(ring, 0x469c4000);
OUT_RING(ring, 0x3f800000);
OUT_RING(ring, 0x3f000000);
OUT_RING(ring, 0x00000000);
OUT_RING(ring, 0x40000000);
OUT_RING(ring, 0x3f400000);
OUT_RING(ring, 0x3ec00000);
OUT_RING(ring, 0x3e800000);
 
OUT_PKT3(ring, CP_SET_CONSTANT, 2);
OUT_RING(ring, CP_REG(REG_A2XX_RB_COLOR_MASK));
OUT_RING(ring, A2XX_RB_COLOR_MASK_WRITE_RED |
A2XX_RB_COLOR_MASK_WRITE_GREEN |
A2XX_RB_COLOR_MASK_WRITE_BLUE |
A2XX_RB_COLOR_MASK_WRITE_ALPHA);
 
OUT_PKT3(ring, CP_SET_CONSTANT, 5);
OUT_RING(ring, CP_REG(REG_A2XX_RB_BLEND_RED));
OUT_RING(ring, 0x00000000); /* RB_BLEND_RED */
OUT_RING(ring, 0x00000000); /* RB_BLEND_GREEN */
OUT_RING(ring, 0x00000000); /* RB_BLEND_BLUE */
OUT_RING(ring, 0x000000ff); /* RB_BLEND_ALPHA */
 
fd_ringbuffer_flush(ring);
fd_ringmarker_mark(ctx->draw_start);
}
/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/freedreno/a2xx/fd2_emit.h
0,0 → 1,48
/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */
 
/*
* Copyright (C) 2012-2013 Rob Clark <robclark@freedesktop.org>
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Authors:
* Rob Clark <robclark@freedesktop.org>
*/
 
#ifndef FD2_EMIT_H
#define FD2_EMIT_H
 
#include "pipe/p_context.h"
 
#include "freedreno_context.h"
 
struct fd_ringbuffer;
 
struct fd2_vertex_buf {
unsigned offset, size;
struct pipe_resource *prsc;
};
 
void fd2_emit_vertex_bufs(struct fd_ringbuffer *ring, uint32_t val,
struct fd2_vertex_buf *vbufs, uint32_t n);
void fd2_emit_state(struct fd_context *ctx, uint32_t dirty);
void fd2_emit_setup(struct fd_context *ctx);
 
#endif /* FD2_EMIT_H */
/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/freedreno/a2xx/fd2_gmem.c
0,0 → 1,404
/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */
 
/*
* Copyright (C) 2012 Rob Clark <robclark@freedesktop.org>
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Authors:
* Rob Clark <robclark@freedesktop.org>
*/
 
#include "pipe/p_state.h"
#include "util/u_string.h"
#include "util/u_memory.h"
#include "util/u_inlines.h"
 
#include "freedreno_draw.h"
#include "freedreno_state.h"
#include "freedreno_resource.h"
 
#include "fd2_gmem.h"
#include "fd2_context.h"
#include "fd2_emit.h"
#include "fd2_program.h"
#include "fd2_util.h"
#include "fd2_zsa.h"
 
static uint32_t fmt2swap(enum pipe_format format)
{
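/* value for the SWAP field of RB_COLOR_INFO/RB_COPY_DEST_INFO;
 * non-zero for BGRA-ordered formats:
 */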
switch (format) {
case PIPE_FORMAT_B8G8R8A8_UNORM:
/* TODO probably some more.. */
return 1;
default:
return 0;
}
}
 
/* transfer from gmem to system memory (ie. normal RAM) */
 
static void
emit_gmem2mem_surf(struct fd_context *ctx, uint32_t base,
struct pipe_surface *psurf)
{
struct fd_ringbuffer *ring = ctx->ring;
struct fd_resource *rsc = fd_resource(psurf->texture);
uint32_t swap = fmt2swap(psurf->format);
 
OUT_PKT3(ring, CP_SET_CONSTANT, 2);
OUT_RING(ring, CP_REG(REG_A2XX_RB_COLOR_INFO));
OUT_RING(ring, A2XX_RB_COLOR_INFO_SWAP(swap) |
A2XX_RB_COLOR_INFO_BASE(base) |
A2XX_RB_COLOR_INFO_FORMAT(fd2_pipe2color(psurf->format)));
 
OUT_PKT3(ring, CP_SET_CONSTANT, 5);
OUT_RING(ring, CP_REG(REG_A2XX_RB_COPY_CONTROL));
OUT_RING(ring, 0x00000000); /* RB_COPY_CONTROL */
OUT_RELOCW(ring, rsc->bo, 0, 0, 0); /* RB_COPY_DEST_BASE */
OUT_RING(ring, rsc->slices[0].pitch >> 5); /* RB_COPY_DEST_PITCH */
OUT_RING(ring, /* RB_COPY_DEST_INFO */
A2XX_RB_COPY_DEST_INFO_FORMAT(fd2_pipe2color(psurf->format)) |
A2XX_RB_COPY_DEST_INFO_LINEAR |
A2XX_RB_COPY_DEST_INFO_SWAP(swap) |
A2XX_RB_COPY_DEST_INFO_WRITE_RED |
A2XX_RB_COPY_DEST_INFO_WRITE_GREEN |
A2XX_RB_COPY_DEST_INFO_WRITE_BLUE |
A2XX_RB_COPY_DEST_INFO_WRITE_ALPHA);
 
OUT_WFI (ring);
 
OUT_PKT3(ring, CP_SET_CONSTANT, 3);
OUT_RING(ring, CP_REG(REG_A2XX_VGT_MAX_VTX_INDX));
OUT_RING(ring, 3); /* VGT_MAX_VTX_INDX */
OUT_RING(ring, 0); /* VGT_MIN_VTX_INDX */
 
fd_draw(ctx, ring, DI_PT_RECTLIST, IGNORE_VISIBILITY,
DI_SRC_SEL_AUTO_INDEX, 3, 0, INDEX_SIZE_IGN, 0, 0, NULL);
}
 
static void
fd2_emit_tile_gmem2mem(struct fd_context *ctx, struct fd_tile *tile)
{
struct fd2_context *fd2_ctx = fd2_context(ctx);
struct fd_ringbuffer *ring = ctx->ring;
struct pipe_framebuffer_state *pfb = &ctx->framebuffer;
 
fd2_emit_vertex_bufs(ring, 0x9c, (struct fd2_vertex_buf[]) {
{ .prsc = fd2_ctx->solid_vertexbuf, .size = 48 },
}, 1);
 
OUT_PKT3(ring, CP_SET_CONSTANT, 2);
OUT_RING(ring, CP_REG(REG_A2XX_PA_SC_WINDOW_OFFSET));
OUT_RING(ring, 0x00000000); /* PA_SC_WINDOW_OFFSET */
 
OUT_PKT3(ring, CP_SET_CONSTANT, 2);
OUT_RING(ring, CP_REG(REG_A2XX_VGT_INDX_OFFSET));
OUT_RING(ring, 0);
 
OUT_PKT3(ring, CP_SET_CONSTANT, 2);
OUT_RING(ring, CP_REG(REG_A2XX_VGT_VERTEX_REUSE_BLOCK_CNTL));
OUT_RING(ring, 0x0000028f);
 
fd2_program_emit(ring, &ctx->solid_prog);
 
OUT_PKT3(ring, CP_SET_CONSTANT, 2);
OUT_RING(ring, CP_REG(REG_A2XX_PA_SC_AA_MASK));
OUT_RING(ring, 0x0000ffff);
 
OUT_PKT3(ring, CP_SET_CONSTANT, 2);
OUT_RING(ring, CP_REG(REG_A2XX_RB_DEPTHCONTROL));
OUT_RING(ring, A2XX_RB_DEPTHCONTROL_EARLY_Z_ENABLE);
 
OUT_PKT3(ring, CP_SET_CONSTANT, 2);
OUT_RING(ring, CP_REG(REG_A2XX_PA_SU_SC_MODE_CNTL));
OUT_RING(ring, A2XX_PA_SU_SC_MODE_CNTL_PROVOKING_VTX_LAST | /* PA_SU_SC_MODE_CNTL */
A2XX_PA_SU_SC_MODE_CNTL_FRONT_PTYPE(PC_DRAW_TRIANGLES) |
A2XX_PA_SU_SC_MODE_CNTL_BACK_PTYPE(PC_DRAW_TRIANGLES));
 
OUT_PKT3(ring, CP_SET_CONSTANT, 3);
OUT_RING(ring, CP_REG(REG_A2XX_PA_SC_WINDOW_SCISSOR_TL));
OUT_RING(ring, xy2d(0, 0)); /* PA_SC_WINDOW_SCISSOR_TL */
OUT_RING(ring, xy2d(pfb->width, pfb->height)); /* PA_SC_WINDOW_SCISSOR_BR */
 
OUT_PKT3(ring, CP_SET_CONSTANT, 2);
OUT_RING(ring, CP_REG(REG_A2XX_PA_CL_VTE_CNTL));
OUT_RING(ring, A2XX_PA_CL_VTE_CNTL_VTX_W0_FMT |
A2XX_PA_CL_VTE_CNTL_VPORT_X_SCALE_ENA |
A2XX_PA_CL_VTE_CNTL_VPORT_X_OFFSET_ENA |
A2XX_PA_CL_VTE_CNTL_VPORT_Y_SCALE_ENA |
A2XX_PA_CL_VTE_CNTL_VPORT_Y_OFFSET_ENA);
 
OUT_PKT3(ring, CP_SET_CONSTANT, 2);
OUT_RING(ring, CP_REG(REG_A2XX_PA_CL_CLIP_CNTL));
OUT_RING(ring, 0x00000000);
 
OUT_PKT3(ring, CP_SET_CONSTANT, 2);
OUT_RING(ring, CP_REG(REG_A2XX_RB_MODECONTROL));
OUT_RING(ring, A2XX_RB_MODECONTROL_EDRAM_MODE(EDRAM_COPY));
 
OUT_PKT3(ring, CP_SET_CONSTANT, 2);
OUT_RING(ring, CP_REG(REG_A2XX_RB_COPY_DEST_OFFSET));
OUT_RING(ring, A2XX_RB_COPY_DEST_OFFSET_X(tile->xoff) |
A2XX_RB_COPY_DEST_OFFSET_Y(tile->yoff));
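/* the depth/stencil surface lives in GMEM after the color buffer
 * (see the DEPTH_BASE setup in fd2_emit_tile_init), hence the
 * non-zero base for the z/s resolve below:
 */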
 
if (ctx->resolve & (FD_BUFFER_DEPTH | FD_BUFFER_STENCIL))
emit_gmem2mem_surf(ctx, tile->bin_w * tile->bin_h, pfb->zsbuf);
 
if (ctx->resolve & FD_BUFFER_COLOR)
emit_gmem2mem_surf(ctx, 0, pfb->cbufs[0]);
 
OUT_PKT3(ring, CP_SET_CONSTANT, 2);
OUT_RING(ring, CP_REG(REG_A2XX_RB_MODECONTROL));
OUT_RING(ring, A2XX_RB_MODECONTROL_EDRAM_MODE(COLOR_DEPTH));
}
 
/* transfer from system memory to gmem */
 
static void
emit_mem2gmem_surf(struct fd_context *ctx, uint32_t base,
struct pipe_surface *psurf)
{
struct fd_ringbuffer *ring = ctx->ring;
struct fd_resource *rsc = fd_resource(psurf->texture);
uint32_t swiz;
 
OUT_PKT3(ring, CP_SET_CONSTANT, 2);
OUT_RING(ring, CP_REG(REG_A2XX_RB_COLOR_INFO));
OUT_RING(ring, A2XX_RB_COLOR_INFO_SWAP(fmt2swap(psurf->format)) |
A2XX_RB_COLOR_INFO_BASE(base) |
A2XX_RB_COLOR_INFO_FORMAT(fd2_pipe2color(psurf->format)));
 
swiz = fd2_tex_swiz(psurf->format, PIPE_SWIZZLE_RED, PIPE_SWIZZLE_GREEN,
PIPE_SWIZZLE_BLUE, PIPE_SWIZZLE_ALPHA);
 
/* emit fb as a texture: */
OUT_PKT3(ring, CP_SET_CONSTANT, 7);
OUT_RING(ring, 0x00010000);
OUT_RING(ring, A2XX_SQ_TEX_0_CLAMP_X(SQ_TEX_WRAP) |
A2XX_SQ_TEX_0_CLAMP_Y(SQ_TEX_WRAP) |
A2XX_SQ_TEX_0_CLAMP_Z(SQ_TEX_WRAP) |
A2XX_SQ_TEX_0_PITCH(rsc->slices[0].pitch));
OUT_RELOC(ring, rsc->bo, 0,
fd2_pipe2surface(psurf->format) | 0x800, 0);
OUT_RING(ring, A2XX_SQ_TEX_2_WIDTH(psurf->width - 1) |
A2XX_SQ_TEX_2_HEIGHT(psurf->height - 1));
OUT_RING(ring, 0x01000000 | // XXX
swiz |
A2XX_SQ_TEX_3_XY_MAG_FILTER(SQ_TEX_FILTER_POINT) |
A2XX_SQ_TEX_3_XY_MIN_FILTER(SQ_TEX_FILTER_POINT));
OUT_RING(ring, 0x00000000);
OUT_RING(ring, 0x00000200);
 
OUT_PKT3(ring, CP_SET_CONSTANT, 3);
OUT_RING(ring, CP_REG(REG_A2XX_VGT_MAX_VTX_INDX));
OUT_RING(ring, 3); /* VGT_MAX_VTX_INDX */
OUT_RING(ring, 0); /* VGT_MIN_VTX_INDX */
 
fd_draw(ctx, ring, DI_PT_RECTLIST, IGNORE_VISIBILITY,
DI_SRC_SEL_AUTO_INDEX, 3, 0, INDEX_SIZE_IGN, 0, 0, NULL);
}
 
static void
fd2_emit_tile_mem2gmem(struct fd_context *ctx, struct fd_tile *tile)
{
struct fd2_context *fd2_ctx = fd2_context(ctx);
struct fd_ringbuffer *ring = ctx->ring;
struct pipe_framebuffer_state *pfb = &ctx->framebuffer;
unsigned bin_w = tile->bin_w;
unsigned bin_h = tile->bin_h;
float x0, y0, x1, y1;
 
fd2_emit_vertex_bufs(ring, 0x9c, (struct fd2_vertex_buf[]) {
{ .prsc = fd2_ctx->solid_vertexbuf, .size = 48, .offset = 0x30 },
{ .prsc = fd2_ctx->solid_vertexbuf, .size = 32, .offset = 0x60 },
}, 2);
 
/* write texture coordinates to vertexbuf: */
x0 = ((float)tile->xoff) / ((float)pfb->width);
x1 = ((float)tile->xoff + bin_w) / ((float)pfb->width);
y0 = ((float)tile->yoff) / ((float)pfb->height);
y1 = ((float)tile->yoff + bin_h) / ((float)pfb->height);
OUT_PKT3(ring, CP_MEM_WRITE, 9);
OUT_RELOC(ring, fd_resource(fd2_ctx->solid_vertexbuf)->bo, 0x60, 0, 0);
OUT_RING(ring, fui(x0));
OUT_RING(ring, fui(y0));
OUT_RING(ring, fui(x1));
OUT_RING(ring, fui(y0));
OUT_RING(ring, fui(x0));
OUT_RING(ring, fui(y1));
OUT_RING(ring, fui(x1));
OUT_RING(ring, fui(y1));
 
OUT_PKT3(ring, CP_SET_CONSTANT, 2);
OUT_RING(ring, CP_REG(REG_A2XX_VGT_INDX_OFFSET));
OUT_RING(ring, 0);
 
OUT_PKT3(ring, CP_SET_CONSTANT, 2);
OUT_RING(ring, CP_REG(REG_A2XX_VGT_VERTEX_REUSE_BLOCK_CNTL));
OUT_RING(ring, 0x0000003b);
 
fd2_program_emit(ring, &ctx->blit_prog[0]);
 
OUT_PKT0(ring, REG_A2XX_TC_CNTL_STATUS, 1);
OUT_RING(ring, A2XX_TC_CNTL_STATUS_L2_INVALIDATE);
 
OUT_PKT3(ring, CP_SET_CONSTANT, 2);
OUT_RING(ring, CP_REG(REG_A2XX_RB_DEPTHCONTROL));
OUT_RING(ring, A2XX_RB_DEPTHCONTROL_EARLY_Z_ENABLE);
 
OUT_PKT3(ring, CP_SET_CONSTANT, 2);
OUT_RING(ring, CP_REG(REG_A2XX_PA_SU_SC_MODE_CNTL));
OUT_RING(ring, A2XX_PA_SU_SC_MODE_CNTL_PROVOKING_VTX_LAST |
A2XX_PA_SU_SC_MODE_CNTL_FRONT_PTYPE(PC_DRAW_TRIANGLES) |
A2XX_PA_SU_SC_MODE_CNTL_BACK_PTYPE(PC_DRAW_TRIANGLES));
 
OUT_PKT3(ring, CP_SET_CONSTANT, 2);
OUT_RING(ring, CP_REG(REG_A2XX_PA_SC_AA_MASK));
OUT_RING(ring, 0x0000ffff);
 
OUT_PKT3(ring, CP_SET_CONSTANT, 2);
OUT_RING(ring, CP_REG(REG_A2XX_RB_COLORCONTROL));
OUT_RING(ring, A2XX_RB_COLORCONTROL_ALPHA_FUNC(PIPE_FUNC_ALWAYS) |
A2XX_RB_COLORCONTROL_BLEND_DISABLE |
A2XX_RB_COLORCONTROL_ROP_CODE(12) |
A2XX_RB_COLORCONTROL_DITHER_MODE(DITHER_DISABLE) |
A2XX_RB_COLORCONTROL_DITHER_TYPE(DITHER_PIXEL));
 
OUT_PKT3(ring, CP_SET_CONSTANT, 2);
OUT_RING(ring, CP_REG(REG_A2XX_RB_BLEND_CONTROL));
OUT_RING(ring, A2XX_RB_BLEND_CONTROL_COLOR_SRCBLEND(FACTOR_ONE) |
A2XX_RB_BLEND_CONTROL_COLOR_COMB_FCN(BLEND_DST_PLUS_SRC) |
A2XX_RB_BLEND_CONTROL_COLOR_DESTBLEND(FACTOR_ZERO) |
A2XX_RB_BLEND_CONTROL_ALPHA_SRCBLEND(FACTOR_ONE) |
A2XX_RB_BLEND_CONTROL_ALPHA_COMB_FCN(BLEND_DST_PLUS_SRC) |
A2XX_RB_BLEND_CONTROL_ALPHA_DESTBLEND(FACTOR_ZERO));
 
OUT_PKT3(ring, CP_SET_CONSTANT, 3);
OUT_RING(ring, CP_REG(REG_A2XX_PA_SC_WINDOW_SCISSOR_TL));
OUT_RING(ring, A2XX_PA_SC_WINDOW_OFFSET_DISABLE |
xy2d(0,0)); /* PA_SC_WINDOW_SCISSOR_TL */
OUT_RING(ring, xy2d(bin_w, bin_h)); /* PA_SC_WINDOW_SCISSOR_BR */
 
OUT_PKT3(ring, CP_SET_CONSTANT, 5);
OUT_RING(ring, CP_REG(REG_A2XX_PA_CL_VPORT_XSCALE));
OUT_RING(ring, fui((float)bin_w/2.0)); /* PA_CL_VPORT_XSCALE */
OUT_RING(ring, fui((float)bin_w/2.0)); /* PA_CL_VPORT_XOFFSET */
OUT_RING(ring, fui(-(float)bin_h/2.0)); /* PA_CL_VPORT_YSCALE */
OUT_RING(ring, fui((float)bin_h/2.0)); /* PA_CL_VPORT_YOFFSET */
 
OUT_PKT3(ring, CP_SET_CONSTANT, 2);
OUT_RING(ring, CP_REG(REG_A2XX_PA_CL_VTE_CNTL));
OUT_RING(ring, A2XX_PA_CL_VTE_CNTL_VTX_XY_FMT |
A2XX_PA_CL_VTE_CNTL_VTX_Z_FMT | // XXX check this???
A2XX_PA_CL_VTE_CNTL_VPORT_X_SCALE_ENA |
A2XX_PA_CL_VTE_CNTL_VPORT_X_OFFSET_ENA |
A2XX_PA_CL_VTE_CNTL_VPORT_Y_SCALE_ENA |
A2XX_PA_CL_VTE_CNTL_VPORT_Y_OFFSET_ENA);
 
OUT_PKT3(ring, CP_SET_CONSTANT, 2);
OUT_RING(ring, CP_REG(REG_A2XX_PA_CL_CLIP_CNTL));
OUT_RING(ring, 0x00000000);
 
if (fd_gmem_needs_restore(ctx, tile, FD_BUFFER_DEPTH | FD_BUFFER_STENCIL))
emit_mem2gmem_surf(ctx, bin_w * bin_h, pfb->zsbuf);
 
if (fd_gmem_needs_restore(ctx, tile, FD_BUFFER_COLOR))
emit_mem2gmem_surf(ctx, 0, pfb->cbufs[0]);
 
/* TODO blob driver seems to toss in a CACHE_FLUSH after each DRAW_INDX.. */
}
 
/* before first tile */
static void
fd2_emit_tile_init(struct fd_context *ctx)
{
struct fd_ringbuffer *ring = ctx->ring;
struct pipe_framebuffer_state *pfb = &ctx->framebuffer;
struct fd_gmem_stateobj *gmem = &ctx->gmem;
enum pipe_format format = pipe_surface_format(pfb->cbufs[0]);
uint32_t reg;
 
OUT_PKT3(ring, CP_SET_CONSTANT, 4);
OUT_RING(ring, CP_REG(REG_A2XX_RB_SURFACE_INFO));
OUT_RING(ring, gmem->bin_w); /* RB_SURFACE_INFO */
OUT_RING(ring, A2XX_RB_COLOR_INFO_SWAP(fmt2swap(format)) |
A2XX_RB_COLOR_INFO_FORMAT(fd2_pipe2color(format)));
reg = A2XX_RB_DEPTH_INFO_DEPTH_BASE(align(gmem->bin_w * gmem->bin_h, 4));
if (pfb->zsbuf)
reg |= A2XX_RB_DEPTH_INFO_DEPTH_FORMAT(fd_pipe2depth(pfb->zsbuf->format));
OUT_RING(ring, reg); /* RB_DEPTH_INFO */
}
 
/* before mem2gmem */
static void
fd2_emit_tile_prep(struct fd_context *ctx, struct fd_tile *tile)
{
struct fd_ringbuffer *ring = ctx->ring;
struct pipe_framebuffer_state *pfb = &ctx->framebuffer;
enum pipe_format format = pipe_surface_format(pfb->cbufs[0]);
 
OUT_PKT3(ring, CP_SET_CONSTANT, 2);
OUT_RING(ring, CP_REG(REG_A2XX_RB_COLOR_INFO));
OUT_RING(ring, A2XX_RB_COLOR_INFO_SWAP(1) | /* RB_COLOR_INFO */
A2XX_RB_COLOR_INFO_FORMAT(fd2_pipe2color(format)));
 
/* setup screen scissor for current tile (same for mem2gmem): */
OUT_PKT3(ring, CP_SET_CONSTANT, 3);
OUT_RING(ring, CP_REG(REG_A2XX_PA_SC_SCREEN_SCISSOR_TL));
OUT_RING(ring, A2XX_PA_SC_SCREEN_SCISSOR_TL_X(0) |
A2XX_PA_SC_SCREEN_SCISSOR_TL_Y(0));
OUT_RING(ring, A2XX_PA_SC_SCREEN_SCISSOR_BR_X(tile->bin_w) |
A2XX_PA_SC_SCREEN_SCISSOR_BR_Y(tile->bin_h));
}
 
/* before IB to rendering cmds: */
static void
fd2_emit_tile_renderprep(struct fd_context *ctx, struct fd_tile *tile)
{
struct fd_ringbuffer *ring = ctx->ring;
struct pipe_framebuffer_state *pfb = &ctx->framebuffer;
enum pipe_format format = pipe_surface_format(pfb->cbufs[0]);
 
OUT_PKT3(ring, CP_SET_CONSTANT, 2);
OUT_RING(ring, CP_REG(REG_A2XX_RB_COLOR_INFO));
OUT_RING(ring, A2XX_RB_COLOR_INFO_SWAP(fmt2swap(format)) |
A2XX_RB_COLOR_INFO_FORMAT(fd2_pipe2color(format)));
 
/* setup window scissor and offset for current tile (different
* from mem2gmem):
*/
OUT_PKT3(ring, CP_SET_CONSTANT, 2);
OUT_RING(ring, CP_REG(REG_A2XX_PA_SC_WINDOW_OFFSET));
OUT_RING(ring, A2XX_PA_SC_WINDOW_OFFSET_X(-tile->xoff) |
A2XX_PA_SC_WINDOW_OFFSET_Y(-tile->yoff));
}
 
void
fd2_gmem_init(struct pipe_context *pctx)
{
struct fd_context *ctx = fd_context(pctx);
 
ctx->emit_tile_init = fd2_emit_tile_init;
ctx->emit_tile_prep = fd2_emit_tile_prep;
ctx->emit_tile_mem2gmem = fd2_emit_tile_mem2gmem;
ctx->emit_tile_renderprep = fd2_emit_tile_renderprep;
ctx->emit_tile_gmem2mem = fd2_emit_tile_gmem2mem;
}
/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/freedreno/a2xx/fd2_gmem.h
0,0 → 1,36
/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */
 
/*
* Copyright (C) 2013 Rob Clark <robclark@freedesktop.org>
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Authors:
* Rob Clark <robclark@freedesktop.org>
*/
 
#ifndef FD2_GMEM_H_
#define FD2_GMEM_H_
 
#include "pipe/p_context.h"
 
void fd2_gmem_init(struct pipe_context *pctx);
 
#endif /* FD2_GMEM_H_ */
/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/freedreno/a2xx/fd2_program.c
0,0 → 1,479
/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */
 
/*
* Copyright (C) 2012 Rob Clark <robclark@freedesktop.org>
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Authors:
* Rob Clark <robclark@freedesktop.org>
*/
 
#include "pipe/p_state.h"
#include "util/u_string.h"
#include "util/u_memory.h"
#include "util/u_inlines.h"
#include "util/u_format.h"
#include "tgsi/tgsi_dump.h"
#include "tgsi/tgsi_parse.h"
 
#include "freedreno_program.h"
 
#include "fd2_program.h"
#include "fd2_compiler.h"
#include "fd2_texture.h"
#include "fd2_util.h"
 
static struct fd2_shader_stateobj *
create_shader(enum shader_t type)
{
struct fd2_shader_stateobj *so = CALLOC_STRUCT(fd2_shader_stateobj);
if (!so)
return NULL;
so->type = type;
return so;
}
 
static void
delete_shader(struct fd2_shader_stateobj *so)
{
ir2_shader_destroy(so->ir);
free(so->tokens);
free(so->bin);
free(so);
}
 
static struct fd2_shader_stateobj *
assemble(struct fd2_shader_stateobj *so)
{
free(so->bin);
so->bin = ir2_shader_assemble(so->ir, &so->info);
if (!so->bin)
goto fail;
 
if (fd_mesa_debug & FD_DBG_DISASM) {
DBG("disassemble: type=%d", so->type);
disasm_a2xx(so->bin, so->info.sizedwords, 0, so->type);
}
 
return so;
 
fail:
debug_error("assemble failed!");
delete_shader(so);
return NULL;
}
 
static struct fd2_shader_stateobj *
compile(struct fd_program_stateobj *prog, struct fd2_shader_stateobj *so)
{
int ret;
 
if (fd_mesa_debug & FD_DBG_DISASM) {
DBG("dump tgsi: type=%d", so->type);
tgsi_dump(so->tokens, 0);
}
 
ret = fd2_compile_shader(prog, so);
if (ret)
goto fail;
 
/* NOTE: we don't assemble yet because for VS we don't know the
* type information for vertex fetch yet.. so those need to be
* patched up later before assembling.
*/
 
so->info.sizedwords = 0;
 
return so;
 
fail:
debug_error("compile failed!");
delete_shader(so);
return NULL;
}
 
static void
emit(struct fd_ringbuffer *ring, struct fd2_shader_stateobj *so)
{
unsigned i;
 
if (so->info.sizedwords == 0)
assemble(so);
 
OUT_PKT3(ring, CP_IM_LOAD_IMMEDIATE, 2 + so->info.sizedwords);
OUT_RING(ring, (so->type == SHADER_VERTEX) ? 0 : 1);
OUT_RING(ring, so->info.sizedwords);
for (i = 0; i < so->info.sizedwords; i++)
OUT_RING(ring, so->bin[i]);
}
 
static void *
fd2_fp_state_create(struct pipe_context *pctx,
const struct pipe_shader_state *cso)
{
struct fd2_shader_stateobj *so = create_shader(SHADER_FRAGMENT);
if (!so)
return NULL;
so->tokens = tgsi_dup_tokens(cso->tokens);
return so;
}
 
static void
fd2_fp_state_delete(struct pipe_context *pctx, void *hwcso)
{
struct fd2_shader_stateobj *so = hwcso;
delete_shader(so);
}
 
static void *
fd2_vp_state_create(struct pipe_context *pctx,
const struct pipe_shader_state *cso)
{
struct fd2_shader_stateobj *so = create_shader(SHADER_VERTEX);
if (!so)
return NULL;
so->tokens = tgsi_dup_tokens(cso->tokens);
return so;
}
 
static void
fd2_vp_state_delete(struct pipe_context *pctx, void *hwcso)
{
struct fd2_shader_stateobj *so = hwcso;
delete_shader(so);
}
 
static void
patch_vtx_fetches(struct fd_context *ctx, struct fd2_shader_stateobj *so,
struct fd_vertex_stateobj *vtx)
{
unsigned i;
 
assert(so->num_vfetch_instrs == vtx->num_elements);
 
/* update vtx fetch instructions: */
for (i = 0; i < so->num_vfetch_instrs; i++) {
struct ir2_instruction *instr = so->vfetch_instrs[i];
struct pipe_vertex_element *elem = &vtx->pipe[i];
struct pipe_vertex_buffer *vb =
&ctx->vtx.vertexbuf.vb[elem->vertex_buffer_index];
enum pipe_format format = elem->src_format;
const struct util_format_description *desc =
util_format_description(format);
unsigned j;
 
/* Find the first non-VOID channel. */
for (j = 0; j < 4; j++)
if (desc->channel[j].type != UTIL_FORMAT_TYPE_VOID)
break;
 
/* CI/CIS can probably be set in compiler instead: */
instr->fetch.const_idx = 20 + (i / 3);
instr->fetch.const_idx_sel = i % 3;
 
instr->fetch.fmt = fd2_pipe2surface(format);
instr->fetch.is_normalized = desc->channel[j].normalized;
instr->fetch.is_signed =
desc->channel[j].type == UTIL_FORMAT_TYPE_SIGNED;
instr->fetch.stride = vb->stride ? : 1;
instr->fetch.offset = elem->src_offset;
 
for (j = 0; j < 4; j++)
instr->regs[0]->swizzle[j] = "xyzw01__"[desc->swizzle[j]];
 
assert(instr->fetch.fmt != ~0);
 
DBG("vtx[%d]: %s (%d), ci=%d, cis=%d, id=%d, swizzle=%s, "
"stride=%d, offset=%d",
i, util_format_name(format),
instr->fetch.fmt,
instr->fetch.const_idx,
instr->fetch.const_idx_sel,
elem->instance_divisor,
instr->regs[0]->swizzle,
instr->fetch.stride,
instr->fetch.offset);
}
 
/* trigger re-assemble: */
so->info.sizedwords = 0;
}
 
static void
patch_tex_fetches(struct fd_context *ctx, struct fd2_shader_stateobj *so,
struct fd_texture_stateobj *tex)
{
unsigned i;
 
/* update tex fetch instructions: */
for (i = 0; i < so->num_tfetch_instrs; i++) {
struct ir2_instruction *instr = so->tfetch_instrs[i].instr;
unsigned samp_id = so->tfetch_instrs[i].samp_id;
unsigned const_idx = fd2_get_const_idx(ctx, tex, samp_id);
 
if (const_idx != instr->fetch.const_idx) {
instr->fetch.const_idx = const_idx;
/* trigger re-assemble: */
so->info.sizedwords = 0;
}
}
}
 
void
fd2_program_validate(struct fd_context *ctx)
{
struct fd_program_stateobj *prog = &ctx->prog;
 
/* if vertex or frag shader is dirty, we may need to recompile. Compile
* frag shader first, as that assigns the register slots for exports
* from the vertex shader. Therefore, if the frag shader has changed, we
* need to recompile both the vert and frag shaders.
*/
if (prog->dirty & FD_SHADER_DIRTY_FP)
compile(prog, prog->fp);
 
if (prog->dirty & (FD_SHADER_DIRTY_FP | FD_SHADER_DIRTY_VP))
compile(prog, prog->vp);
 
if (prog->dirty)
ctx->dirty |= FD_DIRTY_PROG;
 
/* if necessary, fix up vertex fetch instructions: */
if (ctx->dirty & (FD_DIRTY_VTXSTATE | FD_DIRTY_PROG))
patch_vtx_fetches(ctx, prog->vp, ctx->vtx.vtx);
 
/* if necessary, fix up texture fetch instructions: */
if (ctx->dirty & (FD_DIRTY_TEXSTATE | FD_DIRTY_PROG)) {
patch_tex_fetches(ctx, prog->vp, &ctx->verttex);
patch_tex_fetches(ctx, prog->fp, &ctx->fragtex);
}
}
 
void
fd2_program_emit(struct fd_ringbuffer *ring,
struct fd_program_stateobj *prog)
{
struct ir2_shader_info *vsi =
&((struct fd2_shader_stateobj *)prog->vp)->info;
struct ir2_shader_info *fsi =
&((struct fd2_shader_stateobj *)prog->fp)->info;
uint8_t vs_gprs, fs_gprs, vs_export;
 
emit(ring, prog->vp);
emit(ring, prog->fp);
 
vs_gprs = (vsi->max_reg < 0) ? 0x80 : vsi->max_reg;
fs_gprs = (fsi->max_reg < 0) ? 0x80 : fsi->max_reg;
vs_export = MAX2(1, prog->num_exports) - 1;
 
OUT_PKT3(ring, CP_SET_CONSTANT, 2);
OUT_RING(ring, CP_REG(REG_A2XX_SQ_PROGRAM_CNTL));
OUT_RING(ring, A2XX_SQ_PROGRAM_CNTL_PS_EXPORT_MODE(POSITION_2_VECTORS_SPRITE) |
A2XX_SQ_PROGRAM_CNTL_VS_RESOURCE |
A2XX_SQ_PROGRAM_CNTL_PS_RESOURCE |
A2XX_SQ_PROGRAM_CNTL_VS_EXPORT_COUNT(vs_export) |
A2XX_SQ_PROGRAM_CNTL_PS_REGS(fs_gprs) |
A2XX_SQ_PROGRAM_CNTL_VS_REGS(vs_gprs));
 
prog->dirty = 0;
}
 
/* Creates shader:
* EXEC ADDR(0x2) CNT(0x1)
* (S)FETCH: SAMPLE R0.xyzw = R0.xyx CONST(0) LOCATION(CENTER)
* ALLOC PARAM/PIXEL SIZE(0x0)
* EXEC_END ADDR(0x3) CNT(0x1)
* ALU: MAXv export0 = R0, R0 ; gl_FragColor
* NOP
*/
static struct fd2_shader_stateobj *
create_blit_fp(void)
{
struct fd2_shader_stateobj *so = create_shader(SHADER_FRAGMENT);
struct ir2_cf *cf;
struct ir2_instruction *instr;
 
if (!so)
return NULL;
 
so->ir = ir2_shader_create();
 
cf = ir2_cf_create(so->ir, EXEC);
 
instr = ir2_instr_create_tex_fetch(cf, 0);
ir2_reg_create(instr, 0, "xyzw", 0);
ir2_reg_create(instr, 0, "xyx", 0);
instr->sync = true;
 
cf = ir2_cf_create_alloc(so->ir, SQ_PARAMETER_PIXEL, 0);
cf = ir2_cf_create(so->ir, EXEC_END);
 
instr = ir2_instr_create_alu(cf, MAXv, ~0);
ir2_reg_create(instr, 0, NULL, IR2_REG_EXPORT);
ir2_reg_create(instr, 0, NULL, 0);
ir2_reg_create(instr, 0, NULL, 0);
 
return assemble(so);
}
 
/* Creates shader:
* EXEC ADDR(0x3) CNT(0x2)
* FETCH: VERTEX R1.xy01 = R0.x FMT_32_32_FLOAT UNSIGNED STRIDE(8) CONST(26, 1)
* FETCH: VERTEX R2.xyz1 = R0.x FMT_32_32_32_FLOAT UNSIGNED STRIDE(12) CONST(26, 0)
* ALLOC POSITION SIZE(0x0)
* EXEC ADDR(0x5) CNT(0x1)
* ALU: MAXv export62 = R2, R2 ; gl_Position
* ALLOC PARAM/PIXEL SIZE(0x0)
* EXEC_END ADDR(0x6) CNT(0x1)
* ALU: MAXv export0 = R1, R1
* NOP
*/
static struct fd2_shader_stateobj *
create_blit_vp(void)
{
struct fd2_shader_stateobj *so = create_shader(SHADER_VERTEX);
struct ir2_cf *cf;
struct ir2_instruction *instr;
 
if (!so)
return NULL;
 
so->ir = ir2_shader_create();
 
cf = ir2_cf_create(so->ir, EXEC);
 
instr = ir2_instr_create_vtx_fetch(cf, 26, 1, FMT_32_32_FLOAT, false, 8);
instr->fetch.is_normalized = true;
ir2_reg_create(instr, 1, "xy01", 0);
ir2_reg_create(instr, 0, "x", 0);
 
instr = ir2_instr_create_vtx_fetch(cf, 26, 0, FMT_32_32_32_FLOAT, false, 12);
instr->fetch.is_normalized = true;
ir2_reg_create(instr, 2, "xyz1", 0);
ir2_reg_create(instr, 0, "x", 0);
 
cf = ir2_cf_create_alloc(so->ir, SQ_POSITION, 0);
cf = ir2_cf_create(so->ir, EXEC);
 
instr = ir2_instr_create_alu(cf, MAXv, ~0);
ir2_reg_create(instr, 62, NULL, IR2_REG_EXPORT);
ir2_reg_create(instr, 2, NULL, 0);
ir2_reg_create(instr, 2, NULL, 0);
 
cf = ir2_cf_create_alloc(so->ir, SQ_PARAMETER_PIXEL, 0);
cf = ir2_cf_create(so->ir, EXEC_END);
 
instr = ir2_instr_create_alu(cf, MAXv, ~0);
ir2_reg_create(instr, 0, NULL, IR2_REG_EXPORT);
ir2_reg_create(instr, 1, NULL, 0);
ir2_reg_create(instr, 1, NULL, 0);
 
return assemble(so);
}
 
/* Creates shader:
* ALLOC PARAM/PIXEL SIZE(0x0)
* EXEC_END ADDR(0x1) CNT(0x1)
* ALU: MAXv export0 = C0, C0 ; gl_FragColor
*/
static struct fd2_shader_stateobj *
create_solid_fp(void)
{
struct fd2_shader_stateobj *so = create_shader(SHADER_FRAGMENT);
struct ir2_cf *cf;
struct ir2_instruction *instr;
 
if (!so)
return NULL;
 
so->ir = ir2_shader_create();
 
cf = ir2_cf_create_alloc(so->ir, SQ_PARAMETER_PIXEL, 0);
cf = ir2_cf_create(so->ir, EXEC_END);
 
instr = ir2_instr_create_alu(cf, MAXv, ~0);
ir2_reg_create(instr, 0, NULL, IR2_REG_EXPORT);
ir2_reg_create(instr, 0, NULL, IR2_REG_CONST);
ir2_reg_create(instr, 0, NULL, IR2_REG_CONST);
 
return assemble(so);
}
 
/* Creates shader:
* EXEC ADDR(0x3) CNT(0x1)
* (S)FETCH: VERTEX R1.xyz1 = R0.x FMT_32_32_32_FLOAT
* UNSIGNED STRIDE(12) CONST(26, 0)
* ALLOC POSITION SIZE(0x0)
* EXEC ADDR(0x4) CNT(0x1)
* ALU: MAXv export62 = R1, R1 ; gl_Position
* ALLOC PARAM/PIXEL SIZE(0x0)
* EXEC_END ADDR(0x5) CNT(0x0)
*/
static struct fd2_shader_stateobj *
create_solid_vp(void)
{
struct fd2_shader_stateobj *so = create_shader(SHADER_VERTEX);
struct ir2_cf *cf;
struct ir2_instruction *instr;
 
if (!so)
return NULL;
 
so->ir = ir2_shader_create();
 
cf = ir2_cf_create(so->ir, EXEC);
 
instr = ir2_instr_create_vtx_fetch(cf, 26, 0, FMT_32_32_32_FLOAT, false, 12);
ir2_reg_create(instr, 1, "xyz1", 0);
ir2_reg_create(instr, 0, "x", 0);
 
cf = ir2_cf_create_alloc(so->ir, SQ_POSITION, 0);
cf = ir2_cf_create(so->ir, EXEC);
 
instr = ir2_instr_create_alu(cf, MAXv, ~0);
ir2_reg_create(instr, 62, NULL, IR2_REG_EXPORT);
ir2_reg_create(instr, 1, NULL, 0);
ir2_reg_create(instr, 1, NULL, 0);
 
cf = ir2_cf_create_alloc(so->ir, SQ_PARAMETER_PIXEL, 0);
cf = ir2_cf_create(so->ir, EXEC_END);
 
return assemble(so);
}
 
void
fd2_prog_init(struct pipe_context *pctx)
{
struct fd_context *ctx = fd_context(pctx);
 
pctx->create_fs_state = fd2_fp_state_create;
pctx->delete_fs_state = fd2_fp_state_delete;
 
pctx->create_vs_state = fd2_vp_state_create;
pctx->delete_vs_state = fd2_vp_state_delete;
 
fd_prog_init(pctx);
 
ctx->solid_prog.fp = create_solid_fp();
ctx->solid_prog.vp = create_solid_vp();
ctx->blit_prog[0].fp = create_blit_fp();
ctx->blit_prog[0].vp = create_blit_vp();
}
/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/freedreno/a2xx/fd2_program.h
0,0 → 1,81
/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */
 
/*
* Copyright (C) 2012-2013 Rob Clark <robclark@freedesktop.org>
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Authors:
* Rob Clark <robclark@freedesktop.org>
*/
 
#ifndef FD2_PROGRAM_H_
#define FD2_PROGRAM_H_
 
#include "pipe/p_context.h"
 
#include "freedreno_context.h"
 
#include "ir-a2xx.h"
#include "disasm.h"
 
struct fd2_shader_stateobj {
enum shader_t type;
 
uint32_t *bin;
 
struct tgsi_token *tokens;
 
/* note that we defer compiling the shader until we know both vs and ps..
* and if one changes, we potentially need to recompile in order to
* get varying linkages correct:
*/
struct ir2_shader_info info;
struct ir2_shader *ir;
 
/* for vertex shaders, the fetch instructions which need to be
* patched up before assembly:
*/
unsigned num_vfetch_instrs;
struct ir2_instruction *vfetch_instrs[64];
 
/* for all shaders, any tex fetch instructions which need to be
* patched before assembly:
*/
unsigned num_tfetch_instrs;
struct {
unsigned samp_id;
struct ir2_instruction *instr;
} tfetch_instrs[64];
 
unsigned first_immediate; /* const reg # of first immediate */
unsigned num_immediates;
struct {
uint32_t val[4];
} immediates[64];
};
 
void fd2_program_emit(struct fd_ringbuffer *ring,
struct fd_program_stateobj *prog);
void fd2_program_validate(struct fd_context *ctx);
 
void fd2_prog_init(struct pipe_context *pctx);
 
#endif /* FD2_PROGRAM_H_ */
/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/freedreno/a2xx/fd2_rasterizer.c
0,0 → 1,113
/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */
 
/*
* Copyright (C) 2012-2013 Rob Clark <robclark@freedesktop.org>
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Authors:
* Rob Clark <robclark@freedesktop.org>
*/
 
 
#include "pipe/p_state.h"
#include "util/u_string.h"
#include "util/u_memory.h"
 
#include "fd2_rasterizer.h"
#include "fd2_context.h"
#include "fd2_util.h"
 
 
void *
fd2_rasterizer_state_create(struct pipe_context *pctx,
const struct pipe_rasterizer_state *cso)
{
struct fd2_rasterizer_stateobj *so;
float psize_min, psize_max;
 
so = CALLOC_STRUCT(fd2_rasterizer_stateobj);
if (!so)
return NULL;
 
if (cso->point_size_per_vertex) {
psize_min = util_get_min_point_size(cso);
psize_max = 8192;
} else {
/* Force the point size to be as if the vertex output was disabled. */
psize_min = cso->point_size;
psize_max = cso->point_size;
}
 
so->base = *cso;
 
so->pa_sc_line_stipple = cso->line_stipple_enable ?
A2XX_PA_SC_LINE_STIPPLE_LINE_PATTERN(cso->line_stipple_pattern) |
A2XX_PA_SC_LINE_STIPPLE_REPEAT_COUNT(cso->line_stipple_factor) : 0;
 
so->pa_cl_clip_cntl = 0; // TODO
 
so->pa_su_vtx_cntl =
A2XX_PA_SU_VTX_CNTL_PIX_CENTER(cso->half_pixel_center ? PIXCENTER_OGL : PIXCENTER_D3D) |
A2XX_PA_SU_VTX_CNTL_QUANT_MODE(ONE_SIXTEENTH);
 
so->pa_su_point_size =
A2XX_PA_SU_POINT_SIZE_HEIGHT(cso->point_size/2) |
A2XX_PA_SU_POINT_SIZE_WIDTH(cso->point_size/2);
 
so->pa_su_point_minmax =
A2XX_PA_SU_POINT_MINMAX_MIN(psize_min/2) |
A2XX_PA_SU_POINT_MINMAX_MAX(psize_max/2);
 
so->pa_su_line_cntl =
A2XX_PA_SU_LINE_CNTL_WIDTH(cso->line_width/2);
 
so->pa_su_sc_mode_cntl =
A2XX_PA_SU_SC_MODE_CNTL_VTX_WINDOW_OFFSET_ENABLE |
A2XX_PA_SU_SC_MODE_CNTL_FRONT_PTYPE(fd_polygon_mode(cso->fill_front)) |
A2XX_PA_SU_SC_MODE_CNTL_BACK_PTYPE(fd_polygon_mode(cso->fill_back));
 
if (cso->cull_face & PIPE_FACE_FRONT)
so->pa_su_sc_mode_cntl |= A2XX_PA_SU_SC_MODE_CNTL_CULL_FRONT;
if (cso->cull_face & PIPE_FACE_BACK)
so->pa_su_sc_mode_cntl |= A2XX_PA_SU_SC_MODE_CNTL_CULL_BACK;
if (!cso->flatshade_first)
so->pa_su_sc_mode_cntl |= A2XX_PA_SU_SC_MODE_CNTL_PROVOKING_VTX_LAST;
if (!cso->front_ccw)
so->pa_su_sc_mode_cntl |= A2XX_PA_SU_SC_MODE_CNTL_FACE;
if (cso->line_stipple_enable)
so->pa_su_sc_mode_cntl |= A2XX_PA_SU_SC_MODE_CNTL_LINE_STIPPLE_ENABLE;
if (cso->multisample)
so->pa_su_sc_mode_cntl |= A2XX_PA_SU_SC_MODE_CNTL_MSAA_ENABLE;
 
if (cso->fill_front != PIPE_POLYGON_MODE_FILL ||
cso->fill_back != PIPE_POLYGON_MODE_FILL)
so->pa_su_sc_mode_cntl |= A2XX_PA_SU_SC_MODE_CNTL_POLYMODE(POLY_DUALMODE);
else
so->pa_su_sc_mode_cntl |= A2XX_PA_SU_SC_MODE_CNTL_POLYMODE(POLY_DISABLED);
 
if (cso->offset_tri)
so->pa_su_sc_mode_cntl |=
A2XX_PA_SU_SC_MODE_CNTL_POLY_OFFSET_FRONT_ENABLE |
A2XX_PA_SU_SC_MODE_CNTL_POLY_OFFSET_BACK_ENABLE |
A2XX_PA_SU_SC_MODE_CNTL_POLY_OFFSET_PARA_ENABLE;
 
return so;
}
/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/freedreno/a2xx/fd2_rasterizer.h
0,0 → 1,55
/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */
 
/*
* Copyright (C) 2012-2013 Rob Clark <robclark@freedesktop.org>
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Authors:
* Rob Clark <robclark@freedesktop.org>
*/
 
#ifndef FD2_RASTERIZER_H_
#define FD2_RASTERIZER_H_
 
#include "pipe/p_state.h"
#include "pipe/p_context.h"
 
struct fd2_rasterizer_stateobj {
struct pipe_rasterizer_state base;
uint32_t pa_sc_line_stipple;
uint32_t pa_cl_clip_cntl;
uint32_t pa_su_vtx_cntl;
uint32_t pa_su_point_size;
uint32_t pa_su_point_minmax;
uint32_t pa_su_line_cntl;
uint32_t pa_su_sc_mode_cntl;
};
 
static INLINE struct fd2_rasterizer_stateobj *
fd2_rasterizer_stateobj(struct pipe_rasterizer_state *rast)
{
return (struct fd2_rasterizer_stateobj *)rast;
}
 
void * fd2_rasterizer_state_create(struct pipe_context *pctx,
const struct pipe_rasterizer_state *cso);
 
#endif /* FD2_RASTERIZER_H_ */
/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/freedreno/a2xx/fd2_screen.c
0,0 → 1,110
/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */
 
/*
* Copyright (C) 2013 Rob Clark <robclark@freedesktop.org>
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Authors:
* Rob Clark <robclark@freedesktop.org>
*/
 
#include "pipe/p_screen.h"
#include "util/u_format.h"
 
#include "fd2_screen.h"
#include "fd2_context.h"
#include "fd2_util.h"
 
static boolean
fd2_screen_is_format_supported(struct pipe_screen *pscreen,
enum pipe_format format,
enum pipe_texture_target target,
unsigned sample_count,
unsigned usage)
{
unsigned retval = 0;
 
if ((target >= PIPE_MAX_TEXTURE_TYPES) ||
(sample_count > 1) || /* TODO add MSAA */
!util_format_is_supported(format, usage)) {
DBG("not supported: format=%s, target=%d, sample_count=%d, usage=%x",
util_format_name(format), target, sample_count, usage);
return FALSE;
}
 
/* TODO figure out how to render to other formats.. */
if ((usage & PIPE_BIND_RENDER_TARGET) &&
((format != PIPE_FORMAT_B8G8R8A8_UNORM) &&
(format != PIPE_FORMAT_B8G8R8X8_UNORM))) {
DBG("not supported render target: format=%s, target=%d, sample_count=%d, usage=%x",
util_format_name(format), target, sample_count, usage);
return FALSE;
}
 
if ((usage & (PIPE_BIND_SAMPLER_VIEW |
PIPE_BIND_VERTEX_BUFFER)) &&
(fd2_pipe2surface(format) != ~0)) {
retval |= usage & (PIPE_BIND_SAMPLER_VIEW |
PIPE_BIND_VERTEX_BUFFER);
}
 
if ((usage & (PIPE_BIND_RENDER_TARGET |
PIPE_BIND_DISPLAY_TARGET |
PIPE_BIND_SCANOUT |
PIPE_BIND_SHARED)) &&
(fd2_pipe2color(format) != ~0)) {
retval |= usage & (PIPE_BIND_RENDER_TARGET |
PIPE_BIND_DISPLAY_TARGET |
PIPE_BIND_SCANOUT |
PIPE_BIND_SHARED);
}
 
if ((usage & PIPE_BIND_DEPTH_STENCIL) &&
(fd_pipe2depth(format) != ~0)) {
retval |= PIPE_BIND_DEPTH_STENCIL;
}
 
if ((usage & PIPE_BIND_INDEX_BUFFER) &&
(fd_pipe2index(format) != ~0)) {
retval |= PIPE_BIND_INDEX_BUFFER;
}
 
if (usage & PIPE_BIND_TRANSFER_READ)
retval |= PIPE_BIND_TRANSFER_READ;
if (usage & PIPE_BIND_TRANSFER_WRITE)
retval |= PIPE_BIND_TRANSFER_WRITE;
 
if (retval != usage) {
DBG("not supported: format=%s, target=%d, sample_count=%d, "
"usage=%x, retval=%x", util_format_name(format),
target, sample_count, usage, retval);
}
 
return retval == usage;
}
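
/* Illustrative walk-through of the check above (a sketch, not upstream code):
* querying PIPE_FORMAT_B8G8R8A8_UNORM with
* usage = PIPE_BIND_RENDER_TARGET | PIPE_BIND_SAMPLER_VIEW succeeds because
* the format passes the render-target whitelist, fd2_pipe2surface() returns
* FMT_8_8_8_8 and fd2_pipe2color() returns COLORX_8_8_8_8 (see fd2_util.c
* further down), so both requested bits land in retval and retval == usage.
* Any binding that cannot be satisfied is simply left out of retval, which
* makes the final comparison fail.
*/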
 
void
fd2_screen_init(struct pipe_screen *pscreen)
{
fd_screen(pscreen)->max_rts = 1;
pscreen->context_create = fd2_context_create;
pscreen->is_format_supported = fd2_screen_is_format_supported;
}
/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/freedreno/a2xx/fd2_screen.h
0,0 → 1,36
/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */
 
/*
* Copyright (C) 2013 Rob Clark <robclark@freedesktop.org>
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Authors:
* Rob Clark <robclark@freedesktop.org>
*/
 
#ifndef FD2_SCREEN_H_
#define FD2_SCREEN_H_
 
#include "pipe/p_screen.h"
 
void fd2_screen_init(struct pipe_screen *pscreen);
 
#endif /* FD2_SCREEN_H_ */
/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/freedreno/a2xx/fd2_texture.c
0,0 → 1,178
/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */
 
/*
* Copyright (C) 2012-2013 Rob Clark <robclark@freedesktop.org>
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Authors:
* Rob Clark <robclark@freedesktop.org>
*/
 
#include "pipe/p_state.h"
#include "util/u_string.h"
#include "util/u_memory.h"
#include "util/u_inlines.h"
 
#include "fd2_texture.h"
#include "fd2_util.h"
 
static enum sq_tex_clamp
tex_clamp(unsigned wrap)
{
switch (wrap) {
case PIPE_TEX_WRAP_REPEAT:
return SQ_TEX_WRAP;
case PIPE_TEX_WRAP_CLAMP:
return SQ_TEX_CLAMP_HALF_BORDER;
case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
return SQ_TEX_CLAMP_LAST_TEXEL;
case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
return SQ_TEX_CLAMP_BORDER;
case PIPE_TEX_WRAP_MIRROR_REPEAT:
return SQ_TEX_MIRROR;
case PIPE_TEX_WRAP_MIRROR_CLAMP:
return SQ_TEX_MIRROR_ONCE_HALF_BORDER;
case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
return SQ_TEX_MIRROR_ONCE_LAST_TEXEL;
case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
return SQ_TEX_MIRROR_ONCE_BORDER;
default:
DBG("invalid wrap: %u", wrap);
return 0;
}
}
 
static enum sq_tex_filter
tex_filter(unsigned filter)
{
switch (filter) {
case PIPE_TEX_FILTER_NEAREST:
return SQ_TEX_FILTER_POINT;
case PIPE_TEX_FILTER_LINEAR:
return SQ_TEX_FILTER_BILINEAR;
default:
DBG("invalid filter: %u", filter);
return 0;
}
}
 
static void *
fd2_sampler_state_create(struct pipe_context *pctx,
const struct pipe_sampler_state *cso)
{
struct fd2_sampler_stateobj *so = CALLOC_STRUCT(fd2_sampler_stateobj);
 
if (!so)
return NULL;
 
so->base = *cso;
 
/* SQ_TEX0_PITCH() must be OR'd in later when we know the bound texture: */
so->tex0 =
A2XX_SQ_TEX_0_CLAMP_X(tex_clamp(cso->wrap_s)) |
A2XX_SQ_TEX_0_CLAMP_Y(tex_clamp(cso->wrap_t)) |
A2XX_SQ_TEX_0_CLAMP_Z(tex_clamp(cso->wrap_r));
 
so->tex3 =
A2XX_SQ_TEX_3_XY_MAG_FILTER(tex_filter(cso->mag_img_filter)) |
A2XX_SQ_TEX_3_XY_MIN_FILTER(tex_filter(cso->min_img_filter));
 
so->tex4 = 0x00000000; /* ??? */
so->tex5 = 0x00000200; /* ??? */
 
return so;
}
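
/* Rough sketch of the deferred SQ_TEX_0 combination mentioned above.  The
* real code lives in fd2_emit.c, which is not part of this listing, so the
* variable names are assumptions for illustration only:
*
*   // sampler state contributes the CLAMP_X/Y/Z fields, the sampler view
*   // (which knows the bound resource) contributes A2XX_SQ_TEX_0_PITCH():
*   uint32_t tex0 = sampler->tex0 | view->tex0;
*/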
 
static void
fd2_sampler_states_bind(struct pipe_context *pctx,
unsigned shader, unsigned start,
unsigned nr, void **hwcso)
{
if (shader == PIPE_SHADER_FRAGMENT) {
struct fd_context *ctx = fd_context(pctx);
 
/* on a2xx, since there is a flat address space for textures/samplers,
* a change in # of fragment textures/samplers will trigger patching and
* re-emitting the vertex shader:
*/
if (nr != ctx->fragtex.num_samplers)
ctx->dirty |= FD_DIRTY_TEXSTATE;
}
 
fd_sampler_states_bind(pctx, shader, start, nr, hwcso);
}
 
static struct pipe_sampler_view *
fd2_sampler_view_create(struct pipe_context *pctx, struct pipe_resource *prsc,
const struct pipe_sampler_view *cso)
{
struct fd2_pipe_sampler_view *so = CALLOC_STRUCT(fd2_pipe_sampler_view);
struct fd_resource *rsc = fd_resource(prsc);
 
if (!so)
return NULL;
 
so->base = *cso;
pipe_reference(NULL, &prsc->reference);
so->base.texture = prsc;
so->base.reference.count = 1;
so->base.context = pctx;
 
so->fmt = fd2_pipe2surface(cso->format);
 
so->tex0 = A2XX_SQ_TEX_0_PITCH(rsc->slices[0].pitch);
so->tex2 =
A2XX_SQ_TEX_2_HEIGHT(prsc->height0 - 1) |
A2XX_SQ_TEX_2_WIDTH(prsc->width0 - 1);
so->tex3 = fd2_tex_swiz(cso->format, cso->swizzle_r, cso->swizzle_g,
cso->swizzle_b, cso->swizzle_a);
 
return &so->base;
}
 
/* map gallium sampler-id to hw const-idx.. adreno uses a flat address
* space of samplers (const-idx), so we need to map the gallium sampler-id
* which is per-shader to a global const-idx space.
*
* Fragment shader sampler maps directly to const-idx, and vertex shader
* is offset by the # of fragment shader samplers. If the # of fragment
* shader samplers changes, this shifts the vertex shader indexes.
*
* TODO maybe we can do frag shader 0..N and vert shader N..0 to avoid
* this??
*/
unsigned
fd2_get_const_idx(struct fd_context *ctx, struct fd_texture_stateobj *tex,
unsigned samp_id)
{
if (tex == &ctx->fragtex)
return samp_id;
return samp_id + ctx->fragtex.num_samplers;
}
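
/* Worked example of the mapping above, assuming two fragment samplers are
* bound (ctx->fragtex.num_samplers == 2):
*
*   fd2_get_const_idx(ctx, &ctx->fragtex, 0) -> 0
*   fd2_get_const_idx(ctx, &ctx->fragtex, 1) -> 1
*   fd2_get_const_idx(ctx, &ctx->verttex, 0) -> 2   // offset past frag samplers
*
* If the fragment sampler count later changes to 3, the same vertex sampler
* maps to const-idx 3 instead, which is why fd2_sampler_states_bind() above
* flags FD_DIRTY_TEXSTATE whenever that count changes.
*/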
 
void
fd2_texture_init(struct pipe_context *pctx)
{
pctx->create_sampler_state = fd2_sampler_state_create;
pctx->bind_sampler_states = fd2_sampler_states_bind;
pctx->create_sampler_view = fd2_sampler_view_create;
pctx->set_sampler_views = fd_set_sampler_views;
}
/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/freedreno/a2xx/fd2_texture.h
0,0 → 1,68
/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */
 
/*
* Copyright (C) 2012-2013 Rob Clark <robclark@freedesktop.org>
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Authors:
* Rob Clark <robclark@freedesktop.org>
*/
 
#ifndef FD2_TEXTURE_H_
#define FD2_TEXTURE_H_
 
#include "pipe/p_context.h"
 
#include "freedreno_texture.h"
#include "freedreno_resource.h"
 
#include "fd2_context.h"
#include "fd2_util.h"
 
struct fd2_sampler_stateobj {
struct pipe_sampler_state base;
uint32_t tex0, tex3, tex4, tex5;
};
 
static INLINE struct fd2_sampler_stateobj *
fd2_sampler_stateobj(struct pipe_sampler_state *samp)
{
return (struct fd2_sampler_stateobj *)samp;
}
 
struct fd2_pipe_sampler_view {
struct pipe_sampler_view base;
enum a2xx_sq_surfaceformat fmt;
uint32_t tex0, tex2, tex3;
};
 
static INLINE struct fd2_pipe_sampler_view *
fd2_pipe_sampler_view(struct pipe_sampler_view *pview)
{
return (struct fd2_pipe_sampler_view *)pview;
}
 
unsigned fd2_get_const_idx(struct fd_context *ctx,
struct fd_texture_stateobj *tex, unsigned samp_id);
 
void fd2_texture_init(struct pipe_context *pctx);
 
#endif /* FD2_TEXTURE_H_ */
/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/freedreno/a2xx/fd2_util.c
0,0 → 1,322
/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */
 
/*
* Copyright (C) 2012 Rob Clark <robclark@freedesktop.org>
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Authors:
* Rob Clark <robclark@freedesktop.org>
*/
 
#include "pipe/p_defines.h"
#include "util/u_format.h"
 
#include "fd2_util.h"
 
enum a2xx_sq_surfaceformat
fd2_pipe2surface(enum pipe_format format)
{
switch (format) {
/* 8-bit buffers. */
case PIPE_FORMAT_A8_UNORM:
case PIPE_FORMAT_A8_SNORM:
case PIPE_FORMAT_A8_UINT:
case PIPE_FORMAT_A8_SINT:
case PIPE_FORMAT_I8_UNORM:
case PIPE_FORMAT_I8_SNORM:
case PIPE_FORMAT_I8_UINT:
case PIPE_FORMAT_I8_SINT:
case PIPE_FORMAT_L8_UNORM:
case PIPE_FORMAT_L8_SNORM:
case PIPE_FORMAT_L8_UINT:
case PIPE_FORMAT_L8_SINT:
case PIPE_FORMAT_L8_SRGB:
case PIPE_FORMAT_R8_UNORM:
case PIPE_FORMAT_R8_SNORM:
case PIPE_FORMAT_R8_UINT:
case PIPE_FORMAT_R8_SINT:
return FMT_8;
 
/* 16-bit buffers. */
case PIPE_FORMAT_B5G6R5_UNORM:
return FMT_5_6_5;
case PIPE_FORMAT_B5G5R5A1_UNORM:
case PIPE_FORMAT_B5G5R5X1_UNORM:
return FMT_1_5_5_5;
case PIPE_FORMAT_B4G4R4A4_UNORM:
case PIPE_FORMAT_B4G4R4X4_UNORM:
return FMT_4_4_4_4;
case PIPE_FORMAT_Z16_UNORM:
return FMT_16;
case PIPE_FORMAT_L8A8_UNORM:
case PIPE_FORMAT_L8A8_SNORM:
case PIPE_FORMAT_L8A8_UINT:
case PIPE_FORMAT_L8A8_SINT:
case PIPE_FORMAT_L8A8_SRGB:
case PIPE_FORMAT_R8G8_UNORM:
case PIPE_FORMAT_R8G8_SNORM:
case PIPE_FORMAT_R8G8_UINT:
case PIPE_FORMAT_R8G8_SINT:
return FMT_8_8;
case PIPE_FORMAT_R16_UNORM:
case PIPE_FORMAT_R16_SNORM:
case PIPE_FORMAT_R16_UINT:
case PIPE_FORMAT_R16_SINT:
case PIPE_FORMAT_A16_UNORM:
case PIPE_FORMAT_A16_SNORM:
case PIPE_FORMAT_A16_UINT:
case PIPE_FORMAT_A16_SINT:
case PIPE_FORMAT_L16_UNORM:
case PIPE_FORMAT_L16_SNORM:
case PIPE_FORMAT_L16_UINT:
case PIPE_FORMAT_L16_SINT:
case PIPE_FORMAT_I16_UNORM:
case PIPE_FORMAT_I16_SNORM:
case PIPE_FORMAT_I16_UINT:
case PIPE_FORMAT_I16_SINT:
return FMT_16;
case PIPE_FORMAT_R16_FLOAT:
case PIPE_FORMAT_A16_FLOAT:
case PIPE_FORMAT_L16_FLOAT:
case PIPE_FORMAT_I16_FLOAT:
return FMT_16_FLOAT;
 
/* 32-bit buffers. */
case PIPE_FORMAT_A8B8G8R8_SRGB:
case PIPE_FORMAT_A8B8G8R8_UNORM:
case PIPE_FORMAT_A8R8G8B8_UNORM:
case PIPE_FORMAT_B8G8R8A8_SRGB:
case PIPE_FORMAT_B8G8R8A8_UNORM:
case PIPE_FORMAT_B8G8R8X8_UNORM:
case PIPE_FORMAT_R8G8B8A8_SNORM:
case PIPE_FORMAT_R8G8B8A8_UNORM:
case PIPE_FORMAT_R8G8B8X8_UNORM:
case PIPE_FORMAT_R8SG8SB8UX8U_NORM:
case PIPE_FORMAT_X8B8G8R8_UNORM:
case PIPE_FORMAT_X8R8G8B8_UNORM:
case PIPE_FORMAT_R8G8B8_UNORM:
case PIPE_FORMAT_R8G8B8A8_SINT:
case PIPE_FORMAT_R8G8B8A8_UINT:
return FMT_8_8_8_8;
case PIPE_FORMAT_R10G10B10A2_UNORM:
case PIPE_FORMAT_R10G10B10X2_SNORM:
case PIPE_FORMAT_B10G10R10A2_UNORM:
case PIPE_FORMAT_B10G10R10A2_UINT:
case PIPE_FORMAT_R10SG10SB10SA2U_NORM:
return FMT_2_10_10_10;
case PIPE_FORMAT_Z24X8_UNORM:
case PIPE_FORMAT_Z24_UNORM_S8_UINT:
return FMT_24_8;
case PIPE_FORMAT_R32_UINT:
case PIPE_FORMAT_R32_SINT:
case PIPE_FORMAT_A32_UINT:
case PIPE_FORMAT_A32_SINT:
case PIPE_FORMAT_L32_UINT:
case PIPE_FORMAT_L32_SINT:
case PIPE_FORMAT_I32_UINT:
case PIPE_FORMAT_I32_SINT:
return FMT_32;
case PIPE_FORMAT_R32_FLOAT:
case PIPE_FORMAT_A32_FLOAT:
case PIPE_FORMAT_L32_FLOAT:
case PIPE_FORMAT_I32_FLOAT:
case PIPE_FORMAT_Z32_FLOAT:
return FMT_32_FLOAT;
case PIPE_FORMAT_R16G16_FLOAT:
case PIPE_FORMAT_L16A16_FLOAT:
return FMT_16_16_FLOAT;
case PIPE_FORMAT_R16G16_UNORM:
case PIPE_FORMAT_R16G16_SNORM:
case PIPE_FORMAT_R16G16_UINT:
case PIPE_FORMAT_R16G16_SINT:
case PIPE_FORMAT_L16A16_UNORM:
case PIPE_FORMAT_L16A16_SNORM:
case PIPE_FORMAT_L16A16_UINT:
case PIPE_FORMAT_L16A16_SINT:
return FMT_16_16;
 
/* 64-bit buffers. */
case PIPE_FORMAT_R16G16B16A16_UINT:
case PIPE_FORMAT_R16G16B16A16_SINT:
case PIPE_FORMAT_R16G16B16A16_UNORM:
case PIPE_FORMAT_R16G16B16A16_SNORM:
return FMT_16_16_16_16;
case PIPE_FORMAT_R16G16B16A16_FLOAT:
return FMT_16_16_16_16_FLOAT;
case PIPE_FORMAT_R32G32_FLOAT:
case PIPE_FORMAT_L32A32_FLOAT:
return FMT_32_32_FLOAT;
case PIPE_FORMAT_R32G32_SINT:
case PIPE_FORMAT_R32G32_UINT:
case PIPE_FORMAT_L32A32_UINT:
case PIPE_FORMAT_L32A32_SINT:
return FMT_32_32;
 
/* 96-bit buffers. */
case PIPE_FORMAT_R32G32B32_FLOAT:
return FMT_32_32_32_FLOAT;
 
/* 128-bit buffers. */
case PIPE_FORMAT_R32G32B32A32_SNORM:
case PIPE_FORMAT_R32G32B32A32_UNORM:
case PIPE_FORMAT_R32G32B32A32_SINT:
case PIPE_FORMAT_R32G32B32A32_UINT:
return FMT_32_32_32_32;
case PIPE_FORMAT_R32G32B32A32_FLOAT:
return FMT_32_32_32_32_FLOAT;
 
/* YUV buffers. */
case PIPE_FORMAT_UYVY:
return FMT_Cr_Y1_Cb_Y0;
case PIPE_FORMAT_YUYV:
return FMT_Y1_Cr_Y0_Cb;
 
default:
return ~0;
}
}
 
enum a2xx_colorformatx
fd2_pipe2color(enum pipe_format format)
{
switch (format) {
/* 8-bit buffers. */
case PIPE_FORMAT_A8_UNORM:
case PIPE_FORMAT_A8_SNORM:
case PIPE_FORMAT_A8_UINT:
case PIPE_FORMAT_A8_SINT:
case PIPE_FORMAT_I8_UNORM:
case PIPE_FORMAT_I8_SNORM:
case PIPE_FORMAT_I8_UINT:
case PIPE_FORMAT_I8_SINT:
case PIPE_FORMAT_L8_UNORM:
case PIPE_FORMAT_L8_SNORM:
case PIPE_FORMAT_L8_UINT:
case PIPE_FORMAT_L8_SINT:
case PIPE_FORMAT_L8_SRGB:
case PIPE_FORMAT_R8_UNORM:
case PIPE_FORMAT_R8_SNORM:
case PIPE_FORMAT_R8_UINT:
case PIPE_FORMAT_R8_SINT:
return COLORX_8;
 
/* 16-bit buffers. */
case PIPE_FORMAT_B5G6R5_UNORM:
return COLORX_5_6_5;
case PIPE_FORMAT_B5G5R5A1_UNORM:
case PIPE_FORMAT_B5G5R5X1_UNORM:
return COLORX_1_5_5_5;
case PIPE_FORMAT_B4G4R4A4_UNORM:
case PIPE_FORMAT_B4G4R4X4_UNORM:
return COLORX_4_4_4_4;
case PIPE_FORMAT_L8A8_UNORM:
case PIPE_FORMAT_L8A8_SNORM:
case PIPE_FORMAT_L8A8_UINT:
case PIPE_FORMAT_L8A8_SINT:
case PIPE_FORMAT_L8A8_SRGB:
case PIPE_FORMAT_R8G8_UNORM:
case PIPE_FORMAT_R8G8_SNORM:
case PIPE_FORMAT_R8G8_UINT:
case PIPE_FORMAT_R8G8_SINT:
case PIPE_FORMAT_Z16_UNORM:
return COLORX_8_8;
case PIPE_FORMAT_R16_FLOAT:
case PIPE_FORMAT_A16_FLOAT:
case PIPE_FORMAT_L16_FLOAT:
case PIPE_FORMAT_I16_FLOAT:
return COLORX_16_FLOAT;
 
/* 32-bit buffers. */
case PIPE_FORMAT_A8B8G8R8_SRGB:
case PIPE_FORMAT_A8B8G8R8_UNORM:
case PIPE_FORMAT_A8R8G8B8_UNORM:
case PIPE_FORMAT_B8G8R8A8_SRGB:
case PIPE_FORMAT_B8G8R8A8_UNORM:
case PIPE_FORMAT_B8G8R8X8_UNORM:
case PIPE_FORMAT_R8G8B8A8_SNORM:
case PIPE_FORMAT_R8G8B8A8_UNORM:
case PIPE_FORMAT_R8G8B8X8_UNORM:
case PIPE_FORMAT_R8SG8SB8UX8U_NORM:
case PIPE_FORMAT_X8B8G8R8_UNORM:
case PIPE_FORMAT_X8R8G8B8_UNORM:
case PIPE_FORMAT_R8G8B8_UNORM:
case PIPE_FORMAT_R8G8B8A8_SINT:
case PIPE_FORMAT_R8G8B8A8_UINT:
case PIPE_FORMAT_Z24_UNORM_S8_UINT:
case PIPE_FORMAT_Z24X8_UNORM:
return COLORX_8_8_8_8;
case PIPE_FORMAT_R32_FLOAT:
case PIPE_FORMAT_A32_FLOAT:
case PIPE_FORMAT_L32_FLOAT:
case PIPE_FORMAT_I32_FLOAT:
case PIPE_FORMAT_Z32_FLOAT:
return COLORX_32_FLOAT;
case PIPE_FORMAT_R16G16_FLOAT:
case PIPE_FORMAT_L16A16_FLOAT:
return COLORX_16_16_FLOAT;
 
/* 64-bit buffers. */
case PIPE_FORMAT_R16G16B16A16_FLOAT:
return COLORX_16_16_16_16_FLOAT;
case PIPE_FORMAT_R32G32_FLOAT:
case PIPE_FORMAT_L32A32_FLOAT:
return COLORX_32_32_FLOAT;
 
/* 128-bit buffers. */
case PIPE_FORMAT_R32G32B32A32_FLOAT:
return COLORX_32_32_32_32_FLOAT;
 
default:
return ~0;
}
}
 
static inline enum sq_tex_swiz
tex_swiz(unsigned swiz)
{
switch (swiz) {
default:
case PIPE_SWIZZLE_RED: return SQ_TEX_X;
case PIPE_SWIZZLE_GREEN: return SQ_TEX_Y;
case PIPE_SWIZZLE_BLUE: return SQ_TEX_Z;
case PIPE_SWIZZLE_ALPHA: return SQ_TEX_W;
case PIPE_SWIZZLE_ZERO: return SQ_TEX_ZERO;
case PIPE_SWIZZLE_ONE: return SQ_TEX_ONE;
}
}
 
uint32_t
fd2_tex_swiz(enum pipe_format format, unsigned swizzle_r, unsigned swizzle_g,
unsigned swizzle_b, unsigned swizzle_a)
{
const struct util_format_description *desc =
util_format_description(format);
uint8_t swiz[] = {
swizzle_r, swizzle_g, swizzle_b, swizzle_a,
PIPE_SWIZZLE_ZERO, PIPE_SWIZZLE_ONE,
PIPE_SWIZZLE_ONE, PIPE_SWIZZLE_ONE,
};
 
return A2XX_SQ_TEX_3_SWIZ_X(tex_swiz(swiz[desc->swizzle[0]])) |
A2XX_SQ_TEX_3_SWIZ_Y(tex_swiz(swiz[desc->swizzle[1]])) |
A2XX_SQ_TEX_3_SWIZ_Z(tex_swiz(swiz[desc->swizzle[2]])) |
A2XX_SQ_TEX_3_SWIZ_W(tex_swiz(swiz[desc->swizzle[3]]));
}
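
/* Worked example (illustrative): desc->swizzle[] indexes the swiz[] table
* above, so for a format with identity channel order (desc->swizzle = X,Y,Z,W)
* and the default view swizzle (swizzle_r..swizzle_a = RED..ALPHA) each
* channel resolves to itself:
*
*   A2XX_SQ_TEX_3_SWIZ_X(SQ_TEX_X) | A2XX_SQ_TEX_3_SWIZ_Y(SQ_TEX_Y) |
*   A2XX_SQ_TEX_3_SWIZ_Z(SQ_TEX_Z) | A2XX_SQ_TEX_3_SWIZ_W(SQ_TEX_W)
*
* Formats with missing channels route them through the PIPE_SWIZZLE_ZERO /
* PIPE_SWIZZLE_ONE entries at the end of the table.
*/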
/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/freedreno/a2xx/fd2_util.h
0,0 → 1,47
/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */
 
/*
* Copyright (C) 2012-2013 Rob Clark <robclark@freedesktop.org>
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Authors:
* Rob Clark <robclark@freedesktop.org>
*/
 
#ifndef FD2_UTIL_H_
#define FD2_UTIL_H_
 
#include "freedreno_util.h"
 
#include "a2xx.xml.h"
 
enum a2xx_sq_surfaceformat fd2_pipe2surface(enum pipe_format format);
enum a2xx_colorformatx fd2_pipe2color(enum pipe_format format);
uint32_t fd2_tex_swiz(enum pipe_format format, unsigned swizzle_r,
unsigned swizzle_g, unsigned swizzle_b, unsigned swizzle_a);
 
/* convert x,y to dword */
static inline uint32_t xy2d(uint16_t x, uint16_t y)
{
return ((y & 0x3fff) << 16) | (x & 0x3fff);
}
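
/* Illustrative values for the packing above (hypothetical usage):
*
*   xy2d(5, 7)      == 0x00070005   // y in the high half-word, x in the low
*   xy2d(0x4001, 0) == 0x00000001   // each coordinate is masked to 14 bits
*/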
 
#endif /* FD2_UTIL_H_ */
/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/freedreno/a2xx/fd2_zsa.c
0,0 → 1,96
/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */
 
/*
* Copyright (C) 2012 Rob Clark <robclark@freedesktop.org>
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Authors:
* Rob Clark <robclark@freedesktop.org>
*/
 
 
#include "pipe/p_state.h"
#include "util/u_string.h"
#include "util/u_memory.h"
 
#include "fd2_zsa.h"
#include "fd2_context.h"
#include "fd2_util.h"
 
void *
fd2_zsa_state_create(struct pipe_context *pctx,
const struct pipe_depth_stencil_alpha_state *cso)
{
struct fd2_zsa_stateobj *so;
 
so = CALLOC_STRUCT(fd2_zsa_stateobj);
if (!so)
return NULL;
 
so->base = *cso;
 
so->rb_depthcontrol |=
A2XX_RB_DEPTHCONTROL_ZFUNC(cso->depth.func); /* maps 1:1 */
 
if (cso->depth.enabled)
so->rb_depthcontrol |= A2XX_RB_DEPTHCONTROL_Z_ENABLE;
if (cso->depth.writemask)
so->rb_depthcontrol |= A2XX_RB_DEPTHCONTROL_Z_WRITE_ENABLE;
 
if (cso->stencil[0].enabled) {
const struct pipe_stencil_state *s = &cso->stencil[0];
 
so->rb_depthcontrol |=
A2XX_RB_DEPTHCONTROL_STENCIL_ENABLE |
A2XX_RB_DEPTHCONTROL_STENCILFUNC(s->func) | /* maps 1:1 */
A2XX_RB_DEPTHCONTROL_STENCILFAIL(fd_stencil_op(s->fail_op)) |
A2XX_RB_DEPTHCONTROL_STENCILZPASS(fd_stencil_op(s->zpass_op)) |
A2XX_RB_DEPTHCONTROL_STENCILZFAIL(fd_stencil_op(s->zfail_op));
so->rb_stencilrefmask |=
0xff000000 | /* ??? */
A2XX_RB_STENCILREFMASK_STENCILWRITEMASK(s->writemask) |
A2XX_RB_STENCILREFMASK_STENCILMASK(s->valuemask);
 
if (cso->stencil[1].enabled) {
const struct pipe_stencil_state *bs = &cso->stencil[1];
 
so->rb_depthcontrol |=
A2XX_RB_DEPTHCONTROL_BACKFACE_ENABLE |
A2XX_RB_DEPTHCONTROL_STENCILFUNC_BF(bs->func) | /* maps 1:1 */
A2XX_RB_DEPTHCONTROL_STENCILFAIL_BF(fd_stencil_op(bs->fail_op)) |
A2XX_RB_DEPTHCONTROL_STENCILZPASS_BF(fd_stencil_op(bs->zpass_op)) |
A2XX_RB_DEPTHCONTROL_STENCILZFAIL_BF(fd_stencil_op(bs->zfail_op));
so->rb_stencilrefmask_bf |=
0xff000000 | /* ??? */
A2XX_RB_STENCILREFMASK_STENCILWRITEMASK(bs->writemask) |
A2XX_RB_STENCILREFMASK_STENCILMASK(bs->valuemask);
}
}
 
if (cso->alpha.enabled) {
so->rb_colorcontrol =
A2XX_RB_COLORCONTROL_ALPHA_FUNC(cso->alpha.func) |
A2XX_RB_COLORCONTROL_ALPHA_TEST_ENABLE;
so->rb_alpha_ref = fui(cso->alpha.ref_value);
}
 
return so;
}
/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/freedreno/a2xx/fd2_zsa.h
0,0 → 1,56
/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */
 
/*
* Copyright (C) 2012-2013 Rob Clark <robclark@freedesktop.org>
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Authors:
* Rob Clark <robclark@freedesktop.org>
*/
 
#ifndef FD2_ZSA_H_
#define FD2_ZSA_H_
 
 
#include "pipe/p_state.h"
#include "pipe/p_context.h"
 
#include "freedreno_util.h"
 
struct fd2_zsa_stateobj {
struct pipe_depth_stencil_alpha_state base;
uint32_t rb_depthcontrol;
uint32_t rb_colorcontrol; /* must be OR'd w/ blend->rb_colorcontrol */
uint32_t rb_alpha_ref;
uint32_t rb_stencilrefmask;
uint32_t rb_stencilrefmask_bf;
};
 
static INLINE struct fd2_zsa_stateobj *
fd2_zsa_stateobj(struct pipe_depth_stencil_alpha_state *zsa)
{
return (struct fd2_zsa_stateobj *)zsa;
}
 
void * fd2_zsa_state_create(struct pipe_context *pctx,
const struct pipe_depth_stencil_alpha_state *cso);
 
#endif /* FD2_ZSA_H_ */
/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/freedreno/a2xx/instr-a2xx.h
0,0 → 1,390
/*
* Copyright (c) 2012 Rob Clark <robdclark@gmail.com>
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
 
#ifndef INSTR_A2XX_H_
#define INSTR_A2XX_H_
 
#define PACKED __attribute__((__packed__))
 
#include "util/u_math.h"
#include "adreno_common.xml.h"
#include "adreno_pm4.xml.h"
#include "a2xx.xml.h"
 
 
/*
* ALU instructions:
*/
 
typedef enum {
ADDs = 0,
ADD_PREVs = 1,
MULs = 2,
MUL_PREVs = 3,
MUL_PREV2s = 4,
MAXs = 5,
MINs = 6,
SETEs = 7,
SETGTs = 8,
SETGTEs = 9,
SETNEs = 10,
FRACs = 11,
TRUNCs = 12,
FLOORs = 13,
EXP_IEEE = 14,
LOG_CLAMP = 15,
LOG_IEEE = 16,
RECIP_CLAMP = 17,
RECIP_FF = 18,
RECIP_IEEE = 19,
RECIPSQ_CLAMP = 20,
RECIPSQ_FF = 21,
RECIPSQ_IEEE = 22,
MOVAs = 23,
MOVA_FLOORs = 24,
SUBs = 25,
SUB_PREVs = 26,
PRED_SETEs = 27,
PRED_SETNEs = 28,
PRED_SETGTs = 29,
PRED_SETGTEs = 30,
PRED_SET_INVs = 31,
PRED_SET_POPs = 32,
PRED_SET_CLRs = 33,
PRED_SET_RESTOREs = 34,
KILLEs = 35,
KILLGTs = 36,
KILLGTEs = 37,
KILLNEs = 38,
KILLONEs = 39,
SQRT_IEEE = 40,
MUL_CONST_0 = 42,
MUL_CONST_1 = 43,
ADD_CONST_0 = 44,
ADD_CONST_1 = 45,
SUB_CONST_0 = 46,
SUB_CONST_1 = 47,
SIN = 48,
COS = 49,
RETAIN_PREV = 50,
} instr_scalar_opc_t;
 
typedef enum {
ADDv = 0,
MULv = 1,
MAXv = 2,
MINv = 3,
SETEv = 4,
SETGTv = 5,
SETGTEv = 6,
SETNEv = 7,
FRACv = 8,
TRUNCv = 9,
FLOORv = 10,
MULADDv = 11,
CNDEv = 12,
CNDGTEv = 13,
CNDGTv = 14,
DOT4v = 15,
DOT3v = 16,
DOT2ADDv = 17,
CUBEv = 18,
MAX4v = 19,
PRED_SETE_PUSHv = 20,
PRED_SETNE_PUSHv = 21,
PRED_SETGT_PUSHv = 22,
PRED_SETGTE_PUSHv = 23,
KILLEv = 24,
KILLGTv = 25,
KILLGTEv = 26,
KILLNEv = 27,
DSTv = 28,
MOVAv = 29,
} instr_vector_opc_t;
 
typedef struct PACKED {
/* dword0: */
uint8_t vector_dest : 6;
uint8_t vector_dest_rel : 1;
uint8_t low_precision_16b_fp : 1;
uint8_t scalar_dest : 6;
uint8_t scalar_dest_rel : 1;
uint8_t export_data : 1;
uint8_t vector_write_mask : 4;
uint8_t scalar_write_mask : 4;
uint8_t vector_clamp : 1;
uint8_t scalar_clamp : 1;
instr_scalar_opc_t scalar_opc : 6;
/* dword1: */
uint8_t src3_swiz : 8;
uint8_t src2_swiz : 8;
uint8_t src1_swiz : 8;
uint8_t src3_reg_negate : 1;
uint8_t src2_reg_negate : 1;
uint8_t src1_reg_negate : 1;
uint8_t pred_select : 2;
uint8_t relative_addr : 1;
uint8_t const_1_rel_abs : 1;
uint8_t const_0_rel_abs : 1;
/* dword2: */
uint8_t src3_reg : 6;
uint8_t src3_reg_select : 1;
uint8_t src3_reg_abs : 1;
uint8_t src2_reg : 6;
uint8_t src2_reg_select : 1;
uint8_t src2_reg_abs : 1;
uint8_t src1_reg : 6;
uint8_t src1_reg_select : 1;
uint8_t src1_reg_abs : 1;
instr_vector_opc_t vector_opc : 5;
uint8_t src3_sel : 1;
uint8_t src2_sel : 1;
uint8_t src1_sel : 1;
} instr_alu_t;
 
 
 
/*
* CF instructions:
*/
 
typedef enum {
NOP = 0,
EXEC = 1,
EXEC_END = 2,
COND_EXEC = 3,
COND_EXEC_END = 4,
COND_PRED_EXEC = 5,
COND_PRED_EXEC_END = 6,
LOOP_START = 7,
LOOP_END = 8,
COND_CALL = 9,
RETURN = 10,
COND_JMP = 11,
ALLOC = 12,
COND_EXEC_PRED_CLEAN = 13,
COND_EXEC_PRED_CLEAN_END = 14,
MARK_VS_FETCH_DONE = 15,
} instr_cf_opc_t;
 
typedef enum {
RELATIVE_ADDR = 0,
ABSOLUTE_ADDR = 1,
} instr_addr_mode_t;
 
typedef enum {
SQ_NO_ALLOC = 0,
SQ_POSITION = 1,
SQ_PARAMETER_PIXEL = 2,
SQ_MEMORY = 3,
} instr_alloc_type_t;
 
typedef struct PACKED {
uint16_t address : 9;
uint8_t reserved0 : 3;
uint8_t count : 3;
uint8_t yeild : 1;
uint16_t serialize : 12;
uint8_t vc : 6; /* vertex cache? */
uint8_t bool_addr : 8;
uint8_t condition : 1;
instr_addr_mode_t address_mode : 1;
instr_cf_opc_t opc : 4;
} instr_cf_exec_t;
 
typedef struct PACKED {
uint16_t address : 10;
uint8_t reserved0 : 6;
uint8_t loop_id : 5;
uint32_t reserved1 : 22;
instr_addr_mode_t address_mode : 1;
instr_cf_opc_t opc : 4;
} instr_cf_loop_t;
 
typedef struct PACKED {
uint16_t address : 10;
uint8_t reserved0 : 3;
uint8_t force_call : 1;
uint8_t predicated_jmp : 1;
uint32_t reserved1 : 18;
uint8_t direction : 1;
uint8_t bool_addr : 8;
uint8_t condition : 1;
instr_addr_mode_t address_mode : 1;
instr_cf_opc_t opc : 4;
} instr_cf_jmp_call_t;
 
typedef struct PACKED {
uint8_t size : 4;
uint64_t reserved0 : 36;
uint8_t no_serial : 1;
instr_alloc_type_t buffer_select : 2;
uint8_t alloc_mode : 1;
instr_cf_opc_t opc : 4;
} instr_cf_alloc_t;
 
typedef union PACKED {
instr_cf_exec_t exec;
instr_cf_loop_t loop;
instr_cf_jmp_call_t jmp_call;
instr_cf_alloc_t alloc;
struct PACKED {
uint64_t dummy : 44;
instr_cf_opc_t opc : 4;
};
} instr_cf_t;
 
 
 
/*
* FETCH instructions:
*/
 
typedef enum {
VTX_FETCH = 0,
TEX_FETCH = 1,
TEX_GET_BORDER_COLOR_FRAC = 16,
TEX_GET_COMP_TEX_LOD = 17,
TEX_GET_GRADIENTS = 18,
TEX_GET_WEIGHTS = 19,
TEX_SET_TEX_LOD = 24,
TEX_SET_GRADIENTS_H = 25,
TEX_SET_GRADIENTS_V = 26,
TEX_RESERVED_4 = 27,
} instr_fetch_opc_t;
 
typedef enum {
TEX_FILTER_POINT = 0,
TEX_FILTER_LINEAR = 1,
TEX_FILTER_BASEMAP = 2, /* only applicable for mip-filter */
TEX_FILTER_USE_FETCH_CONST = 3,
} instr_tex_filter_t;
 
typedef enum {
ANISO_FILTER_DISABLED = 0,
ANISO_FILTER_MAX_1_1 = 1,
ANISO_FILTER_MAX_2_1 = 2,
ANISO_FILTER_MAX_4_1 = 3,
ANISO_FILTER_MAX_8_1 = 4,
ANISO_FILTER_MAX_16_1 = 5,
ANISO_FILTER_USE_FETCH_CONST = 7,
} instr_aniso_filter_t;
 
typedef enum {
ARBITRARY_FILTER_2X4_SYM = 0,
ARBITRARY_FILTER_2X4_ASYM = 1,
ARBITRARY_FILTER_4X2_SYM = 2,
ARBITRARY_FILTER_4X2_ASYM = 3,
ARBITRARY_FILTER_4X4_SYM = 4,
ARBITRARY_FILTER_4X4_ASYM = 5,
ARBITRARY_FILTER_USE_FETCH_CONST = 7,
} instr_arbitrary_filter_t;
 
typedef enum {
SAMPLE_CENTROID = 0,
SAMPLE_CENTER = 1,
} instr_sample_loc_t;
 
typedef enum a2xx_sq_surfaceformat instr_surf_fmt_t;
 
typedef struct PACKED {
/* dword0: */
instr_fetch_opc_t opc : 5;
uint8_t src_reg : 6;
uint8_t src_reg_am : 1;
uint8_t dst_reg : 6;
uint8_t dst_reg_am : 1;
uint8_t fetch_valid_only : 1;
uint8_t const_idx : 5;
uint8_t tx_coord_denorm : 1;
uint8_t src_swiz : 6;
/* dword1: */
uint16_t dst_swiz : 12;
instr_tex_filter_t mag_filter : 2;
instr_tex_filter_t min_filter : 2;
instr_tex_filter_t mip_filter : 2;
instr_aniso_filter_t aniso_filter : 3;
instr_arbitrary_filter_t arbitrary_filter : 3;
instr_tex_filter_t vol_mag_filter : 2;
instr_tex_filter_t vol_min_filter : 2;
uint8_t use_comp_lod : 1;
uint8_t use_reg_lod : 2; /* 0 for cube, 1 for 2d */
uint8_t pred_select : 1;
/* dword2: */
uint8_t use_reg_gradients : 1;
instr_sample_loc_t sample_location : 1;
uint8_t lod_bias : 7;
uint8_t unused : 7;
uint8_t offset_x : 5;
uint8_t offset_y : 5;
uint8_t offset_z : 5;
uint8_t pred_condition : 1;
} instr_fetch_tex_t;
 
typedef struct PACKED {
/* dword0: */
instr_fetch_opc_t opc : 5;
uint8_t src_reg : 6;
uint8_t src_reg_am : 1;
uint8_t dst_reg : 6;
uint8_t dst_reg_am : 1;
uint8_t must_be_one : 1;
uint8_t const_index : 5;
uint8_t const_index_sel : 2;
uint8_t reserved0 : 3;
uint8_t src_swiz : 2;
/* dword1: */
uint16_t dst_swiz : 12;
uint8_t format_comp_all : 1; /* '1' for signed, '0' for unsigned? */
uint8_t num_format_all : 1; /* '0' for normalized, '1' for unnormalized */
uint8_t signed_rf_mode_all : 1;
uint8_t reserved1 : 1;
instr_surf_fmt_t format : 6;
uint8_t reserved2 : 1;
uint8_t exp_adjust_all : 7;
uint8_t reserved3 : 1;
uint8_t pred_select : 1;
/* dword2: */
uint8_t stride : 8;
/* possibly offset and reserved4 are swapped on a200? */
uint8_t offset : 8;
uint8_t reserved4 : 8;
uint8_t reserved5 : 7;
uint8_t pred_condition : 1;
} instr_fetch_vtx_t;
 
typedef union PACKED {
instr_fetch_tex_t tex;
instr_fetch_vtx_t vtx;
struct PACKED {
/* dword0: */
instr_fetch_opc_t opc : 5;
uint32_t dummy0 : 27;
/* dword1: */
uint32_t dummy1 : 32;
/* dword2: */
uint32_t dummy2 : 32;
};
} instr_fetch_t;
 
#endif /* INSTR_A2XX_H_ */
/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/freedreno/a2xx/ir-a2xx.c
0,0 → 1,636
/*
* Copyright (c) 2012 Rob Clark <robdclark@gmail.com>
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
 
#include "ir-a2xx.h"
 
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <assert.h>
 
#include "freedreno_util.h"
#include "instr-a2xx.h"
 
#define DEBUG_MSG(f, ...) do { if (0) DBG(f, ##__VA_ARGS__); } while (0)
#define WARN_MSG(f, ...) DBG("WARN: "f, ##__VA_ARGS__)
#define ERROR_MSG(f, ...) DBG("ERROR: "f, ##__VA_ARGS__)
 
#define REG_MASK 0x3f
 
static int cf_emit(struct ir2_cf *cf, instr_cf_t *instr);
 
static int instr_emit(struct ir2_instruction *instr, uint32_t *dwords,
uint32_t idx, struct ir2_shader_info *info);
 
static void reg_update_stats(struct ir2_register *reg,
struct ir2_shader_info *info, bool dest);
static uint32_t reg_fetch_src_swiz(struct ir2_register *reg, uint32_t n);
static uint32_t reg_fetch_dst_swiz(struct ir2_register *reg);
static uint32_t reg_alu_dst_swiz(struct ir2_register *reg);
static uint32_t reg_alu_src_swiz(struct ir2_register *reg);
 
/* simple allocator to carve allocations out of an up-front allocated heap,
* so that we can free everything easily in one shot.
*/
static void * ir2_alloc(struct ir2_shader *shader, int sz)
{
void *ptr = &shader->heap[shader->heap_idx];
shader->heap_idx += align(sz, 4);
return ptr;
}
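
/* Illustrative note: this is a bump allocator.  ir2_alloc(shader, 5) returns
* &shader->heap[heap_idx] and advances heap_idx by align(5, 4) == 8, so every
* CF, instruction and register carved out of the heap is released together
* when the shader itself is freed in ir2_shader_destroy().
*/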
 
static char * ir2_strdup(struct ir2_shader *shader, const char *str)
{
char *ptr = NULL;
if (str) {
int len = strlen(str);
ptr = ir2_alloc(shader, len+1);
memcpy(ptr, str, len);
ptr[len] = '\0';
}
return ptr;
}
 
struct ir2_shader * ir2_shader_create(void)
{
DEBUG_MSG("");
return calloc(1, sizeof(struct ir2_shader));
}
 
void ir2_shader_destroy(struct ir2_shader *shader)
{
DEBUG_MSG("");
free(shader);
}
 
/* resolve addr/cnt/sequence fields in the individual CF's */
static int shader_resolve(struct ir2_shader *shader, struct ir2_shader_info *info)
{
uint32_t addr;
unsigned i;
int j;
 
addr = shader->cfs_count / 2;
for (i = 0; i < shader->cfs_count; i++) {
struct ir2_cf *cf = shader->cfs[i];
if ((cf->cf_type == EXEC) || (cf->cf_type == EXEC_END)) {
uint32_t sequence = 0;
 
if (cf->exec.addr && (cf->exec.addr != addr))
WARN_MSG("invalid addr '%d' at CF %d", cf->exec.addr, i);
if (cf->exec.cnt && (cf->exec.cnt != cf->exec.instrs_count))
WARN_MSG("invalid cnt '%d' at CF %d", cf->exec.cnt, i);
 
for (j = cf->exec.instrs_count - 1; j >= 0; j--) {
struct ir2_instruction *instr = cf->exec.instrs[j];
sequence <<= 2;
if (instr->instr_type == IR2_FETCH)
sequence |= 0x1;
if (instr->sync)
sequence |= 0x2;
}
 
cf->exec.addr = addr;
cf->exec.cnt = cf->exec.instrs_count;
cf->exec.sequence = sequence;
 
addr += cf->exec.instrs_count;
}
}
 
info->sizedwords = 3 * addr;
 
return 0;
}
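
/* Worked example for the resolve pass above: the CF block always occupies the
* first cfs_count/2 slots of 3 dwords (two 48-bit CFs per slot), so the first
* EXEC's instructions start at addr == cfs_count/2 and each instruction takes
* one further 3-dword slot.  The per-EXEC 'sequence' field packs 2 bits per
* instruction, instruction 0 in the low bits: bit0 set for a FETCH, bit1 set
* for sync.  An EXEC containing [FETCH(sync), ALU, FETCH] therefore gets
* sequence == 0x13 (binary 01 00 11).
*/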
 
void * ir2_shader_assemble(struct ir2_shader *shader, struct ir2_shader_info *info)
{
uint32_t i, j;
uint32_t *ptr, *dwords = NULL;
uint32_t idx = 0;
int ret;
 
info->sizedwords = 0;
info->max_reg = -1;
info->max_input_reg = 0;
info->regs_written = 0;
 
/* we need an even # of CF's.. insert a NOP if needed */
if (shader->cfs_count != align(shader->cfs_count, 2))
ir2_cf_create(shader, NOP);
 
/* first pass, resolve sizes and addresses: */
ret = shader_resolve(shader, info);
if (ret) {
ERROR_MSG("resolve failed: %d", ret);
goto fail;
}
 
ptr = dwords = calloc(4, info->sizedwords);
 
/* second pass, emit CF program in pairs: */
for (i = 0; i < shader->cfs_count; i += 2) {
instr_cf_t *cfs = (instr_cf_t *)ptr;
ret = cf_emit(shader->cfs[i], &cfs[0]);
if (ret) {
ERROR_MSG("CF emit failed: %d\n", ret);
goto fail;
}
ret = cf_emit(shader->cfs[i+1], &cfs[1]);
if (ret) {
ERROR_MSG("CF emit failed: %d\n", ret);
goto fail;
}
ptr += 3;
assert((ptr - dwords) <= info->sizedwords);
}
 
/* third pass, emit ALU/FETCH: */
for (i = 0; i < shader->cfs_count; i++) {
struct ir2_cf *cf = shader->cfs[i];
if ((cf->cf_type == EXEC) || (cf->cf_type == EXEC_END)) {
for (j = 0; j < cf->exec.instrs_count; j++) {
ret = instr_emit(cf->exec.instrs[j], ptr, idx++, info);
if (ret) {
ERROR_MSG("instruction emit failed: %d", ret);
goto fail;
}
ptr += 3;
assert((ptr - dwords) <= info->sizedwords);
}
}
}
 
return dwords;
 
fail:
free(dwords);
return NULL;
}
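
/* Note on the resulting layout (illustrative): each instr_cf_t is 48 bits, so
* a pair of CFs fills exactly 3 dwords, which is why the CF count is padded
* to an even number with a NOP and why the loop above advances ptr by 3 per
* pair.  ALU/FETCH instructions are 96 bits (3 dwords) each and follow
* immediately after the CF block, at the slot addresses assigned by
* shader_resolve().
*/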
 
 
struct ir2_cf * ir2_cf_create(struct ir2_shader *shader, instr_cf_opc_t cf_type)
{
struct ir2_cf *cf = ir2_alloc(shader, sizeof(struct ir2_cf));
DEBUG_MSG("%d", cf_type);
cf->shader = shader;
cf->cf_type = cf_type;
assert(shader->cfs_count < ARRAY_SIZE(shader->cfs));
shader->cfs[shader->cfs_count++] = cf;
return cf;
}
 
 
/*
* CF instructions:
*/
 
static int cf_emit(struct ir2_cf *cf, instr_cf_t *instr)
{
memset(instr, 0, sizeof(*instr));
 
instr->opc = cf->cf_type;
 
switch (cf->cf_type) {
case NOP:
break;
case EXEC:
case EXEC_END:
assert(cf->exec.addr <= 0x1ff);
assert(cf->exec.cnt <= 0x6);
assert(cf->exec.sequence <= 0xfff);
instr->exec.address = cf->exec.addr;
instr->exec.count = cf->exec.cnt;
instr->exec.serialize = cf->exec.sequence;
break;
case ALLOC:
assert(cf->alloc.size <= 0xf);
instr->alloc.size = cf->alloc.size;
switch (cf->alloc.type) {
case SQ_POSITION:
case SQ_PARAMETER_PIXEL:
instr->alloc.buffer_select = cf->alloc.type;
break;
default:
ERROR_MSG("invalid alloc type: %d", cf->alloc.type);
return -1;
}
break;
case COND_EXEC:
case COND_EXEC_END:
case COND_PRED_EXEC:
case COND_PRED_EXEC_END:
case LOOP_START:
case LOOP_END:
case COND_CALL:
case RETURN:
case COND_JMP:
case COND_EXEC_PRED_CLEAN:
case COND_EXEC_PRED_CLEAN_END:
case MARK_VS_FETCH_DONE:
ERROR_MSG("TODO");
return -1;
}
 
return 0;
}
 
 
struct ir2_instruction * ir2_instr_create(struct ir2_cf *cf, int instr_type)
{
struct ir2_instruction *instr =
ir2_alloc(cf->shader, sizeof(struct ir2_instruction));
DEBUG_MSG("%d", instr_type);
instr->shader = cf->shader;
instr->pred = cf->shader->pred;
instr->instr_type = instr_type;
assert(cf->exec.instrs_count < ARRAY_SIZE(cf->exec.instrs));
cf->exec.instrs[cf->exec.instrs_count++] = instr;
return instr;
}
 
 
/*
* FETCH instructions:
*/
 
static int instr_emit_fetch(struct ir2_instruction *instr,
uint32_t *dwords, uint32_t idx,
struct ir2_shader_info *info)
{
instr_fetch_t *fetch = (instr_fetch_t *)dwords;
int reg = 0;
struct ir2_register *dst_reg = instr->regs[reg++];
struct ir2_register *src_reg = instr->regs[reg++];
 
memset(fetch, 0, sizeof(*fetch));
 
reg_update_stats(dst_reg, info, true);
reg_update_stats(src_reg, info, false);
 
fetch->opc = instr->fetch.opc;
 
if (instr->fetch.opc == VTX_FETCH) {
instr_fetch_vtx_t *vtx = &fetch->vtx;
 
assert(instr->fetch.stride <= 0xff);
assert(instr->fetch.fmt <= 0x3f);
assert(instr->fetch.const_idx <= 0x1f);
assert(instr->fetch.const_idx_sel <= 0x3);
 
vtx->src_reg = src_reg->num;
vtx->src_swiz = reg_fetch_src_swiz(src_reg, 1);
vtx->dst_reg = dst_reg->num;
vtx->dst_swiz = reg_fetch_dst_swiz(dst_reg);
vtx->must_be_one = 1;
vtx->const_index = instr->fetch.const_idx;
vtx->const_index_sel = instr->fetch.const_idx_sel;
vtx->format_comp_all = !!instr->fetch.is_signed;
vtx->num_format_all = !instr->fetch.is_normalized;
vtx->format = instr->fetch.fmt;
vtx->stride = instr->fetch.stride;
vtx->offset = instr->fetch.offset;
 
if (instr->pred != IR2_PRED_NONE) {
vtx->pred_select = 1;
vtx->pred_condition = (instr->pred == IR2_PRED_EQ) ? 1 : 0;
}
 
/* XXX seems like every FETCH but the first has
* this bit set:
*/
vtx->reserved3 = (idx > 0) ? 0x1 : 0x0;
vtx->reserved0 = (idx > 0) ? 0x2 : 0x3;
} else if (instr->fetch.opc == TEX_FETCH) {
instr_fetch_tex_t *tex = &fetch->tex;
 
assert(instr->fetch.const_idx <= 0x1f);
 
tex->src_reg = src_reg->num;
tex->src_swiz = reg_fetch_src_swiz(src_reg, 3);
tex->dst_reg = dst_reg->num;
tex->dst_swiz = reg_fetch_dst_swiz(dst_reg);
tex->const_idx = instr->fetch.const_idx;
tex->mag_filter = TEX_FILTER_USE_FETCH_CONST;
tex->min_filter = TEX_FILTER_USE_FETCH_CONST;
tex->mip_filter = TEX_FILTER_USE_FETCH_CONST;
tex->aniso_filter = ANISO_FILTER_USE_FETCH_CONST;
tex->arbitrary_filter = ARBITRARY_FILTER_USE_FETCH_CONST;
tex->vol_mag_filter = TEX_FILTER_USE_FETCH_CONST;
tex->vol_min_filter = TEX_FILTER_USE_FETCH_CONST;
tex->use_comp_lod = 1;
tex->use_reg_lod = !instr->fetch.is_cube;
tex->sample_location = SAMPLE_CENTER;
 
if (instr->pred != IR2_PRED_NONE) {
tex->pred_select = 1;
tex->pred_condition = (instr->pred == IR2_PRED_EQ) ? 1 : 0;
}
 
} else {
ERROR_MSG("invalid fetch opc: %d\n", instr->fetch.opc);
return -1;
}
 
return 0;
}
 
/*
* ALU instructions:
*/
 
static int instr_emit_alu(struct ir2_instruction *instr, uint32_t *dwords,
struct ir2_shader_info *info)
{
int reg = 0;
instr_alu_t *alu = (instr_alu_t *)dwords;
struct ir2_register *dst_reg = instr->regs[reg++];
struct ir2_register *src1_reg;
struct ir2_register *src2_reg;
struct ir2_register *src3_reg;
 
memset(alu, 0, sizeof(*alu));
 
/* handle instructions w/ 3 src operands: */
switch (instr->alu.vector_opc) {
case MULADDv:
case CNDEv:
case CNDGTEv:
case CNDGTv:
case DOT2ADDv:
/* note: disassembler lists 3rd src first, ie:
* MULADDv Rdst = Rsrc3 + (Rsrc1 * Rsrc2)
* which is the reason for this strange ordering.
*/
src3_reg = instr->regs[reg++];
break;
default:
src3_reg = NULL;
break;
}
 
src1_reg = instr->regs[reg++];
src2_reg = instr->regs[reg++];
 
reg_update_stats(dst_reg, info, true);
reg_update_stats(src1_reg, info, false);
reg_update_stats(src2_reg, info, false);
 
assert((dst_reg->flags & ~IR2_REG_EXPORT) == 0);
assert(!dst_reg->swizzle || (strlen(dst_reg->swizzle) == 4));
assert((src1_reg->flags & IR2_REG_EXPORT) == 0);
assert(!src1_reg->swizzle || (strlen(src1_reg->swizzle) == 4));
assert((src2_reg->flags & IR2_REG_EXPORT) == 0);
assert(!src2_reg->swizzle || (strlen(src2_reg->swizzle) == 4));
 
if (instr->alu.vector_opc == ~0) {
alu->vector_opc = MAXv;
alu->vector_write_mask = 0;
} else {
alu->vector_opc = instr->alu.vector_opc;
alu->vector_write_mask = reg_alu_dst_swiz(dst_reg);
}
 
alu->vector_dest = dst_reg->num;
alu->export_data = !!(dst_reg->flags & IR2_REG_EXPORT);
 
// TODO predicate case/condition.. need to add to parser
 
alu->src2_reg = src2_reg->num;
alu->src2_swiz = reg_alu_src_swiz(src2_reg);
alu->src2_reg_negate = !!(src2_reg->flags & IR2_REG_NEGATE);
alu->src2_reg_abs = !!(src2_reg->flags & IR2_REG_ABS);
alu->src2_sel = !(src2_reg->flags & IR2_REG_CONST);
 
alu->src1_reg = src1_reg->num;
alu->src1_swiz = reg_alu_src_swiz(src1_reg);
alu->src1_reg_negate = !!(src1_reg->flags & IR2_REG_NEGATE);
alu->src1_reg_abs = !!(src1_reg->flags & IR2_REG_ABS);
alu->src1_sel = !(src1_reg->flags & IR2_REG_CONST);
 
alu->vector_clamp = instr->alu.vector_clamp;
alu->scalar_clamp = instr->alu.scalar_clamp;
 
if (instr->alu.scalar_opc != ~0) {
struct ir2_register *sdst_reg = instr->regs[reg++];
 
reg_update_stats(sdst_reg, info, true);
 
assert(sdst_reg->flags == dst_reg->flags);
 
if (src3_reg) {
assert(src3_reg == instr->regs[reg]);
reg++;
} else {
src3_reg = instr->regs[reg++];
}
 
alu->scalar_dest = sdst_reg->num;
alu->scalar_write_mask = reg_alu_dst_swiz(sdst_reg);
alu->scalar_opc = instr->alu.scalar_opc;
} else {
/* not sure if this is required, but adreno compiler seems
* to always set scalar opc to MAXs if it is not used:
*/
alu->scalar_opc = MAXs;
}
 
if (src3_reg) {
reg_update_stats(src3_reg, info, false);
 
alu->src3_reg = src3_reg->num;
alu->src3_swiz = reg_alu_src_swiz(src3_reg);
alu->src3_reg_negate = !!(src3_reg->flags & IR2_REG_NEGATE);
alu->src3_reg_abs = !!(src3_reg->flags & IR2_REG_ABS);
alu->src3_sel = !(src3_reg->flags & IR2_REG_CONST);
} else {
/* not sure if this is required, but adreno compiler seems
* to always set register bank for 3rd src if unused:
*/
alu->src3_sel = 1;
}
 
if (instr->pred != IR2_PRED_NONE) {
alu->pred_select = (instr->pred == IR2_PRED_EQ) ? 3 : 2;
}
 
return 0;
}
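
/* Note on the operand layout consumed above (illustrative): for a plain 2-src
* vector op, instr->regs[] is { dst, src1, src2 }.  For the 3-src ops in the
* switch (MULADDv, CNDEv, ...) the third source is stored first, i.e.
* { dst, src3, src1, src2 }, matching the disassembler's
* "MULADDv Rdst = Rsrc3 + (Rsrc1 * Rsrc2)" ordering.  When a scalar op is
* co-issued, its destination register is appended next, followed by the
* scalar source, which travels in the src3 encoding slot.
*/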
 
static int instr_emit(struct ir2_instruction *instr, uint32_t *dwords,
uint32_t idx, struct ir2_shader_info *info)
{
switch (instr->instr_type) {
case IR2_FETCH: return instr_emit_fetch(instr, dwords, idx, info);
case IR2_ALU: return instr_emit_alu(instr, dwords, info);
}
return -1;
}
 
 
struct ir2_register * ir2_reg_create(struct ir2_instruction *instr,
int num, const char *swizzle, int flags)
{
struct ir2_register *reg =
ir2_alloc(instr->shader, sizeof(struct ir2_register));
DEBUG_MSG("%x, %d, %s", flags, num, swizzle);
assert(num <= REG_MASK);
reg->flags = flags;
reg->num = num;
reg->swizzle = ir2_strdup(instr->shader, swizzle);
assert(instr->regs_count < ARRAY_SIZE(instr->regs));
instr->regs[instr->regs_count++] = reg;
return reg;
}
 
static void reg_update_stats(struct ir2_register *reg,
struct ir2_shader_info *info, bool dest)
{
if (!(reg->flags & (IR2_REG_CONST|IR2_REG_EXPORT))) {
info->max_reg = MAX2(info->max_reg, reg->num);
 
if (dest) {
info->regs_written |= (1 << reg->num);
} else if (!(info->regs_written & (1 << reg->num))) {
/* for registers that haven't been written, they must be an
* input register that the thread scheduler (presumably?)
* needs to know about:
*/
info->max_input_reg = MAX2(info->max_input_reg, reg->num);
}
}
}
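
/* Worked example: if the first emitted instruction reads R2 before anything
* has written it, the regs_written bit for R2 is still clear, so R2 is
* counted as a shader input and max_input_reg is raised to at least 2; a
* later write to R2 then sets bit 2 of regs_written.
*/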
 
static uint32_t reg_fetch_src_swiz(struct ir2_register *reg, uint32_t n)
{
uint32_t swiz = 0;
int i;
 
assert(reg->flags == 0);
assert(reg->swizzle);
 
DEBUG_MSG("fetch src R%d.%s", reg->num, reg->swizzle);
 
for (i = n-1; i >= 0; i--) {
swiz <<= 2;
switch (reg->swizzle[i]) {
default:
ERROR_MSG("invalid fetch src swizzle: %s", reg->swizzle);
case 'x': swiz |= 0x0; break;
case 'y': swiz |= 0x1; break;
case 'z': swiz |= 0x2; break;
case 'w': swiz |= 0x3; break;
}
}
 
return swiz;
}
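
/* Worked example: 2 bits per component, component 0 in the low bits, so a
* register with swizzle "xyz" and n == 3 encodes to 0x24
* (binary 10 01 00: x=0, y=1, z=2).
*/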
 
static uint32_t reg_fetch_dst_swiz(struct ir2_register *reg)
{
uint32_t swiz = 0;
int i;
 
assert(reg->flags == 0);
assert(!reg->swizzle || (strlen(reg->swizzle) == 4));
 
DEBUG_MSG("fetch dst R%d.%s", reg->num, reg->swizzle);
 
if (reg->swizzle) {
for (i = 3; i >= 0; i--) {
swiz <<= 3;
switch (reg->swizzle[i]) {
default:
ERROR_MSG("invalid dst swizzle: %s", reg->swizzle);
case 'x': swiz |= 0x0; break;
case 'y': swiz |= 0x1; break;
case 'z': swiz |= 0x2; break;
case 'w': swiz |= 0x3; break;
case '0': swiz |= 0x4; break;
case '1': swiz |= 0x5; break;
case '_': swiz |= 0x7; break;
}
}
} else {
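/* no swizzle given: 0x688 is the identity swizzle (x,y,z,w), three bits per component */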
swiz = 0x688;
}
 
return swiz;
}
 
/* actually, a write-mask */
static uint32_t reg_alu_dst_swiz(struct ir2_register *reg)
{
uint32_t swiz = 0;
int i;
 
assert((reg->flags & ~IR2_REG_EXPORT) == 0);
assert(!reg->swizzle || (strlen(reg->swizzle) == 4));
 
DEBUG_MSG("alu dst R%d.%s", reg->num, reg->swizzle);
 
if (reg->swizzle) {
for (i = 3; i >= 0; i--) {
swiz <<= 1;
if (reg->swizzle[i] == "xyzw"[i]) {
swiz |= 0x1;
} else if (reg->swizzle[i] != '_') {
ERROR_MSG("invalid dst swizzle: %s", reg->swizzle);
break;
}
}
} else {
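/* no swizzle given: enable all four components of the write-mask */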
swiz = 0xf;
}
 
return swiz;
}
 
static uint32_t reg_alu_src_swiz(struct ir2_register *reg)
{
uint32_t swiz = 0;
int i;
 
assert((reg->flags & IR2_REG_EXPORT) == 0);
assert(!reg->swizzle || (strlen(reg->swizzle) == 4));
 
DEBUG_MSG("vector src R%d.%s", reg->num, reg->swizzle);
 
if (reg->swizzle) {
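/* each channel is encoded as a 2-bit offset from its own
* position (hence the "- i" below), so identity maps to 0:
*/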
for (i = 3; i >= 0; i--) {
swiz <<= 2;
switch (reg->swizzle[i]) {
default:
ERROR_MSG("invalid vector src swizzle: %s", reg->swizzle);
case 'x': swiz |= (0x0 - i) & 0x3; break;
case 'y': swiz |= (0x1 - i) & 0x3; break;
case 'z': swiz |= (0x2 - i) & 0x3; break;
case 'w': swiz |= (0x3 - i) & 0x3; break;
}
}
} else {
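/* no swizzle given: zero offsets, i.e. the identity swizzle */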
swiz = 0x0;
}
 
return swiz;
}
/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/freedreno/a2xx/ir-a2xx.h
0,0 → 1,180
/*
* Copyright (c) 2012 Rob Clark <robdclark@gmail.com>
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
 
#ifndef IR2_H_
#define IR2_H_
 
#include <stdint.h>
#include <stdbool.h>
 
#include "instr-a2xx.h"
 
/* low level intermediate representation of an adreno a2xx shader program */
 
struct ir2_shader;
 
struct ir2_shader_info {
uint16_t sizedwords;
int8_t max_reg; /* highest GPR # used by shader */
uint8_t max_input_reg;
uint64_t regs_written;
};
 
struct ir2_register {
enum {
IR2_REG_CONST = 0x1,
IR2_REG_EXPORT = 0x2,
IR2_REG_NEGATE = 0x4,
IR2_REG_ABS = 0x8,
} flags;
int num;
char *swizzle;
};
 
enum ir2_pred {
IR2_PRED_NONE = 0,
IR2_PRED_EQ = 1,
IR2_PRED_NE = 2,
};
 
struct ir2_instruction {
struct ir2_shader *shader;
enum {
IR2_FETCH,
IR2_ALU,
} instr_type;
enum ir2_pred pred;
int sync;
unsigned regs_count;
struct ir2_register *regs[5];
union {
/* FETCH specific: */
struct {
instr_fetch_opc_t opc;
unsigned const_idx;
/* texture fetch specific: */
bool is_cube : 1;
/* vertex fetch specific: */
unsigned const_idx_sel;
enum a2xx_sq_surfaceformat fmt;
bool is_signed : 1;
bool is_normalized : 1;
uint32_t stride;
uint32_t offset;
} fetch;
/* ALU specific: */
struct {
instr_vector_opc_t vector_opc;
instr_scalar_opc_t scalar_opc;
bool vector_clamp : 1;
bool scalar_clamp : 1;
} alu;
};
};
 
struct ir2_cf {
struct ir2_shader *shader;
instr_cf_opc_t cf_type;
 
union {
/* EXEC/EXEC_END specific: */
struct {
unsigned instrs_count;
struct ir2_instruction *instrs[6];
uint32_t addr, cnt, sequence;
} exec;
/* ALLOC specific: */
struct {
instr_alloc_type_t type; /* SQ_POSITION or SQ_PARAMETER_PIXEL */
int size;
} alloc;
};
};
 
struct ir2_shader {
unsigned cfs_count;
struct ir2_cf *cfs[0x56];
uint32_t heap[100 * 4096];
unsigned heap_idx;
 
enum ir2_pred pred; /* pred inherited by newly created instrs */
};
 
struct ir2_shader * ir2_shader_create(void);
void ir2_shader_destroy(struct ir2_shader *shader);
void * ir2_shader_assemble(struct ir2_shader *shader,
struct ir2_shader_info *info);
 
struct ir2_cf * ir2_cf_create(struct ir2_shader *shader, instr_cf_opc_t cf_type);
 
struct ir2_instruction * ir2_instr_create(struct ir2_cf *cf, int instr_type);
 
struct ir2_register * ir2_reg_create(struct ir2_instruction *instr,
int num, const char *swizzle, int flags);
 
/* some helper fxns: */
 
static inline struct ir2_cf *
ir2_cf_create_alloc(struct ir2_shader *shader, instr_alloc_type_t type, int size)
{
struct ir2_cf *cf = ir2_cf_create(shader, ALLOC);
if (!cf)
return cf;
cf->alloc.type = type;
cf->alloc.size = size;
return cf;
}
static inline struct ir2_instruction *
ir2_instr_create_alu(struct ir2_cf *cf, instr_vector_opc_t vop, instr_scalar_opc_t sop)
{
struct ir2_instruction *instr = ir2_instr_create(cf, IR2_ALU);
if (!instr)
return instr;
instr->alu.vector_opc = vop;
instr->alu.scalar_opc = sop;
return instr;
}
static inline struct ir2_instruction *
ir2_instr_create_vtx_fetch(struct ir2_cf *cf, int ci, int cis,
enum a2xx_sq_surfaceformat fmt, bool is_signed, int stride)
{
struct ir2_instruction *instr = ir2_instr_create(cf, IR2_FETCH);
instr->fetch.opc = VTX_FETCH;
instr->fetch.const_idx = ci;
instr->fetch.const_idx_sel = cis;
instr->fetch.fmt = fmt;
instr->fetch.is_signed = is_signed;
instr->fetch.stride = stride;
return instr;
}
static inline struct ir2_instruction *
ir2_instr_create_tex_fetch(struct ir2_cf *cf, int ci)
{
struct ir2_instruction *instr = ir2_instr_create(cf, IR2_FETCH);
instr->fetch.opc = TEX_FETCH;
instr->fetch.const_idx = ci;
return instr;
}
 
 
#endif /* IR2_H_ */
/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/freedreno/a3xx/a3xx.xml.h
0,0 → 1,2828
#ifndef A3XX_XML
#define A3XX_XML
 
/* Autogenerated file, DO NOT EDIT manually!
 
This file was generated by the rules-ng-ng headergen tool in this git repository:
http://github.com/freedreno/envytools/
git clone https://github.com/freedreno/envytools.git
 
The rules-ng-ng source files this header was generated from are:
- /home/robclark/src/freedreno/envytools/rnndb/adreno.xml ( 364 bytes, from 2013-11-30 14:47:15)
- /home/robclark/src/freedreno/envytools/rnndb/freedreno_copyright.xml ( 1453 bytes, from 2013-03-31 16:51:27)
- /home/robclark/src/freedreno/envytools/rnndb/adreno/a2xx.xml ( 32901 bytes, from 2014-06-02 15:21:30)
- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_common.xml ( 10551 bytes, from 2014-11-13 22:44:30)
- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_pm4.xml ( 14895 bytes, from 2015-04-19 15:23:28)
- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml ( 66709 bytes, from 2015-04-12 18:16:35)
- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml ( 59314 bytes, from 2015-04-19 16:21:40)
 
Copyright (C) 2013-2015 by the following authors:
- Rob Clark <robdclark@gmail.com> (robclark)
 
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
 
The above copyright notice and this permission notice (including the
next paragraph) shall be included in all copies or substantial
portions of the Software.
 
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
 
 
enum a3xx_tile_mode {
LINEAR = 0,
TILE_32X32 = 2,
};
 
enum a3xx_state_block_id {
HLSQ_BLOCK_ID_TP_TEX = 2,
HLSQ_BLOCK_ID_TP_MIPMAP = 3,
HLSQ_BLOCK_ID_SP_VS = 4,
HLSQ_BLOCK_ID_SP_FS = 6,
};
 
enum a3xx_cache_opcode {
INVALIDATE = 1,
};
 
enum a3xx_vtx_fmt {
VFMT_32_FLOAT = 0,
VFMT_32_32_FLOAT = 1,
VFMT_32_32_32_FLOAT = 2,
VFMT_32_32_32_32_FLOAT = 3,
VFMT_16_FLOAT = 4,
VFMT_16_16_FLOAT = 5,
VFMT_16_16_16_FLOAT = 6,
VFMT_16_16_16_16_FLOAT = 7,
VFMT_32_FIXED = 8,
VFMT_32_32_FIXED = 9,
VFMT_32_32_32_FIXED = 10,
VFMT_32_32_32_32_FIXED = 11,
VFMT_16_SINT = 16,
VFMT_16_16_SINT = 17,
VFMT_16_16_16_SINT = 18,
VFMT_16_16_16_16_SINT = 19,
VFMT_16_UINT = 20,
VFMT_16_16_UINT = 21,
VFMT_16_16_16_UINT = 22,
VFMT_16_16_16_16_UINT = 23,
VFMT_16_SNORM = 24,
VFMT_16_16_SNORM = 25,
VFMT_16_16_16_SNORM = 26,
VFMT_16_16_16_16_SNORM = 27,
VFMT_16_UNORM = 28,
VFMT_16_16_UNORM = 29,
VFMT_16_16_16_UNORM = 30,
VFMT_16_16_16_16_UNORM = 31,
VFMT_32_UINT = 32,
VFMT_32_32_UINT = 33,
VFMT_32_32_32_UINT = 34,
VFMT_32_32_32_32_UINT = 35,
VFMT_32_SINT = 36,
VFMT_32_32_SINT = 37,
VFMT_32_32_32_SINT = 38,
VFMT_32_32_32_32_SINT = 39,
VFMT_8_UINT = 40,
VFMT_8_8_UINT = 41,
VFMT_8_8_8_UINT = 42,
VFMT_8_8_8_8_UINT = 43,
VFMT_8_UNORM = 44,
VFMT_8_8_UNORM = 45,
VFMT_8_8_8_UNORM = 46,
VFMT_8_8_8_8_UNORM = 47,
VFMT_8_SINT = 48,
VFMT_8_8_SINT = 49,
VFMT_8_8_8_SINT = 50,
VFMT_8_8_8_8_SINT = 51,
VFMT_8_SNORM = 52,
VFMT_8_8_SNORM = 53,
VFMT_8_8_8_SNORM = 54,
VFMT_8_8_8_8_SNORM = 55,
VFMT_10_10_10_2_UINT = 60,
VFMT_10_10_10_2_UNORM = 61,
VFMT_10_10_10_2_SINT = 62,
VFMT_10_10_10_2_SNORM = 63,
};
 
enum a3xx_tex_fmt {
TFMT_5_6_5_UNORM = 4,
TFMT_5_5_5_1_UNORM = 5,
TFMT_4_4_4_4_UNORM = 7,
TFMT_Z16_UNORM = 9,
TFMT_X8Z24_UNORM = 10,
TFMT_Z32_FLOAT = 11,
TFMT_NV12_UV_TILED = 17,
TFMT_NV12_Y_TILED = 19,
TFMT_NV12_UV = 21,
TFMT_NV12_Y = 23,
TFMT_I420_Y = 24,
TFMT_I420_U = 26,
TFMT_I420_V = 27,
TFMT_ATC_RGB = 32,
TFMT_ATC_RGBA_EXPLICIT = 33,
TFMT_ETC1 = 34,
TFMT_ATC_RGBA_INTERPOLATED = 35,
TFMT_DXT1 = 36,
TFMT_DXT3 = 37,
TFMT_DXT5 = 38,
TFMT_10_10_10_2_UNORM = 41,
TFMT_9_9_9_E5_FLOAT = 42,
TFMT_11_11_10_FLOAT = 43,
TFMT_A8_UNORM = 44,
TFMT_L8_A8_UNORM = 47,
TFMT_8_UNORM = 48,
TFMT_8_8_UNORM = 49,
TFMT_8_8_8_UNORM = 50,
TFMT_8_8_8_8_UNORM = 51,
TFMT_8_SNORM = 52,
TFMT_8_8_SNORM = 53,
TFMT_8_8_8_SNORM = 54,
TFMT_8_8_8_8_SNORM = 55,
TFMT_8_UINT = 56,
TFMT_8_8_UINT = 57,
TFMT_8_8_8_UINT = 58,
TFMT_8_8_8_8_UINT = 59,
TFMT_8_SINT = 60,
TFMT_8_8_SINT = 61,
TFMT_8_8_8_SINT = 62,
TFMT_8_8_8_8_SINT = 63,
TFMT_16_FLOAT = 64,
TFMT_16_16_FLOAT = 65,
TFMT_16_16_16_16_FLOAT = 67,
TFMT_16_UINT = 68,
TFMT_16_16_UINT = 69,
TFMT_16_16_16_16_UINT = 71,
TFMT_16_SINT = 72,
TFMT_16_16_SINT = 73,
TFMT_16_16_16_16_SINT = 75,
TFMT_16_UNORM = 76,
TFMT_16_16_UNORM = 77,
TFMT_16_16_16_16_UNORM = 79,
TFMT_16_SNORM = 80,
TFMT_16_16_SNORM = 81,
TFMT_16_16_16_16_SNORM = 83,
TFMT_32_FLOAT = 84,
TFMT_32_32_FLOAT = 85,
TFMT_32_32_32_32_FLOAT = 87,
TFMT_32_UINT = 88,
TFMT_32_32_UINT = 89,
TFMT_32_32_32_32_UINT = 91,
TFMT_32_SINT = 92,
TFMT_32_32_SINT = 93,
TFMT_32_32_32_32_SINT = 95,
TFMT_ETC2_RG11_SNORM = 112,
TFMT_ETC2_RG11_UNORM = 113,
TFMT_ETC2_R11_SNORM = 114,
TFMT_ETC2_R11_UNORM = 115,
TFMT_ETC2_RGBA8 = 116,
TFMT_ETC2_RGB8A1 = 117,
TFMT_ETC2_RGB8 = 118,
};
 
enum a3xx_tex_fetchsize {
TFETCH_DISABLE = 0,
TFETCH_1_BYTE = 1,
TFETCH_2_BYTE = 2,
TFETCH_4_BYTE = 3,
TFETCH_8_BYTE = 4,
TFETCH_16_BYTE = 5,
};
 
enum a3xx_color_fmt {
RB_R5G6B5_UNORM = 0,
RB_R5G5B5A1_UNORM = 1,
RB_R4G4B4A4_UNORM = 3,
RB_R8G8B8_UNORM = 4,
RB_R8G8B8A8_UNORM = 8,
RB_R8G8B8A8_SNORM = 9,
RB_R8G8B8A8_UINT = 10,
RB_R8G8B8A8_SINT = 11,
RB_R8G8_UNORM = 12,
RB_R8G8_SNORM = 13,
RB_R8_UINT = 14,
RB_R8_SINT = 15,
RB_R10G10B10A2_UNORM = 16,
RB_A8_UNORM = 20,
RB_R8_UNORM = 21,
RB_R16_FLOAT = 24,
RB_R16G16_FLOAT = 25,
RB_R16G16B16A16_FLOAT = 27,
RB_R11G11B10_FLOAT = 28,
RB_R16_SNORM = 32,
RB_R16G16_SNORM = 33,
RB_R16G16B16A16_SNORM = 35,
RB_R16_UNORM = 36,
RB_R16G16_UNORM = 37,
RB_R16G16B16A16_UNORM = 39,
RB_R16_SINT = 40,
RB_R16G16_SINT = 41,
RB_R16G16B16A16_SINT = 43,
RB_R16_UINT = 44,
RB_R16G16_UINT = 45,
RB_R16G16B16A16_UINT = 47,
RB_R32_FLOAT = 48,
RB_R32G32_FLOAT = 49,
RB_R32G32B32A32_FLOAT = 51,
RB_R32_SINT = 52,
RB_R32G32_SINT = 53,
RB_R32G32B32A32_SINT = 55,
RB_R32_UINT = 56,
RB_R32G32_UINT = 57,
RB_R32G32B32A32_UINT = 59,
};
 
enum a3xx_sp_perfcounter_select {
SP_FS_CFLOW_INSTRUCTIONS = 12,
SP_FS_FULL_ALU_INSTRUCTIONS = 14,
SP0_ICL1_MISSES = 26,
SP_ALU_ACTIVE_CYCLES = 29,
};
 
enum a3xx_rop_code {
ROP_CLEAR = 0,
ROP_NOR = 1,
ROP_AND_INVERTED = 2,
ROP_COPY_INVERTED = 3,
ROP_AND_REVERSE = 4,
ROP_INVERT = 5,
ROP_XOR = 6,
ROP_NAND = 7,
ROP_AND = 8,
ROP_EQUIV = 9,
ROP_NOOP = 10,
ROP_OR_INVERTED = 11,
ROP_COPY = 12,
ROP_OR_REVERSE = 13,
ROP_OR = 14,
ROP_SET = 15,
};
 
enum a3xx_rb_blend_opcode {
BLEND_DST_PLUS_SRC = 0,
BLEND_SRC_MINUS_DST = 1,
BLEND_DST_MINUS_SRC = 2,
BLEND_MIN_DST_SRC = 3,
BLEND_MAX_DST_SRC = 4,
};
 
enum a3xx_intp_mode {
SMOOTH = 0,
FLAT = 1,
};
 
enum a3xx_repl_mode {
S = 1,
T = 2,
ONE_T = 3,
};
 
enum a3xx_tex_filter {
A3XX_TEX_NEAREST = 0,
A3XX_TEX_LINEAR = 1,
A3XX_TEX_ANISO = 2,
};
 
enum a3xx_tex_clamp {
A3XX_TEX_REPEAT = 0,
A3XX_TEX_CLAMP_TO_EDGE = 1,
A3XX_TEX_MIRROR_REPEAT = 2,
A3XX_TEX_CLAMP_TO_BORDER = 3,
A3XX_TEX_MIRROR_CLAMP = 4,
};
 
enum a3xx_tex_aniso {
A3XX_TEX_ANISO_1 = 0,
A3XX_TEX_ANISO_2 = 1,
A3XX_TEX_ANISO_4 = 2,
A3XX_TEX_ANISO_8 = 3,
A3XX_TEX_ANISO_16 = 4,
};
 
enum a3xx_tex_swiz {
A3XX_TEX_X = 0,
A3XX_TEX_Y = 1,
A3XX_TEX_Z = 2,
A3XX_TEX_W = 3,
A3XX_TEX_ZERO = 4,
A3XX_TEX_ONE = 5,
};
 
enum a3xx_tex_type {
A3XX_TEX_1D = 0,
A3XX_TEX_2D = 1,
A3XX_TEX_CUBE = 2,
A3XX_TEX_3D = 3,
};
 
#define A3XX_INT0_RBBM_GPU_IDLE 0x00000001
#define A3XX_INT0_RBBM_AHB_ERROR 0x00000002
#define A3XX_INT0_RBBM_REG_TIMEOUT 0x00000004
#define A3XX_INT0_RBBM_ME_MS_TIMEOUT 0x00000008
#define A3XX_INT0_RBBM_PFP_MS_TIMEOUT 0x00000010
#define A3XX_INT0_RBBM_ATB_BUS_OVERFLOW 0x00000020
#define A3XX_INT0_VFD_ERROR 0x00000040
#define A3XX_INT0_CP_SW_INT 0x00000080
#define A3XX_INT0_CP_T0_PACKET_IN_IB 0x00000100
#define A3XX_INT0_CP_OPCODE_ERROR 0x00000200
#define A3XX_INT0_CP_RESERVED_BIT_ERROR 0x00000400
#define A3XX_INT0_CP_HW_FAULT 0x00000800
#define A3XX_INT0_CP_DMA 0x00001000
#define A3XX_INT0_CP_IB2_INT 0x00002000
#define A3XX_INT0_CP_IB1_INT 0x00004000
#define A3XX_INT0_CP_RB_INT 0x00008000
#define A3XX_INT0_CP_REG_PROTECT_FAULT 0x00010000
#define A3XX_INT0_CP_RB_DONE_TS 0x00020000
#define A3XX_INT0_CP_VS_DONE_TS 0x00040000
#define A3XX_INT0_CP_PS_DONE_TS 0x00080000
#define A3XX_INT0_CACHE_FLUSH_TS 0x00100000
#define A3XX_INT0_CP_AHB_ERROR_HALT 0x00200000
#define A3XX_INT0_MISC_HANG_DETECT 0x01000000
#define A3XX_INT0_UCHE_OOB_ACCESS 0x02000000
#define REG_A3XX_RBBM_HW_VERSION 0x00000000
 
#define REG_A3XX_RBBM_HW_RELEASE 0x00000001
 
#define REG_A3XX_RBBM_HW_CONFIGURATION 0x00000002
 
#define REG_A3XX_RBBM_CLOCK_CTL 0x00000010
 
#define REG_A3XX_RBBM_SP_HYST_CNT 0x00000012
 
#define REG_A3XX_RBBM_SW_RESET_CMD 0x00000018
 
#define REG_A3XX_RBBM_AHB_CTL0 0x00000020
 
#define REG_A3XX_RBBM_AHB_CTL1 0x00000021
 
#define REG_A3XX_RBBM_AHB_CMD 0x00000022
 
#define REG_A3XX_RBBM_AHB_ERROR_STATUS 0x00000027
 
#define REG_A3XX_RBBM_GPR0_CTL 0x0000002e
 
#define REG_A3XX_RBBM_STATUS 0x00000030
#define A3XX_RBBM_STATUS_HI_BUSY 0x00000001
#define A3XX_RBBM_STATUS_CP_ME_BUSY 0x00000002
#define A3XX_RBBM_STATUS_CP_PFP_BUSY 0x00000004
#define A3XX_RBBM_STATUS_CP_NRT_BUSY 0x00004000
#define A3XX_RBBM_STATUS_VBIF_BUSY 0x00008000
#define A3XX_RBBM_STATUS_TSE_BUSY 0x00010000
#define A3XX_RBBM_STATUS_RAS_BUSY 0x00020000
#define A3XX_RBBM_STATUS_RB_BUSY 0x00040000
#define A3XX_RBBM_STATUS_PC_DCALL_BUSY 0x00080000
#define A3XX_RBBM_STATUS_PC_VSD_BUSY 0x00100000
#define A3XX_RBBM_STATUS_VFD_BUSY 0x00200000
#define A3XX_RBBM_STATUS_VPC_BUSY 0x00400000
#define A3XX_RBBM_STATUS_UCHE_BUSY 0x00800000
#define A3XX_RBBM_STATUS_SP_BUSY 0x01000000
#define A3XX_RBBM_STATUS_TPL1_BUSY 0x02000000
#define A3XX_RBBM_STATUS_MARB_BUSY 0x04000000
#define A3XX_RBBM_STATUS_VSC_BUSY 0x08000000
#define A3XX_RBBM_STATUS_ARB_BUSY 0x10000000
#define A3XX_RBBM_STATUS_HLSQ_BUSY 0x20000000
#define A3XX_RBBM_STATUS_GPU_BUSY_NOHC 0x40000000
#define A3XX_RBBM_STATUS_GPU_BUSY 0x80000000
 
#define REG_A3XX_RBBM_NQWAIT_UNTIL 0x00000040
 
#define REG_A3XX_RBBM_WAIT_IDLE_CLOCKS_CTL 0x00000033
 
#define REG_A3XX_RBBM_INTERFACE_HANG_INT_CTL 0x00000050
 
#define REG_A3XX_RBBM_INTERFACE_HANG_MASK_CTL0 0x00000051
 
#define REG_A3XX_RBBM_INTERFACE_HANG_MASK_CTL1 0x00000054
 
#define REG_A3XX_RBBM_INTERFACE_HANG_MASK_CTL2 0x00000057
 
#define REG_A3XX_RBBM_INTERFACE_HANG_MASK_CTL3 0x0000005a
 
#define REG_A3XX_RBBM_INT_SET_CMD 0x00000060
 
#define REG_A3XX_RBBM_INT_CLEAR_CMD 0x00000061
 
#define REG_A3XX_RBBM_INT_0_MASK 0x00000063
 
#define REG_A3XX_RBBM_INT_0_STATUS 0x00000064
 
#define REG_A3XX_RBBM_PERFCTR_CTL 0x00000080
#define A3XX_RBBM_PERFCTR_CTL_ENABLE 0x00000001
 
#define REG_A3XX_RBBM_PERFCTR_LOAD_CMD0 0x00000081
 
#define REG_A3XX_RBBM_PERFCTR_LOAD_CMD1 0x00000082
 
#define REG_A3XX_RBBM_PERFCTR_LOAD_VALUE_LO 0x00000084
 
#define REG_A3XX_RBBM_PERFCTR_LOAD_VALUE_HI 0x00000085
 
#define REG_A3XX_RBBM_PERFCOUNTER0_SELECT 0x00000086
 
#define REG_A3XX_RBBM_PERFCOUNTER1_SELECT 0x00000087
 
#define REG_A3XX_RBBM_GPU_BUSY_MASKED 0x00000088
 
#define REG_A3XX_RBBM_PERFCTR_CP_0_LO 0x00000090
 
#define REG_A3XX_RBBM_PERFCTR_CP_0_HI 0x00000091
 
#define REG_A3XX_RBBM_PERFCTR_RBBM_0_LO 0x00000092
 
#define REG_A3XX_RBBM_PERFCTR_RBBM_0_HI 0x00000093
 
#define REG_A3XX_RBBM_PERFCTR_RBBM_1_LO 0x00000094
 
#define REG_A3XX_RBBM_PERFCTR_RBBM_1_HI 0x00000095
 
#define REG_A3XX_RBBM_PERFCTR_PC_0_LO 0x00000096
 
#define REG_A3XX_RBBM_PERFCTR_PC_0_HI 0x00000097
 
#define REG_A3XX_RBBM_PERFCTR_PC_1_LO 0x00000098
 
#define REG_A3XX_RBBM_PERFCTR_PC_1_HI 0x00000099
 
#define REG_A3XX_RBBM_PERFCTR_PC_2_LO 0x0000009a
 
#define REG_A3XX_RBBM_PERFCTR_PC_2_HI 0x0000009b
 
#define REG_A3XX_RBBM_PERFCTR_PC_3_LO 0x0000009c
 
#define REG_A3XX_RBBM_PERFCTR_PC_3_HI 0x0000009d
 
#define REG_A3XX_RBBM_PERFCTR_VFD_0_LO 0x0000009e
 
#define REG_A3XX_RBBM_PERFCTR_VFD_0_HI 0x0000009f
 
#define REG_A3XX_RBBM_PERFCTR_VFD_1_LO 0x000000a0
 
#define REG_A3XX_RBBM_PERFCTR_VFD_1_HI 0x000000a1
 
#define REG_A3XX_RBBM_PERFCTR_HLSQ_0_LO 0x000000a2
 
#define REG_A3XX_RBBM_PERFCTR_HLSQ_0_HI 0x000000a3
 
#define REG_A3XX_RBBM_PERFCTR_HLSQ_1_LO 0x000000a4
 
#define REG_A3XX_RBBM_PERFCTR_HLSQ_1_HI 0x000000a5
 
#define REG_A3XX_RBBM_PERFCTR_HLSQ_2_LO 0x000000a6
 
#define REG_A3XX_RBBM_PERFCTR_HLSQ_2_HI 0x000000a7
 
#define REG_A3XX_RBBM_PERFCTR_HLSQ_3_LO 0x000000a8
 
#define REG_A3XX_RBBM_PERFCTR_HLSQ_3_HI 0x000000a9
 
#define REG_A3XX_RBBM_PERFCTR_HLSQ_4_LO 0x000000aa
 
#define REG_A3XX_RBBM_PERFCTR_HLSQ_4_HI 0x000000ab
 
#define REG_A3XX_RBBM_PERFCTR_HLSQ_5_LO 0x000000ac
 
#define REG_A3XX_RBBM_PERFCTR_HLSQ_5_HI 0x000000ad
 
#define REG_A3XX_RBBM_PERFCTR_VPC_0_LO 0x000000ae
 
#define REG_A3XX_RBBM_PERFCTR_VPC_0_HI 0x000000af
 
#define REG_A3XX_RBBM_PERFCTR_VPC_1_LO 0x000000b0
 
#define REG_A3XX_RBBM_PERFCTR_VPC_1_HI 0x000000b1
 
#define REG_A3XX_RBBM_PERFCTR_TSE_0_LO 0x000000b2
 
#define REG_A3XX_RBBM_PERFCTR_TSE_0_HI 0x000000b3
 
#define REG_A3XX_RBBM_PERFCTR_TSE_1_LO 0x000000b4
 
#define REG_A3XX_RBBM_PERFCTR_TSE_1_HI 0x000000b5
 
#define REG_A3XX_RBBM_PERFCTR_RAS_0_LO 0x000000b6
 
#define REG_A3XX_RBBM_PERFCTR_RAS_0_HI 0x000000b7
 
#define REG_A3XX_RBBM_PERFCTR_RAS_1_LO 0x000000b8
 
#define REG_A3XX_RBBM_PERFCTR_RAS_1_HI 0x000000b9
 
#define REG_A3XX_RBBM_PERFCTR_UCHE_0_LO 0x000000ba
 
#define REG_A3XX_RBBM_PERFCTR_UCHE_0_HI 0x000000bb
 
#define REG_A3XX_RBBM_PERFCTR_UCHE_1_LO 0x000000bc
 
#define REG_A3XX_RBBM_PERFCTR_UCHE_1_HI 0x000000bd
 
#define REG_A3XX_RBBM_PERFCTR_UCHE_2_LO 0x000000be
 
#define REG_A3XX_RBBM_PERFCTR_UCHE_2_HI 0x000000bf
 
#define REG_A3XX_RBBM_PERFCTR_UCHE_3_LO 0x000000c0
 
#define REG_A3XX_RBBM_PERFCTR_UCHE_3_HI 0x000000c1
 
#define REG_A3XX_RBBM_PERFCTR_UCHE_4_LO 0x000000c2
 
#define REG_A3XX_RBBM_PERFCTR_UCHE_4_HI 0x000000c3
 
#define REG_A3XX_RBBM_PERFCTR_UCHE_5_LO 0x000000c4
 
#define REG_A3XX_RBBM_PERFCTR_UCHE_5_HI 0x000000c5
 
#define REG_A3XX_RBBM_PERFCTR_TP_0_LO 0x000000c6
 
#define REG_A3XX_RBBM_PERFCTR_TP_0_HI 0x000000c7
 
#define REG_A3XX_RBBM_PERFCTR_TP_1_LO 0x000000c8
 
#define REG_A3XX_RBBM_PERFCTR_TP_1_HI 0x000000c9
 
#define REG_A3XX_RBBM_PERFCTR_TP_2_LO 0x000000ca
 
#define REG_A3XX_RBBM_PERFCTR_TP_2_HI 0x000000cb
 
#define REG_A3XX_RBBM_PERFCTR_TP_3_LO 0x000000cc
 
#define REG_A3XX_RBBM_PERFCTR_TP_3_HI 0x000000cd
 
#define REG_A3XX_RBBM_PERFCTR_TP_4_LO 0x000000ce
 
#define REG_A3XX_RBBM_PERFCTR_TP_4_HI 0x000000cf
 
#define REG_A3XX_RBBM_PERFCTR_TP_5_LO 0x000000d0
 
#define REG_A3XX_RBBM_PERFCTR_TP_5_HI 0x000000d1
 
#define REG_A3XX_RBBM_PERFCTR_SP_0_LO 0x000000d2
 
#define REG_A3XX_RBBM_PERFCTR_SP_0_HI 0x000000d3
 
#define REG_A3XX_RBBM_PERFCTR_SP_1_LO 0x000000d4
 
#define REG_A3XX_RBBM_PERFCTR_SP_1_HI 0x000000d5
 
#define REG_A3XX_RBBM_PERFCTR_SP_2_LO 0x000000d6
 
#define REG_A3XX_RBBM_PERFCTR_SP_2_HI 0x000000d7
 
#define REG_A3XX_RBBM_PERFCTR_SP_3_LO 0x000000d8
 
#define REG_A3XX_RBBM_PERFCTR_SP_3_HI 0x000000d9
 
#define REG_A3XX_RBBM_PERFCTR_SP_4_LO 0x000000da
 
#define REG_A3XX_RBBM_PERFCTR_SP_4_HI 0x000000db
 
#define REG_A3XX_RBBM_PERFCTR_SP_5_LO 0x000000dc
 
#define REG_A3XX_RBBM_PERFCTR_SP_5_HI 0x000000dd
 
#define REG_A3XX_RBBM_PERFCTR_SP_6_LO 0x000000de
 
#define REG_A3XX_RBBM_PERFCTR_SP_6_HI 0x000000df
 
#define REG_A3XX_RBBM_PERFCTR_SP_7_LO 0x000000e0
 
#define REG_A3XX_RBBM_PERFCTR_SP_7_HI 0x000000e1
 
#define REG_A3XX_RBBM_PERFCTR_RB_0_LO 0x000000e2
 
#define REG_A3XX_RBBM_PERFCTR_RB_0_HI 0x000000e3
 
#define REG_A3XX_RBBM_PERFCTR_RB_1_LO 0x000000e4
 
#define REG_A3XX_RBBM_PERFCTR_RB_1_HI 0x000000e5
 
#define REG_A3XX_RBBM_PERFCTR_PWR_0_LO 0x000000ea
 
#define REG_A3XX_RBBM_PERFCTR_PWR_0_HI 0x000000eb
 
#define REG_A3XX_RBBM_PERFCTR_PWR_1_LO 0x000000ec
 
#define REG_A3XX_RBBM_PERFCTR_PWR_1_HI 0x000000ed
 
#define REG_A3XX_RBBM_RBBM_CTL 0x00000100
 
#define REG_A3XX_RBBM_DEBUG_BUS_CTL 0x00000111
 
#define REG_A3XX_RBBM_DEBUG_BUS_DATA_STATUS 0x00000112
 
#define REG_A3XX_CP_PFP_UCODE_ADDR 0x000001c9
 
#define REG_A3XX_CP_PFP_UCODE_DATA 0x000001ca
 
#define REG_A3XX_CP_ROQ_ADDR 0x000001cc
 
#define REG_A3XX_CP_ROQ_DATA 0x000001cd
 
#define REG_A3XX_CP_MERCIU_ADDR 0x000001d1
 
#define REG_A3XX_CP_MERCIU_DATA 0x000001d2
 
#define REG_A3XX_CP_MERCIU_DATA2 0x000001d3
 
#define REG_A3XX_CP_MEQ_ADDR 0x000001da
 
#define REG_A3XX_CP_MEQ_DATA 0x000001db
 
#define REG_A3XX_CP_WFI_PEND_CTR 0x000001f5
 
#define REG_A3XX_RBBM_PM_OVERRIDE2 0x0000039d
 
#define REG_A3XX_CP_PERFCOUNTER_SELECT 0x00000445
 
#define REG_A3XX_CP_HW_FAULT 0x0000045c
 
#define REG_A3XX_CP_PROTECT_CTRL 0x0000045e
 
#define REG_A3XX_CP_PROTECT_STATUS 0x0000045f
 
static inline uint32_t REG_A3XX_CP_PROTECT(uint32_t i0) { return 0x00000460 + 0x1*i0; }
 
static inline uint32_t REG_A3XX_CP_PROTECT_REG(uint32_t i0) { return 0x00000460 + 0x1*i0; }
 
#define REG_A3XX_CP_AHB_FAULT 0x0000054d
 
#define REG_A3XX_SQ_GPR_MANAGEMENT 0x00000d00
 
#define REG_A3XX_SQ_INST_STORE_MANAGMENT 0x00000d02
 
#define REG_A3XX_TP0_CHICKEN 0x00000e1e
 
#define REG_A3XX_SP_GLOBAL_MEM_SIZE 0x00000e22
 
#define REG_A3XX_SP_GLOBAL_MEM_ADDR 0x00000e23
 
#define REG_A3XX_GRAS_CL_CLIP_CNTL 0x00002040
#define A3XX_GRAS_CL_CLIP_CNTL_IJ_PERSP_CENTER 0x00001000
#define A3XX_GRAS_CL_CLIP_CNTL_CLIP_DISABLE 0x00010000
#define A3XX_GRAS_CL_CLIP_CNTL_ZFAR_CLIP_DISABLE 0x00020000
#define A3XX_GRAS_CL_CLIP_CNTL_VP_CLIP_CODE_IGNORE 0x00080000
#define A3XX_GRAS_CL_CLIP_CNTL_VP_XFORM_DISABLE 0x00100000
#define A3XX_GRAS_CL_CLIP_CNTL_PERSP_DIVISION_DISABLE 0x00200000
#define A3XX_GRAS_CL_CLIP_CNTL_ZCOORD 0x00800000
#define A3XX_GRAS_CL_CLIP_CNTL_WCOORD 0x01000000
#define A3XX_GRAS_CL_CLIP_CNTL_ZCLIP_DISABLE 0x02000000
 
#define REG_A3XX_GRAS_CL_GB_CLIP_ADJ 0x00002044
#define A3XX_GRAS_CL_GB_CLIP_ADJ_HORZ__MASK 0x000003ff
#define A3XX_GRAS_CL_GB_CLIP_ADJ_HORZ__SHIFT 0
static inline uint32_t A3XX_GRAS_CL_GB_CLIP_ADJ_HORZ(uint32_t val)
{
return ((val) << A3XX_GRAS_CL_GB_CLIP_ADJ_HORZ__SHIFT) & A3XX_GRAS_CL_GB_CLIP_ADJ_HORZ__MASK;
}
#define A3XX_GRAS_CL_GB_CLIP_ADJ_VERT__MASK 0x000ffc00
#define A3XX_GRAS_CL_GB_CLIP_ADJ_VERT__SHIFT 10
static inline uint32_t A3XX_GRAS_CL_GB_CLIP_ADJ_VERT(uint32_t val)
{
return ((val) << A3XX_GRAS_CL_GB_CLIP_ADJ_VERT__SHIFT) & A3XX_GRAS_CL_GB_CLIP_ADJ_VERT__MASK;
}
 
#define REG_A3XX_GRAS_CL_VPORT_XOFFSET 0x00002048
#define A3XX_GRAS_CL_VPORT_XOFFSET__MASK 0xffffffff
#define A3XX_GRAS_CL_VPORT_XOFFSET__SHIFT 0
static inline uint32_t A3XX_GRAS_CL_VPORT_XOFFSET(float val)
{
return ((fui(val)) << A3XX_GRAS_CL_VPORT_XOFFSET__SHIFT) & A3XX_GRAS_CL_VPORT_XOFFSET__MASK;
}
 
#define REG_A3XX_GRAS_CL_VPORT_XSCALE 0x00002049
#define A3XX_GRAS_CL_VPORT_XSCALE__MASK 0xffffffff
#define A3XX_GRAS_CL_VPORT_XSCALE__SHIFT 0
static inline uint32_t A3XX_GRAS_CL_VPORT_XSCALE(float val)
{
return ((fui(val)) << A3XX_GRAS_CL_VPORT_XSCALE__SHIFT) & A3XX_GRAS_CL_VPORT_XSCALE__MASK;
}
 
#define REG_A3XX_GRAS_CL_VPORT_YOFFSET 0x0000204a
#define A3XX_GRAS_CL_VPORT_YOFFSET__MASK 0xffffffff
#define A3XX_GRAS_CL_VPORT_YOFFSET__SHIFT 0
static inline uint32_t A3XX_GRAS_CL_VPORT_YOFFSET(float val)
{
return ((fui(val)) << A3XX_GRAS_CL_VPORT_YOFFSET__SHIFT) & A3XX_GRAS_CL_VPORT_YOFFSET__MASK;
}
 
#define REG_A3XX_GRAS_CL_VPORT_YSCALE 0x0000204b
#define A3XX_GRAS_CL_VPORT_YSCALE__MASK 0xffffffff
#define A3XX_GRAS_CL_VPORT_YSCALE__SHIFT 0
static inline uint32_t A3XX_GRAS_CL_VPORT_YSCALE(float val)
{
return ((fui(val)) << A3XX_GRAS_CL_VPORT_YSCALE__SHIFT) & A3XX_GRAS_CL_VPORT_YSCALE__MASK;
}
 
#define REG_A3XX_GRAS_CL_VPORT_ZOFFSET 0x0000204c
#define A3XX_GRAS_CL_VPORT_ZOFFSET__MASK 0xffffffff
#define A3XX_GRAS_CL_VPORT_ZOFFSET__SHIFT 0
static inline uint32_t A3XX_GRAS_CL_VPORT_ZOFFSET(float val)
{
return ((fui(val)) << A3XX_GRAS_CL_VPORT_ZOFFSET__SHIFT) & A3XX_GRAS_CL_VPORT_ZOFFSET__MASK;
}
 
#define REG_A3XX_GRAS_CL_VPORT_ZSCALE 0x0000204d
#define A3XX_GRAS_CL_VPORT_ZSCALE__MASK 0xffffffff
#define A3XX_GRAS_CL_VPORT_ZSCALE__SHIFT 0
static inline uint32_t A3XX_GRAS_CL_VPORT_ZSCALE(float val)
{
return ((fui(val)) << A3XX_GRAS_CL_VPORT_ZSCALE__SHIFT) & A3XX_GRAS_CL_VPORT_ZSCALE__MASK;
}
 
#define REG_A3XX_GRAS_SU_POINT_MINMAX 0x00002068
#define A3XX_GRAS_SU_POINT_MINMAX_MIN__MASK 0x0000ffff
#define A3XX_GRAS_SU_POINT_MINMAX_MIN__SHIFT 0
static inline uint32_t A3XX_GRAS_SU_POINT_MINMAX_MIN(float val)
{
return ((((uint32_t)(val * 16.0))) << A3XX_GRAS_SU_POINT_MINMAX_MIN__SHIFT) & A3XX_GRAS_SU_POINT_MINMAX_MIN__MASK;
}
#define A3XX_GRAS_SU_POINT_MINMAX_MAX__MASK 0xffff0000
#define A3XX_GRAS_SU_POINT_MINMAX_MAX__SHIFT 16
static inline uint32_t A3XX_GRAS_SU_POINT_MINMAX_MAX(float val)
{
return ((((uint32_t)(val * 16.0))) << A3XX_GRAS_SU_POINT_MINMAX_MAX__SHIFT) & A3XX_GRAS_SU_POINT_MINMAX_MAX__MASK;
}
 
#define REG_A3XX_GRAS_SU_POINT_SIZE 0x00002069
#define A3XX_GRAS_SU_POINT_SIZE__MASK 0xffffffff
#define A3XX_GRAS_SU_POINT_SIZE__SHIFT 0
static inline uint32_t A3XX_GRAS_SU_POINT_SIZE(float val)
{
return ((((int32_t)(val * 16.0))) << A3XX_GRAS_SU_POINT_SIZE__SHIFT) & A3XX_GRAS_SU_POINT_SIZE__MASK;
}
 
#define REG_A3XX_GRAS_SU_POLY_OFFSET_SCALE 0x0000206c
#define A3XX_GRAS_SU_POLY_OFFSET_SCALE_VAL__MASK 0x00ffffff
#define A3XX_GRAS_SU_POLY_OFFSET_SCALE_VAL__SHIFT 0
static inline uint32_t A3XX_GRAS_SU_POLY_OFFSET_SCALE_VAL(float val)
{
return ((((int32_t)(val * 16384.0))) << A3XX_GRAS_SU_POLY_OFFSET_SCALE_VAL__SHIFT) & A3XX_GRAS_SU_POLY_OFFSET_SCALE_VAL__MASK;
}
 
#define REG_A3XX_GRAS_SU_POLY_OFFSET_OFFSET 0x0000206d
#define A3XX_GRAS_SU_POLY_OFFSET_OFFSET__MASK 0xffffffff
#define A3XX_GRAS_SU_POLY_OFFSET_OFFSET__SHIFT 0
static inline uint32_t A3XX_GRAS_SU_POLY_OFFSET_OFFSET(float val)
{
return ((((int32_t)(val * 64.0))) << A3XX_GRAS_SU_POLY_OFFSET_OFFSET__SHIFT) & A3XX_GRAS_SU_POLY_OFFSET_OFFSET__MASK;
}
 
#define REG_A3XX_GRAS_SU_MODE_CONTROL 0x00002070
#define A3XX_GRAS_SU_MODE_CONTROL_CULL_FRONT 0x00000001
#define A3XX_GRAS_SU_MODE_CONTROL_CULL_BACK 0x00000002
#define A3XX_GRAS_SU_MODE_CONTROL_FRONT_CW 0x00000004
#define A3XX_GRAS_SU_MODE_CONTROL_LINEHALFWIDTH__MASK 0x000007f8
#define A3XX_GRAS_SU_MODE_CONTROL_LINEHALFWIDTH__SHIFT 3
static inline uint32_t A3XX_GRAS_SU_MODE_CONTROL_LINEHALFWIDTH(float val)
{
return ((((int32_t)(val * 4.0))) << A3XX_GRAS_SU_MODE_CONTROL_LINEHALFWIDTH__SHIFT) & A3XX_GRAS_SU_MODE_CONTROL_LINEHALFWIDTH__MASK;
}
#define A3XX_GRAS_SU_MODE_CONTROL_POLY_OFFSET 0x00000800
 
#define REG_A3XX_GRAS_SC_CONTROL 0x00002072
#define A3XX_GRAS_SC_CONTROL_RENDER_MODE__MASK 0x000000f0
#define A3XX_GRAS_SC_CONTROL_RENDER_MODE__SHIFT 4
static inline uint32_t A3XX_GRAS_SC_CONTROL_RENDER_MODE(enum a3xx_render_mode val)
{
return ((val) << A3XX_GRAS_SC_CONTROL_RENDER_MODE__SHIFT) & A3XX_GRAS_SC_CONTROL_RENDER_MODE__MASK;
}
#define A3XX_GRAS_SC_CONTROL_MSAA_SAMPLES__MASK 0x00000f00
#define A3XX_GRAS_SC_CONTROL_MSAA_SAMPLES__SHIFT 8
static inline uint32_t A3XX_GRAS_SC_CONTROL_MSAA_SAMPLES(enum a3xx_msaa_samples val)
{
return ((val) << A3XX_GRAS_SC_CONTROL_MSAA_SAMPLES__SHIFT) & A3XX_GRAS_SC_CONTROL_MSAA_SAMPLES__MASK;
}
#define A3XX_GRAS_SC_CONTROL_RASTER_MODE__MASK 0x0000f000
#define A3XX_GRAS_SC_CONTROL_RASTER_MODE__SHIFT 12
static inline uint32_t A3XX_GRAS_SC_CONTROL_RASTER_MODE(uint32_t val)
{
return ((val) << A3XX_GRAS_SC_CONTROL_RASTER_MODE__SHIFT) & A3XX_GRAS_SC_CONTROL_RASTER_MODE__MASK;
}
 
#define REG_A3XX_GRAS_SC_SCREEN_SCISSOR_TL 0x00002074
#define A3XX_GRAS_SC_SCREEN_SCISSOR_TL_WINDOW_OFFSET_DISABLE 0x80000000
#define A3XX_GRAS_SC_SCREEN_SCISSOR_TL_X__MASK 0x00007fff
#define A3XX_GRAS_SC_SCREEN_SCISSOR_TL_X__SHIFT 0
static inline uint32_t A3XX_GRAS_SC_SCREEN_SCISSOR_TL_X(uint32_t val)
{
return ((val) << A3XX_GRAS_SC_SCREEN_SCISSOR_TL_X__SHIFT) & A3XX_GRAS_SC_SCREEN_SCISSOR_TL_X__MASK;
}
#define A3XX_GRAS_SC_SCREEN_SCISSOR_TL_Y__MASK 0x7fff0000
#define A3XX_GRAS_SC_SCREEN_SCISSOR_TL_Y__SHIFT 16
static inline uint32_t A3XX_GRAS_SC_SCREEN_SCISSOR_TL_Y(uint32_t val)
{
return ((val) << A3XX_GRAS_SC_SCREEN_SCISSOR_TL_Y__SHIFT) & A3XX_GRAS_SC_SCREEN_SCISSOR_TL_Y__MASK;
}
 
#define REG_A3XX_GRAS_SC_SCREEN_SCISSOR_BR 0x00002075
#define A3XX_GRAS_SC_SCREEN_SCISSOR_BR_WINDOW_OFFSET_DISABLE 0x80000000
#define A3XX_GRAS_SC_SCREEN_SCISSOR_BR_X__MASK 0x00007fff
#define A3XX_GRAS_SC_SCREEN_SCISSOR_BR_X__SHIFT 0
static inline uint32_t A3XX_GRAS_SC_SCREEN_SCISSOR_BR_X(uint32_t val)
{
return ((val) << A3XX_GRAS_SC_SCREEN_SCISSOR_BR_X__SHIFT) & A3XX_GRAS_SC_SCREEN_SCISSOR_BR_X__MASK;
}
#define A3XX_GRAS_SC_SCREEN_SCISSOR_BR_Y__MASK 0x7fff0000
#define A3XX_GRAS_SC_SCREEN_SCISSOR_BR_Y__SHIFT 16
static inline uint32_t A3XX_GRAS_SC_SCREEN_SCISSOR_BR_Y(uint32_t val)
{
return ((val) << A3XX_GRAS_SC_SCREEN_SCISSOR_BR_Y__SHIFT) & A3XX_GRAS_SC_SCREEN_SCISSOR_BR_Y__MASK;
}
 
#define REG_A3XX_GRAS_SC_WINDOW_SCISSOR_TL 0x00002079
#define A3XX_GRAS_SC_WINDOW_SCISSOR_TL_WINDOW_OFFSET_DISABLE 0x80000000
#define A3XX_GRAS_SC_WINDOW_SCISSOR_TL_X__MASK 0x00007fff
#define A3XX_GRAS_SC_WINDOW_SCISSOR_TL_X__SHIFT 0
static inline uint32_t A3XX_GRAS_SC_WINDOW_SCISSOR_TL_X(uint32_t val)
{
return ((val) << A3XX_GRAS_SC_WINDOW_SCISSOR_TL_X__SHIFT) & A3XX_GRAS_SC_WINDOW_SCISSOR_TL_X__MASK;
}
#define A3XX_GRAS_SC_WINDOW_SCISSOR_TL_Y__MASK 0x7fff0000
#define A3XX_GRAS_SC_WINDOW_SCISSOR_TL_Y__SHIFT 16
static inline uint32_t A3XX_GRAS_SC_WINDOW_SCISSOR_TL_Y(uint32_t val)
{
return ((val) << A3XX_GRAS_SC_WINDOW_SCISSOR_TL_Y__SHIFT) & A3XX_GRAS_SC_WINDOW_SCISSOR_TL_Y__MASK;
}
 
#define REG_A3XX_GRAS_SC_WINDOW_SCISSOR_BR 0x0000207a
#define A3XX_GRAS_SC_WINDOW_SCISSOR_BR_WINDOW_OFFSET_DISABLE 0x80000000
#define A3XX_GRAS_SC_WINDOW_SCISSOR_BR_X__MASK 0x00007fff
#define A3XX_GRAS_SC_WINDOW_SCISSOR_BR_X__SHIFT 0
static inline uint32_t A3XX_GRAS_SC_WINDOW_SCISSOR_BR_X(uint32_t val)
{
return ((val) << A3XX_GRAS_SC_WINDOW_SCISSOR_BR_X__SHIFT) & A3XX_GRAS_SC_WINDOW_SCISSOR_BR_X__MASK;
}
#define A3XX_GRAS_SC_WINDOW_SCISSOR_BR_Y__MASK 0x7fff0000
#define A3XX_GRAS_SC_WINDOW_SCISSOR_BR_Y__SHIFT 16
static inline uint32_t A3XX_GRAS_SC_WINDOW_SCISSOR_BR_Y(uint32_t val)
{
return ((val) << A3XX_GRAS_SC_WINDOW_SCISSOR_BR_Y__SHIFT) & A3XX_GRAS_SC_WINDOW_SCISSOR_BR_Y__MASK;
}
 
#define REG_A3XX_RB_MODE_CONTROL 0x000020c0
#define A3XX_RB_MODE_CONTROL_GMEM_BYPASS 0x00000080
#define A3XX_RB_MODE_CONTROL_RENDER_MODE__MASK 0x00000700
#define A3XX_RB_MODE_CONTROL_RENDER_MODE__SHIFT 8
static inline uint32_t A3XX_RB_MODE_CONTROL_RENDER_MODE(enum a3xx_render_mode val)
{
return ((val) << A3XX_RB_MODE_CONTROL_RENDER_MODE__SHIFT) & A3XX_RB_MODE_CONTROL_RENDER_MODE__MASK;
}
#define A3XX_RB_MODE_CONTROL_MRT__MASK 0x00003000
#define A3XX_RB_MODE_CONTROL_MRT__SHIFT 12
static inline uint32_t A3XX_RB_MODE_CONTROL_MRT(uint32_t val)
{
return ((val) << A3XX_RB_MODE_CONTROL_MRT__SHIFT) & A3XX_RB_MODE_CONTROL_MRT__MASK;
}
#define A3XX_RB_MODE_CONTROL_MARB_CACHE_SPLIT_MODE 0x00008000
#define A3XX_RB_MODE_CONTROL_PACKER_TIMER_ENABLE 0x00010000
 
#define REG_A3XX_RB_RENDER_CONTROL 0x000020c1
#define A3XX_RB_RENDER_CONTROL_FACENESS 0x00000008
#define A3XX_RB_RENDER_CONTROL_BIN_WIDTH__MASK 0x00000ff0
#define A3XX_RB_RENDER_CONTROL_BIN_WIDTH__SHIFT 4
static inline uint32_t A3XX_RB_RENDER_CONTROL_BIN_WIDTH(uint32_t val)
{
return ((val >> 5) << A3XX_RB_RENDER_CONTROL_BIN_WIDTH__SHIFT) & A3XX_RB_RENDER_CONTROL_BIN_WIDTH__MASK;
}
#define A3XX_RB_RENDER_CONTROL_DISABLE_COLOR_PIPE 0x00001000
#define A3XX_RB_RENDER_CONTROL_ENABLE_GMEM 0x00002000
#define A3XX_RB_RENDER_CONTROL_XCOORD 0x00004000
#define A3XX_RB_RENDER_CONTROL_YCOORD 0x00008000
#define A3XX_RB_RENDER_CONTROL_ZCOORD 0x00010000
#define A3XX_RB_RENDER_CONTROL_WCOORD 0x00020000
#define A3XX_RB_RENDER_CONTROL_ALPHA_TEST 0x00400000
#define A3XX_RB_RENDER_CONTROL_ALPHA_TEST_FUNC__MASK 0x07000000
#define A3XX_RB_RENDER_CONTROL_ALPHA_TEST_FUNC__SHIFT 24
static inline uint32_t A3XX_RB_RENDER_CONTROL_ALPHA_TEST_FUNC(enum adreno_compare_func val)
{
return ((val) << A3XX_RB_RENDER_CONTROL_ALPHA_TEST_FUNC__SHIFT) & A3XX_RB_RENDER_CONTROL_ALPHA_TEST_FUNC__MASK;
}
 
#define REG_A3XX_RB_MSAA_CONTROL 0x000020c2
#define A3XX_RB_MSAA_CONTROL_DISABLE 0x00000400
#define A3XX_RB_MSAA_CONTROL_SAMPLES__MASK 0x0000f000
#define A3XX_RB_MSAA_CONTROL_SAMPLES__SHIFT 12
static inline uint32_t A3XX_RB_MSAA_CONTROL_SAMPLES(enum a3xx_msaa_samples val)
{
return ((val) << A3XX_RB_MSAA_CONTROL_SAMPLES__SHIFT) & A3XX_RB_MSAA_CONTROL_SAMPLES__MASK;
}
#define A3XX_RB_MSAA_CONTROL_SAMPLE_MASK__MASK 0xffff0000
#define A3XX_RB_MSAA_CONTROL_SAMPLE_MASK__SHIFT 16
static inline uint32_t A3XX_RB_MSAA_CONTROL_SAMPLE_MASK(uint32_t val)
{
return ((val) << A3XX_RB_MSAA_CONTROL_SAMPLE_MASK__SHIFT) & A3XX_RB_MSAA_CONTROL_SAMPLE_MASK__MASK;
}
 
#define REG_A3XX_RB_ALPHA_REF 0x000020c3
#define A3XX_RB_ALPHA_REF_UINT__MASK 0x0000ff00
#define A3XX_RB_ALPHA_REF_UINT__SHIFT 8
static inline uint32_t A3XX_RB_ALPHA_REF_UINT(uint32_t val)
{
return ((val) << A3XX_RB_ALPHA_REF_UINT__SHIFT) & A3XX_RB_ALPHA_REF_UINT__MASK;
}
#define A3XX_RB_ALPHA_REF_FLOAT__MASK 0xffff0000
#define A3XX_RB_ALPHA_REF_FLOAT__SHIFT 16
static inline uint32_t A3XX_RB_ALPHA_REF_FLOAT(float val)
{
return ((util_float_to_half(val)) << A3XX_RB_ALPHA_REF_FLOAT__SHIFT) & A3XX_RB_ALPHA_REF_FLOAT__MASK;
}
 
static inline uint32_t REG_A3XX_RB_MRT(uint32_t i0) { return 0x000020c4 + 0x4*i0; }
 
static inline uint32_t REG_A3XX_RB_MRT_CONTROL(uint32_t i0) { return 0x000020c4 + 0x4*i0; }
#define A3XX_RB_MRT_CONTROL_READ_DEST_ENABLE 0x00000008
#define A3XX_RB_MRT_CONTROL_BLEND 0x00000010
#define A3XX_RB_MRT_CONTROL_BLEND2 0x00000020
#define A3XX_RB_MRT_CONTROL_ROP_CODE__MASK 0x00000f00
#define A3XX_RB_MRT_CONTROL_ROP_CODE__SHIFT 8
static inline uint32_t A3XX_RB_MRT_CONTROL_ROP_CODE(enum a3xx_rop_code val)
{
return ((val) << A3XX_RB_MRT_CONTROL_ROP_CODE__SHIFT) & A3XX_RB_MRT_CONTROL_ROP_CODE__MASK;
}
#define A3XX_RB_MRT_CONTROL_DITHER_MODE__MASK 0x00003000
#define A3XX_RB_MRT_CONTROL_DITHER_MODE__SHIFT 12
static inline uint32_t A3XX_RB_MRT_CONTROL_DITHER_MODE(enum adreno_rb_dither_mode val)
{
return ((val) << A3XX_RB_MRT_CONTROL_DITHER_MODE__SHIFT) & A3XX_RB_MRT_CONTROL_DITHER_MODE__MASK;
}
#define A3XX_RB_MRT_CONTROL_COMPONENT_ENABLE__MASK 0x0f000000
#define A3XX_RB_MRT_CONTROL_COMPONENT_ENABLE__SHIFT 24
static inline uint32_t A3XX_RB_MRT_CONTROL_COMPONENT_ENABLE(uint32_t val)
{
return ((val) << A3XX_RB_MRT_CONTROL_COMPONENT_ENABLE__SHIFT) & A3XX_RB_MRT_CONTROL_COMPONENT_ENABLE__MASK;
}
 
static inline uint32_t REG_A3XX_RB_MRT_BUF_INFO(uint32_t i0) { return 0x000020c5 + 0x4*i0; }
#define A3XX_RB_MRT_BUF_INFO_COLOR_FORMAT__MASK 0x0000003f
#define A3XX_RB_MRT_BUF_INFO_COLOR_FORMAT__SHIFT 0
static inline uint32_t A3XX_RB_MRT_BUF_INFO_COLOR_FORMAT(enum a3xx_color_fmt val)
{
return ((val) << A3XX_RB_MRT_BUF_INFO_COLOR_FORMAT__SHIFT) & A3XX_RB_MRT_BUF_INFO_COLOR_FORMAT__MASK;
}
#define A3XX_RB_MRT_BUF_INFO_COLOR_TILE_MODE__MASK 0x000000c0
#define A3XX_RB_MRT_BUF_INFO_COLOR_TILE_MODE__SHIFT 6
static inline uint32_t A3XX_RB_MRT_BUF_INFO_COLOR_TILE_MODE(enum a3xx_tile_mode val)
{
return ((val) << A3XX_RB_MRT_BUF_INFO_COLOR_TILE_MODE__SHIFT) & A3XX_RB_MRT_BUF_INFO_COLOR_TILE_MODE__MASK;
}
#define A3XX_RB_MRT_BUF_INFO_COLOR_SWAP__MASK 0x00000c00
#define A3XX_RB_MRT_BUF_INFO_COLOR_SWAP__SHIFT 10
static inline uint32_t A3XX_RB_MRT_BUF_INFO_COLOR_SWAP(enum a3xx_color_swap val)
{
return ((val) << A3XX_RB_MRT_BUF_INFO_COLOR_SWAP__SHIFT) & A3XX_RB_MRT_BUF_INFO_COLOR_SWAP__MASK;
}
#define A3XX_RB_MRT_BUF_INFO_COLOR_SRGB 0x00004000
#define A3XX_RB_MRT_BUF_INFO_COLOR_BUF_PITCH__MASK 0xfffe0000
#define A3XX_RB_MRT_BUF_INFO_COLOR_BUF_PITCH__SHIFT 17
static inline uint32_t A3XX_RB_MRT_BUF_INFO_COLOR_BUF_PITCH(uint32_t val)
{
return ((val >> 5) << A3XX_RB_MRT_BUF_INFO_COLOR_BUF_PITCH__SHIFT) & A3XX_RB_MRT_BUF_INFO_COLOR_BUF_PITCH__MASK;
}
 
static inline uint32_t REG_A3XX_RB_MRT_BUF_BASE(uint32_t i0) { return 0x000020c6 + 0x4*i0; }
#define A3XX_RB_MRT_BUF_BASE_COLOR_BUF_BASE__MASK 0xfffffff0
#define A3XX_RB_MRT_BUF_BASE_COLOR_BUF_BASE__SHIFT 4
static inline uint32_t A3XX_RB_MRT_BUF_BASE_COLOR_BUF_BASE(uint32_t val)
{
return ((val >> 5) << A3XX_RB_MRT_BUF_BASE_COLOR_BUF_BASE__SHIFT) & A3XX_RB_MRT_BUF_BASE_COLOR_BUF_BASE__MASK;
}
 
static inline uint32_t REG_A3XX_RB_MRT_BLEND_CONTROL(uint32_t i0) { return 0x000020c7 + 0x4*i0; }
#define A3XX_RB_MRT_BLEND_CONTROL_RGB_SRC_FACTOR__MASK 0x0000001f
#define A3XX_RB_MRT_BLEND_CONTROL_RGB_SRC_FACTOR__SHIFT 0
static inline uint32_t A3XX_RB_MRT_BLEND_CONTROL_RGB_SRC_FACTOR(enum adreno_rb_blend_factor val)
{
return ((val) << A3XX_RB_MRT_BLEND_CONTROL_RGB_SRC_FACTOR__SHIFT) & A3XX_RB_MRT_BLEND_CONTROL_RGB_SRC_FACTOR__MASK;
}
#define A3XX_RB_MRT_BLEND_CONTROL_RGB_BLEND_OPCODE__MASK 0x000000e0
#define A3XX_RB_MRT_BLEND_CONTROL_RGB_BLEND_OPCODE__SHIFT 5
static inline uint32_t A3XX_RB_MRT_BLEND_CONTROL_RGB_BLEND_OPCODE(enum a3xx_rb_blend_opcode val)
{
return ((val) << A3XX_RB_MRT_BLEND_CONTROL_RGB_BLEND_OPCODE__SHIFT) & A3XX_RB_MRT_BLEND_CONTROL_RGB_BLEND_OPCODE__MASK;
}
#define A3XX_RB_MRT_BLEND_CONTROL_RGB_DEST_FACTOR__MASK 0x00001f00
#define A3XX_RB_MRT_BLEND_CONTROL_RGB_DEST_FACTOR__SHIFT 8
static inline uint32_t A3XX_RB_MRT_BLEND_CONTROL_RGB_DEST_FACTOR(enum adreno_rb_blend_factor val)
{
return ((val) << A3XX_RB_MRT_BLEND_CONTROL_RGB_DEST_FACTOR__SHIFT) & A3XX_RB_MRT_BLEND_CONTROL_RGB_DEST_FACTOR__MASK;
}
#define A3XX_RB_MRT_BLEND_CONTROL_ALPHA_SRC_FACTOR__MASK 0x001f0000
#define A3XX_RB_MRT_BLEND_CONTROL_ALPHA_SRC_FACTOR__SHIFT 16
static inline uint32_t A3XX_RB_MRT_BLEND_CONTROL_ALPHA_SRC_FACTOR(enum adreno_rb_blend_factor val)
{
return ((val) << A3XX_RB_MRT_BLEND_CONTROL_ALPHA_SRC_FACTOR__SHIFT) & A3XX_RB_MRT_BLEND_CONTROL_ALPHA_SRC_FACTOR__MASK;
}
#define A3XX_RB_MRT_BLEND_CONTROL_ALPHA_BLEND_OPCODE__MASK 0x00e00000
#define A3XX_RB_MRT_BLEND_CONTROL_ALPHA_BLEND_OPCODE__SHIFT 21
static inline uint32_t A3XX_RB_MRT_BLEND_CONTROL_ALPHA_BLEND_OPCODE(enum a3xx_rb_blend_opcode val)
{
return ((val) << A3XX_RB_MRT_BLEND_CONTROL_ALPHA_BLEND_OPCODE__SHIFT) & A3XX_RB_MRT_BLEND_CONTROL_ALPHA_BLEND_OPCODE__MASK;
}
#define A3XX_RB_MRT_BLEND_CONTROL_ALPHA_DEST_FACTOR__MASK 0x1f000000
#define A3XX_RB_MRT_BLEND_CONTROL_ALPHA_DEST_FACTOR__SHIFT 24
static inline uint32_t A3XX_RB_MRT_BLEND_CONTROL_ALPHA_DEST_FACTOR(enum adreno_rb_blend_factor val)
{
return ((val) << A3XX_RB_MRT_BLEND_CONTROL_ALPHA_DEST_FACTOR__SHIFT) & A3XX_RB_MRT_BLEND_CONTROL_ALPHA_DEST_FACTOR__MASK;
}
#define A3XX_RB_MRT_BLEND_CONTROL_CLAMP_ENABLE 0x20000000
 
#define REG_A3XX_RB_BLEND_RED 0x000020e4
#define A3XX_RB_BLEND_RED_UINT__MASK 0x000000ff
#define A3XX_RB_BLEND_RED_UINT__SHIFT 0
static inline uint32_t A3XX_RB_BLEND_RED_UINT(uint32_t val)
{
return ((val) << A3XX_RB_BLEND_RED_UINT__SHIFT) & A3XX_RB_BLEND_RED_UINT__MASK;
}
#define A3XX_RB_BLEND_RED_FLOAT__MASK 0xffff0000
#define A3XX_RB_BLEND_RED_FLOAT__SHIFT 16
static inline uint32_t A3XX_RB_BLEND_RED_FLOAT(float val)
{
return ((util_float_to_half(val)) << A3XX_RB_BLEND_RED_FLOAT__SHIFT) & A3XX_RB_BLEND_RED_FLOAT__MASK;
}
 
#define REG_A3XX_RB_BLEND_GREEN 0x000020e5
#define A3XX_RB_BLEND_GREEN_UINT__MASK 0x000000ff
#define A3XX_RB_BLEND_GREEN_UINT__SHIFT 0
static inline uint32_t A3XX_RB_BLEND_GREEN_UINT(uint32_t val)
{
return ((val) << A3XX_RB_BLEND_GREEN_UINT__SHIFT) & A3XX_RB_BLEND_GREEN_UINT__MASK;
}
#define A3XX_RB_BLEND_GREEN_FLOAT__MASK 0xffff0000
#define A3XX_RB_BLEND_GREEN_FLOAT__SHIFT 16
static inline uint32_t A3XX_RB_BLEND_GREEN_FLOAT(float val)
{
return ((util_float_to_half(val)) << A3XX_RB_BLEND_GREEN_FLOAT__SHIFT) & A3XX_RB_BLEND_GREEN_FLOAT__MASK;
}
 
#define REG_A3XX_RB_BLEND_BLUE 0x000020e6
#define A3XX_RB_BLEND_BLUE_UINT__MASK 0x000000ff
#define A3XX_RB_BLEND_BLUE_UINT__SHIFT 0
static inline uint32_t A3XX_RB_BLEND_BLUE_UINT(uint32_t val)
{
return ((val) << A3XX_RB_BLEND_BLUE_UINT__SHIFT) & A3XX_RB_BLEND_BLUE_UINT__MASK;
}
#define A3XX_RB_BLEND_BLUE_FLOAT__MASK 0xffff0000
#define A3XX_RB_BLEND_BLUE_FLOAT__SHIFT 16
static inline uint32_t A3XX_RB_BLEND_BLUE_FLOAT(float val)
{
return ((util_float_to_half(val)) << A3XX_RB_BLEND_BLUE_FLOAT__SHIFT) & A3XX_RB_BLEND_BLUE_FLOAT__MASK;
}
 
#define REG_A3XX_RB_BLEND_ALPHA 0x000020e7
#define A3XX_RB_BLEND_ALPHA_UINT__MASK 0x000000ff
#define A3XX_RB_BLEND_ALPHA_UINT__SHIFT 0
static inline uint32_t A3XX_RB_BLEND_ALPHA_UINT(uint32_t val)
{
return ((val) << A3XX_RB_BLEND_ALPHA_UINT__SHIFT) & A3XX_RB_BLEND_ALPHA_UINT__MASK;
}
#define A3XX_RB_BLEND_ALPHA_FLOAT__MASK 0xffff0000
#define A3XX_RB_BLEND_ALPHA_FLOAT__SHIFT 16
static inline uint32_t A3XX_RB_BLEND_ALPHA_FLOAT(float val)
{
return ((util_float_to_half(val)) << A3XX_RB_BLEND_ALPHA_FLOAT__SHIFT) & A3XX_RB_BLEND_ALPHA_FLOAT__MASK;
}
 
#define REG_A3XX_RB_CLEAR_COLOR_DW0 0x000020e8
 
#define REG_A3XX_RB_CLEAR_COLOR_DW1 0x000020e9
 
#define REG_A3XX_RB_CLEAR_COLOR_DW2 0x000020ea
 
#define REG_A3XX_RB_CLEAR_COLOR_DW3 0x000020eb
 
#define REG_A3XX_RB_COPY_CONTROL 0x000020ec
#define A3XX_RB_COPY_CONTROL_MSAA_RESOLVE__MASK 0x00000003
#define A3XX_RB_COPY_CONTROL_MSAA_RESOLVE__SHIFT 0
static inline uint32_t A3XX_RB_COPY_CONTROL_MSAA_RESOLVE(enum a3xx_msaa_samples val)
{
return ((val) << A3XX_RB_COPY_CONTROL_MSAA_RESOLVE__SHIFT) & A3XX_RB_COPY_CONTROL_MSAA_RESOLVE__MASK;
}
#define A3XX_RB_COPY_CONTROL_DEPTHCLEAR 0x00000008
#define A3XX_RB_COPY_CONTROL_MODE__MASK 0x00000070
#define A3XX_RB_COPY_CONTROL_MODE__SHIFT 4
static inline uint32_t A3XX_RB_COPY_CONTROL_MODE(enum adreno_rb_copy_control_mode val)
{
return ((val) << A3XX_RB_COPY_CONTROL_MODE__SHIFT) & A3XX_RB_COPY_CONTROL_MODE__MASK;
}
#define A3XX_RB_COPY_CONTROL_FASTCLEAR__MASK 0x00000f00
#define A3XX_RB_COPY_CONTROL_FASTCLEAR__SHIFT 8
static inline uint32_t A3XX_RB_COPY_CONTROL_FASTCLEAR(uint32_t val)
{
return ((val) << A3XX_RB_COPY_CONTROL_FASTCLEAR__SHIFT) & A3XX_RB_COPY_CONTROL_FASTCLEAR__MASK;
}
#define A3XX_RB_COPY_CONTROL_UNK12 0x00001000
#define A3XX_RB_COPY_CONTROL_GMEM_BASE__MASK 0xffffc000
#define A3XX_RB_COPY_CONTROL_GMEM_BASE__SHIFT 14
static inline uint32_t A3XX_RB_COPY_CONTROL_GMEM_BASE(uint32_t val)
{
return ((val >> 14) << A3XX_RB_COPY_CONTROL_GMEM_BASE__SHIFT) & A3XX_RB_COPY_CONTROL_GMEM_BASE__MASK;
}
 
#define REG_A3XX_RB_COPY_DEST_BASE 0x000020ed
#define A3XX_RB_COPY_DEST_BASE_BASE__MASK 0xfffffff0
#define A3XX_RB_COPY_DEST_BASE_BASE__SHIFT 4
static inline uint32_t A3XX_RB_COPY_DEST_BASE_BASE(uint32_t val)
{
return ((val >> 5) << A3XX_RB_COPY_DEST_BASE_BASE__SHIFT) & A3XX_RB_COPY_DEST_BASE_BASE__MASK;
}
 
#define REG_A3XX_RB_COPY_DEST_PITCH 0x000020ee
#define A3XX_RB_COPY_DEST_PITCH_PITCH__MASK 0xffffffff
#define A3XX_RB_COPY_DEST_PITCH_PITCH__SHIFT 0
static inline uint32_t A3XX_RB_COPY_DEST_PITCH_PITCH(uint32_t val)
{
return ((val >> 5) << A3XX_RB_COPY_DEST_PITCH_PITCH__SHIFT) & A3XX_RB_COPY_DEST_PITCH_PITCH__MASK;
}
 
#define REG_A3XX_RB_COPY_DEST_INFO 0x000020ef
#define A3XX_RB_COPY_DEST_INFO_TILE__MASK 0x00000003
#define A3XX_RB_COPY_DEST_INFO_TILE__SHIFT 0
static inline uint32_t A3XX_RB_COPY_DEST_INFO_TILE(enum a3xx_tile_mode val)
{
return ((val) << A3XX_RB_COPY_DEST_INFO_TILE__SHIFT) & A3XX_RB_COPY_DEST_INFO_TILE__MASK;
}
#define A3XX_RB_COPY_DEST_INFO_FORMAT__MASK 0x000000fc
#define A3XX_RB_COPY_DEST_INFO_FORMAT__SHIFT 2
static inline uint32_t A3XX_RB_COPY_DEST_INFO_FORMAT(enum a3xx_color_fmt val)
{
return ((val) << A3XX_RB_COPY_DEST_INFO_FORMAT__SHIFT) & A3XX_RB_COPY_DEST_INFO_FORMAT__MASK;
}
#define A3XX_RB_COPY_DEST_INFO_SWAP__MASK 0x00000300
#define A3XX_RB_COPY_DEST_INFO_SWAP__SHIFT 8
static inline uint32_t A3XX_RB_COPY_DEST_INFO_SWAP(enum a3xx_color_swap val)
{
return ((val) << A3XX_RB_COPY_DEST_INFO_SWAP__SHIFT) & A3XX_RB_COPY_DEST_INFO_SWAP__MASK;
}
#define A3XX_RB_COPY_DEST_INFO_DITHER_MODE__MASK 0x00000c00
#define A3XX_RB_COPY_DEST_INFO_DITHER_MODE__SHIFT 10
static inline uint32_t A3XX_RB_COPY_DEST_INFO_DITHER_MODE(enum adreno_rb_dither_mode val)
{
return ((val) << A3XX_RB_COPY_DEST_INFO_DITHER_MODE__SHIFT) & A3XX_RB_COPY_DEST_INFO_DITHER_MODE__MASK;
}
#define A3XX_RB_COPY_DEST_INFO_COMPONENT_ENABLE__MASK 0x0003c000
#define A3XX_RB_COPY_DEST_INFO_COMPONENT_ENABLE__SHIFT 14
static inline uint32_t A3XX_RB_COPY_DEST_INFO_COMPONENT_ENABLE(uint32_t val)
{
return ((val) << A3XX_RB_COPY_DEST_INFO_COMPONENT_ENABLE__SHIFT) & A3XX_RB_COPY_DEST_INFO_COMPONENT_ENABLE__MASK;
}
#define A3XX_RB_COPY_DEST_INFO_ENDIAN__MASK 0x001c0000
#define A3XX_RB_COPY_DEST_INFO_ENDIAN__SHIFT 18
static inline uint32_t A3XX_RB_COPY_DEST_INFO_ENDIAN(enum adreno_rb_surface_endian val)
{
return ((val) << A3XX_RB_COPY_DEST_INFO_ENDIAN__SHIFT) & A3XX_RB_COPY_DEST_INFO_ENDIAN__MASK;
}
 
#define REG_A3XX_RB_DEPTH_CONTROL 0x00002100
#define A3XX_RB_DEPTH_CONTROL_FRAG_WRITES_Z 0x00000001
#define A3XX_RB_DEPTH_CONTROL_Z_ENABLE 0x00000002
#define A3XX_RB_DEPTH_CONTROL_Z_WRITE_ENABLE 0x00000004
#define A3XX_RB_DEPTH_CONTROL_EARLY_Z_DISABLE 0x00000008
#define A3XX_RB_DEPTH_CONTROL_ZFUNC__MASK 0x00000070
#define A3XX_RB_DEPTH_CONTROL_ZFUNC__SHIFT 4
static inline uint32_t A3XX_RB_DEPTH_CONTROL_ZFUNC(enum adreno_compare_func val)
{
return ((val) << A3XX_RB_DEPTH_CONTROL_ZFUNC__SHIFT) & A3XX_RB_DEPTH_CONTROL_ZFUNC__MASK;
}
#define A3XX_RB_DEPTH_CONTROL_BF_ENABLE 0x00000080
#define A3XX_RB_DEPTH_CONTROL_Z_TEST_ENABLE 0x80000000
 
#define REG_A3XX_RB_DEPTH_CLEAR 0x00002101
 
#define REG_A3XX_RB_DEPTH_INFO 0x00002102
#define A3XX_RB_DEPTH_INFO_DEPTH_FORMAT__MASK 0x00000003
#define A3XX_RB_DEPTH_INFO_DEPTH_FORMAT__SHIFT 0
static inline uint32_t A3XX_RB_DEPTH_INFO_DEPTH_FORMAT(enum adreno_rb_depth_format val)
{
return ((val) << A3XX_RB_DEPTH_INFO_DEPTH_FORMAT__SHIFT) & A3XX_RB_DEPTH_INFO_DEPTH_FORMAT__MASK;
}
#define A3XX_RB_DEPTH_INFO_DEPTH_BASE__MASK 0xfffff800
#define A3XX_RB_DEPTH_INFO_DEPTH_BASE__SHIFT 11
static inline uint32_t A3XX_RB_DEPTH_INFO_DEPTH_BASE(uint32_t val)
{
return ((val >> 12) << A3XX_RB_DEPTH_INFO_DEPTH_BASE__SHIFT) & A3XX_RB_DEPTH_INFO_DEPTH_BASE__MASK;
}
 
#define REG_A3XX_RB_DEPTH_PITCH 0x00002103
#define A3XX_RB_DEPTH_PITCH__MASK 0xffffffff
#define A3XX_RB_DEPTH_PITCH__SHIFT 0
static inline uint32_t A3XX_RB_DEPTH_PITCH(uint32_t val)
{
return ((val >> 3) << A3XX_RB_DEPTH_PITCH__SHIFT) & A3XX_RB_DEPTH_PITCH__MASK;
}
 
#define REG_A3XX_RB_STENCIL_CONTROL 0x00002104
#define A3XX_RB_STENCIL_CONTROL_STENCIL_ENABLE 0x00000001
#define A3XX_RB_STENCIL_CONTROL_STENCIL_ENABLE_BF 0x00000002
#define A3XX_RB_STENCIL_CONTROL_STENCIL_READ 0x00000004
#define A3XX_RB_STENCIL_CONTROL_FUNC__MASK 0x00000700
#define A3XX_RB_STENCIL_CONTROL_FUNC__SHIFT 8
static inline uint32_t A3XX_RB_STENCIL_CONTROL_FUNC(enum adreno_compare_func val)
{
return ((val) << A3XX_RB_STENCIL_CONTROL_FUNC__SHIFT) & A3XX_RB_STENCIL_CONTROL_FUNC__MASK;
}
#define A3XX_RB_STENCIL_CONTROL_FAIL__MASK 0x00003800
#define A3XX_RB_STENCIL_CONTROL_FAIL__SHIFT 11
static inline uint32_t A3XX_RB_STENCIL_CONTROL_FAIL(enum adreno_stencil_op val)
{
return ((val) << A3XX_RB_STENCIL_CONTROL_FAIL__SHIFT) & A3XX_RB_STENCIL_CONTROL_FAIL__MASK;
}
#define A3XX_RB_STENCIL_CONTROL_ZPASS__MASK 0x0001c000
#define A3XX_RB_STENCIL_CONTROL_ZPASS__SHIFT 14
static inline uint32_t A3XX_RB_STENCIL_CONTROL_ZPASS(enum adreno_stencil_op val)
{
return ((val) << A3XX_RB_STENCIL_CONTROL_ZPASS__SHIFT) & A3XX_RB_STENCIL_CONTROL_ZPASS__MASK;
}
#define A3XX_RB_STENCIL_CONTROL_ZFAIL__MASK 0x000e0000
#define A3XX_RB_STENCIL_CONTROL_ZFAIL__SHIFT 17
static inline uint32_t A3XX_RB_STENCIL_CONTROL_ZFAIL(enum adreno_stencil_op val)
{
return ((val) << A3XX_RB_STENCIL_CONTROL_ZFAIL__SHIFT) & A3XX_RB_STENCIL_CONTROL_ZFAIL__MASK;
}
#define A3XX_RB_STENCIL_CONTROL_FUNC_BF__MASK 0x00700000
#define A3XX_RB_STENCIL_CONTROL_FUNC_BF__SHIFT 20
static inline uint32_t A3XX_RB_STENCIL_CONTROL_FUNC_BF(enum adreno_compare_func val)
{
return ((val) << A3XX_RB_STENCIL_CONTROL_FUNC_BF__SHIFT) & A3XX_RB_STENCIL_CONTROL_FUNC_BF__MASK;
}
#define A3XX_RB_STENCIL_CONTROL_FAIL_BF__MASK 0x03800000
#define A3XX_RB_STENCIL_CONTROL_FAIL_BF__SHIFT 23
static inline uint32_t A3XX_RB_STENCIL_CONTROL_FAIL_BF(enum adreno_stencil_op val)
{
return ((val) << A3XX_RB_STENCIL_CONTROL_FAIL_BF__SHIFT) & A3XX_RB_STENCIL_CONTROL_FAIL_BF__MASK;
}
#define A3XX_RB_STENCIL_CONTROL_ZPASS_BF__MASK 0x1c000000
#define A3XX_RB_STENCIL_CONTROL_ZPASS_BF__SHIFT 26
static inline uint32_t A3XX_RB_STENCIL_CONTROL_ZPASS_BF(enum adreno_stencil_op val)
{
return ((val) << A3XX_RB_STENCIL_CONTROL_ZPASS_BF__SHIFT) & A3XX_RB_STENCIL_CONTROL_ZPASS_BF__MASK;
}
#define A3XX_RB_STENCIL_CONTROL_ZFAIL_BF__MASK 0xe0000000
#define A3XX_RB_STENCIL_CONTROL_ZFAIL_BF__SHIFT 29
static inline uint32_t A3XX_RB_STENCIL_CONTROL_ZFAIL_BF(enum adreno_stencil_op val)
{
return ((val) << A3XX_RB_STENCIL_CONTROL_ZFAIL_BF__SHIFT) & A3XX_RB_STENCIL_CONTROL_ZFAIL_BF__MASK;
}
 
#define REG_A3XX_RB_STENCIL_CLEAR 0x00002105
 
#define REG_A3XX_RB_STENCIL_INFO 0x00002106
#define A3XX_RB_STENCIL_INFO_STENCIL_BASE__MASK 0xfffff800
#define A3XX_RB_STENCIL_INFO_STENCIL_BASE__SHIFT 11
static inline uint32_t A3XX_RB_STENCIL_INFO_STENCIL_BASE(uint32_t val)
{
return ((val >> 12) << A3XX_RB_STENCIL_INFO_STENCIL_BASE__SHIFT) & A3XX_RB_STENCIL_INFO_STENCIL_BASE__MASK;
}
 
#define REG_A3XX_RB_STENCIL_PITCH 0x00002107
#define A3XX_RB_STENCIL_PITCH__MASK 0xffffffff
#define A3XX_RB_STENCIL_PITCH__SHIFT 0
static inline uint32_t A3XX_RB_STENCIL_PITCH(uint32_t val)
{
return ((val >> 3) << A3XX_RB_STENCIL_PITCH__SHIFT) & A3XX_RB_STENCIL_PITCH__MASK;
}
 
#define REG_A3XX_RB_STENCILREFMASK 0x00002108
#define A3XX_RB_STENCILREFMASK_STENCILREF__MASK 0x000000ff
#define A3XX_RB_STENCILREFMASK_STENCILREF__SHIFT 0
static inline uint32_t A3XX_RB_STENCILREFMASK_STENCILREF(uint32_t val)
{
return ((val) << A3XX_RB_STENCILREFMASK_STENCILREF__SHIFT) & A3XX_RB_STENCILREFMASK_STENCILREF__MASK;
}
#define A3XX_RB_STENCILREFMASK_STENCILMASK__MASK 0x0000ff00
#define A3XX_RB_STENCILREFMASK_STENCILMASK__SHIFT 8
static inline uint32_t A3XX_RB_STENCILREFMASK_STENCILMASK(uint32_t val)
{
return ((val) << A3XX_RB_STENCILREFMASK_STENCILMASK__SHIFT) & A3XX_RB_STENCILREFMASK_STENCILMASK__MASK;
}
#define A3XX_RB_STENCILREFMASK_STENCILWRITEMASK__MASK 0x00ff0000
#define A3XX_RB_STENCILREFMASK_STENCILWRITEMASK__SHIFT 16
static inline uint32_t A3XX_RB_STENCILREFMASK_STENCILWRITEMASK(uint32_t val)
{
return ((val) << A3XX_RB_STENCILREFMASK_STENCILWRITEMASK__SHIFT) & A3XX_RB_STENCILREFMASK_STENCILWRITEMASK__MASK;
}
 
#define REG_A3XX_RB_STENCILREFMASK_BF 0x00002109
#define A3XX_RB_STENCILREFMASK_BF_STENCILREF__MASK 0x000000ff
#define A3XX_RB_STENCILREFMASK_BF_STENCILREF__SHIFT 0
static inline uint32_t A3XX_RB_STENCILREFMASK_BF_STENCILREF(uint32_t val)
{
return ((val) << A3XX_RB_STENCILREFMASK_BF_STENCILREF__SHIFT) & A3XX_RB_STENCILREFMASK_BF_STENCILREF__MASK;
}
#define A3XX_RB_STENCILREFMASK_BF_STENCILMASK__MASK 0x0000ff00
#define A3XX_RB_STENCILREFMASK_BF_STENCILMASK__SHIFT 8
static inline uint32_t A3XX_RB_STENCILREFMASK_BF_STENCILMASK(uint32_t val)
{
return ((val) << A3XX_RB_STENCILREFMASK_BF_STENCILMASK__SHIFT) & A3XX_RB_STENCILREFMASK_BF_STENCILMASK__MASK;
}
#define A3XX_RB_STENCILREFMASK_BF_STENCILWRITEMASK__MASK 0x00ff0000
#define A3XX_RB_STENCILREFMASK_BF_STENCILWRITEMASK__SHIFT 16
static inline uint32_t A3XX_RB_STENCILREFMASK_BF_STENCILWRITEMASK(uint32_t val)
{
return ((val) << A3XX_RB_STENCILREFMASK_BF_STENCILWRITEMASK__SHIFT) & A3XX_RB_STENCILREFMASK_BF_STENCILWRITEMASK__MASK;
}
 
#define REG_A3XX_RB_LRZ_VSC_CONTROL 0x0000210c
#define A3XX_RB_LRZ_VSC_CONTROL_BINNING_ENABLE 0x00000002
 
#define REG_A3XX_RB_WINDOW_OFFSET 0x0000210e
#define A3XX_RB_WINDOW_OFFSET_X__MASK 0x0000ffff
#define A3XX_RB_WINDOW_OFFSET_X__SHIFT 0
static inline uint32_t A3XX_RB_WINDOW_OFFSET_X(uint32_t val)
{
return ((val) << A3XX_RB_WINDOW_OFFSET_X__SHIFT) & A3XX_RB_WINDOW_OFFSET_X__MASK;
}
#define A3XX_RB_WINDOW_OFFSET_Y__MASK 0xffff0000
#define A3XX_RB_WINDOW_OFFSET_Y__SHIFT 16
static inline uint32_t A3XX_RB_WINDOW_OFFSET_Y(uint32_t val)
{
return ((val) << A3XX_RB_WINDOW_OFFSET_Y__SHIFT) & A3XX_RB_WINDOW_OFFSET_Y__MASK;
}
 
#define REG_A3XX_RB_SAMPLE_COUNT_CONTROL 0x00002110
#define A3XX_RB_SAMPLE_COUNT_CONTROL_RESET 0x00000001
#define A3XX_RB_SAMPLE_COUNT_CONTROL_COPY 0x00000002
 
#define REG_A3XX_RB_SAMPLE_COUNT_ADDR 0x00002111
 
#define REG_A3XX_RB_Z_CLAMP_MIN 0x00002114
 
#define REG_A3XX_RB_Z_CLAMP_MAX 0x00002115
 
#define REG_A3XX_VGT_BIN_BASE 0x000021e1
 
#define REG_A3XX_VGT_BIN_SIZE 0x000021e2
 
#define REG_A3XX_PC_VSTREAM_CONTROL 0x000021e4
#define A3XX_PC_VSTREAM_CONTROL_SIZE__MASK 0x003f0000
#define A3XX_PC_VSTREAM_CONTROL_SIZE__SHIFT 16
static inline uint32_t A3XX_PC_VSTREAM_CONTROL_SIZE(uint32_t val)
{
return ((val) << A3XX_PC_VSTREAM_CONTROL_SIZE__SHIFT) & A3XX_PC_VSTREAM_CONTROL_SIZE__MASK;
}
#define A3XX_PC_VSTREAM_CONTROL_N__MASK 0x07c00000
#define A3XX_PC_VSTREAM_CONTROL_N__SHIFT 22
static inline uint32_t A3XX_PC_VSTREAM_CONTROL_N(uint32_t val)
{
return ((val) << A3XX_PC_VSTREAM_CONTROL_N__SHIFT) & A3XX_PC_VSTREAM_CONTROL_N__MASK;
}
 
#define REG_A3XX_PC_VERTEX_REUSE_BLOCK_CNTL 0x000021ea
 
#define REG_A3XX_PC_PRIM_VTX_CNTL 0x000021ec
#define A3XX_PC_PRIM_VTX_CNTL_STRIDE_IN_VPC__MASK 0x0000001f
#define A3XX_PC_PRIM_VTX_CNTL_STRIDE_IN_VPC__SHIFT 0
static inline uint32_t A3XX_PC_PRIM_VTX_CNTL_STRIDE_IN_VPC(uint32_t val)
{
return ((val) << A3XX_PC_PRIM_VTX_CNTL_STRIDE_IN_VPC__SHIFT) & A3XX_PC_PRIM_VTX_CNTL_STRIDE_IN_VPC__MASK;
}
#define A3XX_PC_PRIM_VTX_CNTL_POLYMODE_FRONT_PTYPE__MASK 0x000000e0
#define A3XX_PC_PRIM_VTX_CNTL_POLYMODE_FRONT_PTYPE__SHIFT 5
static inline uint32_t A3XX_PC_PRIM_VTX_CNTL_POLYMODE_FRONT_PTYPE(enum adreno_pa_su_sc_draw val)
{
return ((val) << A3XX_PC_PRIM_VTX_CNTL_POLYMODE_FRONT_PTYPE__SHIFT) & A3XX_PC_PRIM_VTX_CNTL_POLYMODE_FRONT_PTYPE__MASK;
}
#define A3XX_PC_PRIM_VTX_CNTL_POLYMODE_BACK_PTYPE__MASK 0x00000700
#define A3XX_PC_PRIM_VTX_CNTL_POLYMODE_BACK_PTYPE__SHIFT 8
static inline uint32_t A3XX_PC_PRIM_VTX_CNTL_POLYMODE_BACK_PTYPE(enum adreno_pa_su_sc_draw val)
{
return ((val) << A3XX_PC_PRIM_VTX_CNTL_POLYMODE_BACK_PTYPE__SHIFT) & A3XX_PC_PRIM_VTX_CNTL_POLYMODE_BACK_PTYPE__MASK;
}
#define A3XX_PC_PRIM_VTX_CNTL_POLYMODE_ENABLE 0x00001000
#define A3XX_PC_PRIM_VTX_CNTL_PRIMITIVE_RESTART 0x00100000
#define A3XX_PC_PRIM_VTX_CNTL_PROVOKING_VTX_LAST 0x02000000
#define A3XX_PC_PRIM_VTX_CNTL_PSIZE 0x04000000
 
#define REG_A3XX_PC_RESTART_INDEX 0x000021ed
 
#define REG_A3XX_HLSQ_CONTROL_0_REG 0x00002200
#define A3XX_HLSQ_CONTROL_0_REG_FSTHREADSIZE__MASK 0x00000010
#define A3XX_HLSQ_CONTROL_0_REG_FSTHREADSIZE__SHIFT 4
static inline uint32_t A3XX_HLSQ_CONTROL_0_REG_FSTHREADSIZE(enum a3xx_threadsize val)
{
return ((val) << A3XX_HLSQ_CONTROL_0_REG_FSTHREADSIZE__SHIFT) & A3XX_HLSQ_CONTROL_0_REG_FSTHREADSIZE__MASK;
}
#define A3XX_HLSQ_CONTROL_0_REG_FSSUPERTHREADENABLE 0x00000040
#define A3XX_HLSQ_CONTROL_0_REG_SPSHADERRESTART 0x00000200
#define A3XX_HLSQ_CONTROL_0_REG_RESERVED2 0x00000400
#define A3XX_HLSQ_CONTROL_0_REG_CHUNKDISABLE 0x04000000
#define A3XX_HLSQ_CONTROL_0_REG_CONSTMODE__MASK 0x08000000
#define A3XX_HLSQ_CONTROL_0_REG_CONSTMODE__SHIFT 27
static inline uint32_t A3XX_HLSQ_CONTROL_0_REG_CONSTMODE(uint32_t val)
{
return ((val) << A3XX_HLSQ_CONTROL_0_REG_CONSTMODE__SHIFT) & A3XX_HLSQ_CONTROL_0_REG_CONSTMODE__MASK;
}
#define A3XX_HLSQ_CONTROL_0_REG_LAZYUPDATEDISABLE 0x10000000
#define A3XX_HLSQ_CONTROL_0_REG_SPCONSTFULLUPDATE 0x20000000
#define A3XX_HLSQ_CONTROL_0_REG_TPFULLUPDATE 0x40000000
#define A3XX_HLSQ_CONTROL_0_REG_SINGLECONTEXT 0x80000000
 
#define REG_A3XX_HLSQ_CONTROL_1_REG 0x00002201
#define A3XX_HLSQ_CONTROL_1_REG_VSTHREADSIZE__MASK 0x00000040
#define A3XX_HLSQ_CONTROL_1_REG_VSTHREADSIZE__SHIFT 6
static inline uint32_t A3XX_HLSQ_CONTROL_1_REG_VSTHREADSIZE(enum a3xx_threadsize val)
{
return ((val) << A3XX_HLSQ_CONTROL_1_REG_VSTHREADSIZE__SHIFT) & A3XX_HLSQ_CONTROL_1_REG_VSTHREADSIZE__MASK;
}
#define A3XX_HLSQ_CONTROL_1_REG_VSSUPERTHREADENABLE 0x00000100
#define A3XX_HLSQ_CONTROL_1_REG_RESERVED1 0x00000200
#define A3XX_HLSQ_CONTROL_1_REG_ZWCOORD 0x02000000
 
#define REG_A3XX_HLSQ_CONTROL_2_REG 0x00002202
#define A3XX_HLSQ_CONTROL_2_REG_PRIMALLOCTHRESHOLD__MASK 0xfc000000
#define A3XX_HLSQ_CONTROL_2_REG_PRIMALLOCTHRESHOLD__SHIFT 26
static inline uint32_t A3XX_HLSQ_CONTROL_2_REG_PRIMALLOCTHRESHOLD(uint32_t val)
{
return ((val) << A3XX_HLSQ_CONTROL_2_REG_PRIMALLOCTHRESHOLD__SHIFT) & A3XX_HLSQ_CONTROL_2_REG_PRIMALLOCTHRESHOLD__MASK;
}
 
#define REG_A3XX_HLSQ_CONTROL_3_REG 0x00002203
#define A3XX_HLSQ_CONTROL_3_REG_REGID__MASK 0x000000ff
#define A3XX_HLSQ_CONTROL_3_REG_REGID__SHIFT 0
static inline uint32_t A3XX_HLSQ_CONTROL_3_REG_REGID(uint32_t val)
{
return ((val) << A3XX_HLSQ_CONTROL_3_REG_REGID__SHIFT) & A3XX_HLSQ_CONTROL_3_REG_REGID__MASK;
}
 
#define REG_A3XX_HLSQ_VS_CONTROL_REG 0x00002204
#define A3XX_HLSQ_VS_CONTROL_REG_CONSTLENGTH__MASK 0x00000fff
#define A3XX_HLSQ_VS_CONTROL_REG_CONSTLENGTH__SHIFT 0
static inline uint32_t A3XX_HLSQ_VS_CONTROL_REG_CONSTLENGTH(uint32_t val)
{
return ((val) << A3XX_HLSQ_VS_CONTROL_REG_CONSTLENGTH__SHIFT) & A3XX_HLSQ_VS_CONTROL_REG_CONSTLENGTH__MASK;
}
#define A3XX_HLSQ_VS_CONTROL_REG_CONSTSTARTOFFSET__MASK 0x00fff000
#define A3XX_HLSQ_VS_CONTROL_REG_CONSTSTARTOFFSET__SHIFT 12
static inline uint32_t A3XX_HLSQ_VS_CONTROL_REG_CONSTSTARTOFFSET(uint32_t val)
{
return ((val) << A3XX_HLSQ_VS_CONTROL_REG_CONSTSTARTOFFSET__SHIFT) & A3XX_HLSQ_VS_CONTROL_REG_CONSTSTARTOFFSET__MASK;
}
#define A3XX_HLSQ_VS_CONTROL_REG_INSTRLENGTH__MASK 0xff000000
#define A3XX_HLSQ_VS_CONTROL_REG_INSTRLENGTH__SHIFT 24
static inline uint32_t A3XX_HLSQ_VS_CONTROL_REG_INSTRLENGTH(uint32_t val)
{
return ((val) << A3XX_HLSQ_VS_CONTROL_REG_INSTRLENGTH__SHIFT) & A3XX_HLSQ_VS_CONTROL_REG_INSTRLENGTH__MASK;
}
 
#define REG_A3XX_HLSQ_FS_CONTROL_REG 0x00002205
#define A3XX_HLSQ_FS_CONTROL_REG_CONSTLENGTH__MASK 0x00000fff
#define A3XX_HLSQ_FS_CONTROL_REG_CONSTLENGTH__SHIFT 0
static inline uint32_t A3XX_HLSQ_FS_CONTROL_REG_CONSTLENGTH(uint32_t val)
{
return ((val) << A3XX_HLSQ_FS_CONTROL_REG_CONSTLENGTH__SHIFT) & A3XX_HLSQ_FS_CONTROL_REG_CONSTLENGTH__MASK;
}
#define A3XX_HLSQ_FS_CONTROL_REG_CONSTSTARTOFFSET__MASK 0x00fff000
#define A3XX_HLSQ_FS_CONTROL_REG_CONSTSTARTOFFSET__SHIFT 12
static inline uint32_t A3XX_HLSQ_FS_CONTROL_REG_CONSTSTARTOFFSET(uint32_t val)
{
return ((val) << A3XX_HLSQ_FS_CONTROL_REG_CONSTSTARTOFFSET__SHIFT) & A3XX_HLSQ_FS_CONTROL_REG_CONSTSTARTOFFSET__MASK;
}
#define A3XX_HLSQ_FS_CONTROL_REG_INSTRLENGTH__MASK 0xff000000
#define A3XX_HLSQ_FS_CONTROL_REG_INSTRLENGTH__SHIFT 24
static inline uint32_t A3XX_HLSQ_FS_CONTROL_REG_INSTRLENGTH(uint32_t val)
{
return ((val) << A3XX_HLSQ_FS_CONTROL_REG_INSTRLENGTH__SHIFT) & A3XX_HLSQ_FS_CONTROL_REG_INSTRLENGTH__MASK;
}
 
#define REG_A3XX_HLSQ_CONST_VSPRESV_RANGE_REG 0x00002206
#define A3XX_HLSQ_CONST_VSPRESV_RANGE_REG_STARTENTRY__MASK 0x0000ffff
#define A3XX_HLSQ_CONST_VSPRESV_RANGE_REG_STARTENTRY__SHIFT 0
static inline uint32_t A3XX_HLSQ_CONST_VSPRESV_RANGE_REG_STARTENTRY(uint32_t val)
{
return ((val) << A3XX_HLSQ_CONST_VSPRESV_RANGE_REG_STARTENTRY__SHIFT) & A3XX_HLSQ_CONST_VSPRESV_RANGE_REG_STARTENTRY__MASK;
}
#define A3XX_HLSQ_CONST_VSPRESV_RANGE_REG_ENDENTRY__MASK 0xffff0000
#define A3XX_HLSQ_CONST_VSPRESV_RANGE_REG_ENDENTRY__SHIFT 16
static inline uint32_t A3XX_HLSQ_CONST_VSPRESV_RANGE_REG_ENDENTRY(uint32_t val)
{
return ((val) << A3XX_HLSQ_CONST_VSPRESV_RANGE_REG_ENDENTRY__SHIFT) & A3XX_HLSQ_CONST_VSPRESV_RANGE_REG_ENDENTRY__MASK;
}
 
#define REG_A3XX_HLSQ_CONST_FSPRESV_RANGE_REG 0x00002207
#define A3XX_HLSQ_CONST_FSPRESV_RANGE_REG_STARTENTRY__MASK 0x0000ffff
#define A3XX_HLSQ_CONST_FSPRESV_RANGE_REG_STARTENTRY__SHIFT 0
static inline uint32_t A3XX_HLSQ_CONST_FSPRESV_RANGE_REG_STARTENTRY(uint32_t val)
{
return ((val) << A3XX_HLSQ_CONST_FSPRESV_RANGE_REG_STARTENTRY__SHIFT) & A3XX_HLSQ_CONST_FSPRESV_RANGE_REG_STARTENTRY__MASK;
}
#define A3XX_HLSQ_CONST_FSPRESV_RANGE_REG_ENDENTRY__MASK 0xffff0000
#define A3XX_HLSQ_CONST_FSPRESV_RANGE_REG_ENDENTRY__SHIFT 16
static inline uint32_t A3XX_HLSQ_CONST_FSPRESV_RANGE_REG_ENDENTRY(uint32_t val)
{
return ((val) << A3XX_HLSQ_CONST_FSPRESV_RANGE_REG_ENDENTRY__SHIFT) & A3XX_HLSQ_CONST_FSPRESV_RANGE_REG_ENDENTRY__MASK;
}
 
#define REG_A3XX_HLSQ_CL_NDRANGE_0_REG 0x0000220a
#define A3XX_HLSQ_CL_NDRANGE_0_REG_WORKDIM__MASK 0x00000003
#define A3XX_HLSQ_CL_NDRANGE_0_REG_WORKDIM__SHIFT 0
static inline uint32_t A3XX_HLSQ_CL_NDRANGE_0_REG_WORKDIM(uint32_t val)
{
return ((val) << A3XX_HLSQ_CL_NDRANGE_0_REG_WORKDIM__SHIFT) & A3XX_HLSQ_CL_NDRANGE_0_REG_WORKDIM__MASK;
}
#define A3XX_HLSQ_CL_NDRANGE_0_REG_LOCALSIZE0__MASK 0x00000ffc
#define A3XX_HLSQ_CL_NDRANGE_0_REG_LOCALSIZE0__SHIFT 2
static inline uint32_t A3XX_HLSQ_CL_NDRANGE_0_REG_LOCALSIZE0(uint32_t val)
{
return ((val) << A3XX_HLSQ_CL_NDRANGE_0_REG_LOCALSIZE0__SHIFT) & A3XX_HLSQ_CL_NDRANGE_0_REG_LOCALSIZE0__MASK;
}
#define A3XX_HLSQ_CL_NDRANGE_0_REG_LOCALSIZE1__MASK 0x003ff000
#define A3XX_HLSQ_CL_NDRANGE_0_REG_LOCALSIZE1__SHIFT 12
static inline uint32_t A3XX_HLSQ_CL_NDRANGE_0_REG_LOCALSIZE1(uint32_t val)
{
return ((val) << A3XX_HLSQ_CL_NDRANGE_0_REG_LOCALSIZE1__SHIFT) & A3XX_HLSQ_CL_NDRANGE_0_REG_LOCALSIZE1__MASK;
}
#define A3XX_HLSQ_CL_NDRANGE_0_REG_LOCALSIZE2__MASK 0xffc00000
#define A3XX_HLSQ_CL_NDRANGE_0_REG_LOCALSIZE2__SHIFT 22
static inline uint32_t A3XX_HLSQ_CL_NDRANGE_0_REG_LOCALSIZE2(uint32_t val)
{
return ((val) << A3XX_HLSQ_CL_NDRANGE_0_REG_LOCALSIZE2__SHIFT) & A3XX_HLSQ_CL_NDRANGE_0_REG_LOCALSIZE2__MASK;
}
 
static inline uint32_t REG_A3XX_HLSQ_CL_GLOBAL_WORK(uint32_t i0) { return 0x0000220b + 0x2*i0; }
 
static inline uint32_t REG_A3XX_HLSQ_CL_GLOBAL_WORK_SIZE(uint32_t i0) { return 0x0000220b + 0x2*i0; }
 
static inline uint32_t REG_A3XX_HLSQ_CL_GLOBAL_WORK_OFFSET(uint32_t i0) { return 0x0000220c + 0x2*i0; }
 
#define REG_A3XX_HLSQ_CL_CONTROL_0_REG 0x00002211
 
#define REG_A3XX_HLSQ_CL_CONTROL_1_REG 0x00002212
 
#define REG_A3XX_HLSQ_CL_KERNEL_CONST_REG 0x00002214
 
static inline uint32_t REG_A3XX_HLSQ_CL_KERNEL_GROUP(uint32_t i0) { return 0x00002215 + 0x1*i0; }
 
static inline uint32_t REG_A3XX_HLSQ_CL_KERNEL_GROUP_RATIO(uint32_t i0) { return 0x00002215 + 0x1*i0; }
 
#define REG_A3XX_HLSQ_CL_KERNEL_GROUP_Y_REG 0x00002216
 
#define REG_A3XX_HLSQ_CL_KERNEL_GROUP_Z_REG 0x00002217
 
#define REG_A3XX_HLSQ_CL_WG_OFFSET_REG 0x0000221a
 
#define REG_A3XX_VFD_CONTROL_0 0x00002240
#define A3XX_VFD_CONTROL_0_TOTALATTRTOVS__MASK 0x0003ffff
#define A3XX_VFD_CONTROL_0_TOTALATTRTOVS__SHIFT 0
static inline uint32_t A3XX_VFD_CONTROL_0_TOTALATTRTOVS(uint32_t val)
{
return ((val) << A3XX_VFD_CONTROL_0_TOTALATTRTOVS__SHIFT) & A3XX_VFD_CONTROL_0_TOTALATTRTOVS__MASK;
}
#define A3XX_VFD_CONTROL_0_PACKETSIZE__MASK 0x003c0000
#define A3XX_VFD_CONTROL_0_PACKETSIZE__SHIFT 18
static inline uint32_t A3XX_VFD_CONTROL_0_PACKETSIZE(uint32_t val)
{
return ((val) << A3XX_VFD_CONTROL_0_PACKETSIZE__SHIFT) & A3XX_VFD_CONTROL_0_PACKETSIZE__MASK;
}
#define A3XX_VFD_CONTROL_0_STRMDECINSTRCNT__MASK 0x07c00000
#define A3XX_VFD_CONTROL_0_STRMDECINSTRCNT__SHIFT 22
static inline uint32_t A3XX_VFD_CONTROL_0_STRMDECINSTRCNT(uint32_t val)
{
return ((val) << A3XX_VFD_CONTROL_0_STRMDECINSTRCNT__SHIFT) & A3XX_VFD_CONTROL_0_STRMDECINSTRCNT__MASK;
}
#define A3XX_VFD_CONTROL_0_STRMFETCHINSTRCNT__MASK 0xf8000000
#define A3XX_VFD_CONTROL_0_STRMFETCHINSTRCNT__SHIFT 27
static inline uint32_t A3XX_VFD_CONTROL_0_STRMFETCHINSTRCNT(uint32_t val)
{
return ((val) << A3XX_VFD_CONTROL_0_STRMFETCHINSTRCNT__SHIFT) & A3XX_VFD_CONTROL_0_STRMFETCHINSTRCNT__MASK;
}
 
#define REG_A3XX_VFD_CONTROL_1 0x00002241
#define A3XX_VFD_CONTROL_1_MAXSTORAGE__MASK 0x0000ffff
#define A3XX_VFD_CONTROL_1_MAXSTORAGE__SHIFT 0
static inline uint32_t A3XX_VFD_CONTROL_1_MAXSTORAGE(uint32_t val)
{
return ((val) << A3XX_VFD_CONTROL_1_MAXSTORAGE__SHIFT) & A3XX_VFD_CONTROL_1_MAXSTORAGE__MASK;
}
#define A3XX_VFD_CONTROL_1_REGID4VTX__MASK 0x00ff0000
#define A3XX_VFD_CONTROL_1_REGID4VTX__SHIFT 16
static inline uint32_t A3XX_VFD_CONTROL_1_REGID4VTX(uint32_t val)
{
return ((val) << A3XX_VFD_CONTROL_1_REGID4VTX__SHIFT) & A3XX_VFD_CONTROL_1_REGID4VTX__MASK;
}
#define A3XX_VFD_CONTROL_1_REGID4INST__MASK 0xff000000
#define A3XX_VFD_CONTROL_1_REGID4INST__SHIFT 24
static inline uint32_t A3XX_VFD_CONTROL_1_REGID4INST(uint32_t val)
{
return ((val) << A3XX_VFD_CONTROL_1_REGID4INST__SHIFT) & A3XX_VFD_CONTROL_1_REGID4INST__MASK;
}
 
#define REG_A3XX_VFD_INDEX_MIN 0x00002242
 
#define REG_A3XX_VFD_INDEX_MAX 0x00002243
 
#define REG_A3XX_VFD_INSTANCEID_OFFSET 0x00002244
 
#define REG_A3XX_VFD_INDEX_OFFSET 0x00002245
 
static inline uint32_t REG_A3XX_VFD_FETCH(uint32_t i0) { return 0x00002246 + 0x2*i0; }
 
static inline uint32_t REG_A3XX_VFD_FETCH_INSTR_0(uint32_t i0) { return 0x00002246 + 0x2*i0; }
#define A3XX_VFD_FETCH_INSTR_0_FETCHSIZE__MASK 0x0000007f
#define A3XX_VFD_FETCH_INSTR_0_FETCHSIZE__SHIFT 0
static inline uint32_t A3XX_VFD_FETCH_INSTR_0_FETCHSIZE(uint32_t val)
{
return ((val) << A3XX_VFD_FETCH_INSTR_0_FETCHSIZE__SHIFT) & A3XX_VFD_FETCH_INSTR_0_FETCHSIZE__MASK;
}
#define A3XX_VFD_FETCH_INSTR_0_BUFSTRIDE__MASK 0x0000ff80
#define A3XX_VFD_FETCH_INSTR_0_BUFSTRIDE__SHIFT 7
static inline uint32_t A3XX_VFD_FETCH_INSTR_0_BUFSTRIDE(uint32_t val)
{
return ((val) << A3XX_VFD_FETCH_INSTR_0_BUFSTRIDE__SHIFT) & A3XX_VFD_FETCH_INSTR_0_BUFSTRIDE__MASK;
}
#define A3XX_VFD_FETCH_INSTR_0_INSTANCED 0x00010000
#define A3XX_VFD_FETCH_INSTR_0_SWITCHNEXT 0x00020000
#define A3XX_VFD_FETCH_INSTR_0_INDEXCODE__MASK 0x00fc0000
#define A3XX_VFD_FETCH_INSTR_0_INDEXCODE__SHIFT 18
static inline uint32_t A3XX_VFD_FETCH_INSTR_0_INDEXCODE(uint32_t val)
{
return ((val) << A3XX_VFD_FETCH_INSTR_0_INDEXCODE__SHIFT) & A3XX_VFD_FETCH_INSTR_0_INDEXCODE__MASK;
}
#define A3XX_VFD_FETCH_INSTR_0_STEPRATE__MASK 0xff000000
#define A3XX_VFD_FETCH_INSTR_0_STEPRATE__SHIFT 24
static inline uint32_t A3XX_VFD_FETCH_INSTR_0_STEPRATE(uint32_t val)
{
return ((val) << A3XX_VFD_FETCH_INSTR_0_STEPRATE__SHIFT) & A3XX_VFD_FETCH_INSTR_0_STEPRATE__MASK;
}
 
static inline uint32_t REG_A3XX_VFD_FETCH_INSTR_1(uint32_t i0) { return 0x00002247 + 0x2*i0; }
 
static inline uint32_t REG_A3XX_VFD_DECODE(uint32_t i0) { return 0x00002266 + 0x1*i0; }
 
static inline uint32_t REG_A3XX_VFD_DECODE_INSTR(uint32_t i0) { return 0x00002266 + 0x1*i0; }
#define A3XX_VFD_DECODE_INSTR_WRITEMASK__MASK 0x0000000f
#define A3XX_VFD_DECODE_INSTR_WRITEMASK__SHIFT 0
static inline uint32_t A3XX_VFD_DECODE_INSTR_WRITEMASK(uint32_t val)
{
return ((val) << A3XX_VFD_DECODE_INSTR_WRITEMASK__SHIFT) & A3XX_VFD_DECODE_INSTR_WRITEMASK__MASK;
}
#define A3XX_VFD_DECODE_INSTR_CONSTFILL 0x00000010
#define A3XX_VFD_DECODE_INSTR_FORMAT__MASK 0x00000fc0
#define A3XX_VFD_DECODE_INSTR_FORMAT__SHIFT 6
static inline uint32_t A3XX_VFD_DECODE_INSTR_FORMAT(enum a3xx_vtx_fmt val)
{
return ((val) << A3XX_VFD_DECODE_INSTR_FORMAT__SHIFT) & A3XX_VFD_DECODE_INSTR_FORMAT__MASK;
}
#define A3XX_VFD_DECODE_INSTR_REGID__MASK 0x000ff000
#define A3XX_VFD_DECODE_INSTR_REGID__SHIFT 12
static inline uint32_t A3XX_VFD_DECODE_INSTR_REGID(uint32_t val)
{
return ((val) << A3XX_VFD_DECODE_INSTR_REGID__SHIFT) & A3XX_VFD_DECODE_INSTR_REGID__MASK;
}
#define A3XX_VFD_DECODE_INSTR_INT 0x00100000
#define A3XX_VFD_DECODE_INSTR_SWAP__MASK 0x00c00000
#define A3XX_VFD_DECODE_INSTR_SWAP__SHIFT 22
static inline uint32_t A3XX_VFD_DECODE_INSTR_SWAP(enum a3xx_color_swap val)
{
return ((val) << A3XX_VFD_DECODE_INSTR_SWAP__SHIFT) & A3XX_VFD_DECODE_INSTR_SWAP__MASK;
}
#define A3XX_VFD_DECODE_INSTR_SHIFTCNT__MASK 0x1f000000
#define A3XX_VFD_DECODE_INSTR_SHIFTCNT__SHIFT 24
static inline uint32_t A3XX_VFD_DECODE_INSTR_SHIFTCNT(uint32_t val)
{
return ((val) << A3XX_VFD_DECODE_INSTR_SHIFTCNT__SHIFT) & A3XX_VFD_DECODE_INSTR_SHIFTCNT__MASK;
}
#define A3XX_VFD_DECODE_INSTR_LASTCOMPVALID 0x20000000
#define A3XX_VFD_DECODE_INSTR_SWITCHNEXT 0x40000000
 
#define REG_A3XX_VFD_VS_THREADING_THRESHOLD 0x0000227e
#define A3XX_VFD_VS_THREADING_THRESHOLD_REGID_THRESHOLD__MASK 0x0000000f
#define A3XX_VFD_VS_THREADING_THRESHOLD_REGID_THRESHOLD__SHIFT 0
static inline uint32_t A3XX_VFD_VS_THREADING_THRESHOLD_REGID_THRESHOLD(uint32_t val)
{
return ((val) << A3XX_VFD_VS_THREADING_THRESHOLD_REGID_THRESHOLD__SHIFT) & A3XX_VFD_VS_THREADING_THRESHOLD_REGID_THRESHOLD__MASK;
}
#define A3XX_VFD_VS_THREADING_THRESHOLD_REGID_VTXCNT__MASK 0x0000ff00
#define A3XX_VFD_VS_THREADING_THRESHOLD_REGID_VTXCNT__SHIFT 8
static inline uint32_t A3XX_VFD_VS_THREADING_THRESHOLD_REGID_VTXCNT(uint32_t val)
{
return ((val) << A3XX_VFD_VS_THREADING_THRESHOLD_REGID_VTXCNT__SHIFT) & A3XX_VFD_VS_THREADING_THRESHOLD_REGID_VTXCNT__MASK;
}
 
#define REG_A3XX_VPC_ATTR 0x00002280
#define A3XX_VPC_ATTR_TOTALATTR__MASK 0x000001ff
#define A3XX_VPC_ATTR_TOTALATTR__SHIFT 0
static inline uint32_t A3XX_VPC_ATTR_TOTALATTR(uint32_t val)
{
return ((val) << A3XX_VPC_ATTR_TOTALATTR__SHIFT) & A3XX_VPC_ATTR_TOTALATTR__MASK;
}
#define A3XX_VPC_ATTR_PSIZE 0x00000200
#define A3XX_VPC_ATTR_THRDASSIGN__MASK 0x0ffff000
#define A3XX_VPC_ATTR_THRDASSIGN__SHIFT 12
static inline uint32_t A3XX_VPC_ATTR_THRDASSIGN(uint32_t val)
{
return ((val) << A3XX_VPC_ATTR_THRDASSIGN__SHIFT) & A3XX_VPC_ATTR_THRDASSIGN__MASK;
}
#define A3XX_VPC_ATTR_LMSIZE__MASK 0xf0000000
#define A3XX_VPC_ATTR_LMSIZE__SHIFT 28
static inline uint32_t A3XX_VPC_ATTR_LMSIZE(uint32_t val)
{
return ((val) << A3XX_VPC_ATTR_LMSIZE__SHIFT) & A3XX_VPC_ATTR_LMSIZE__MASK;
}
 
#define REG_A3XX_VPC_PACK 0x00002281
#define A3XX_VPC_PACK_NUMFPNONPOSVAR__MASK 0x0000ff00
#define A3XX_VPC_PACK_NUMFPNONPOSVAR__SHIFT 8
static inline uint32_t A3XX_VPC_PACK_NUMFPNONPOSVAR(uint32_t val)
{
return ((val) << A3XX_VPC_PACK_NUMFPNONPOSVAR__SHIFT) & A3XX_VPC_PACK_NUMFPNONPOSVAR__MASK;
}
#define A3XX_VPC_PACK_NUMNONPOSVSVAR__MASK 0x00ff0000
#define A3XX_VPC_PACK_NUMNONPOSVSVAR__SHIFT 16
static inline uint32_t A3XX_VPC_PACK_NUMNONPOSVSVAR(uint32_t val)
{
return ((val) << A3XX_VPC_PACK_NUMNONPOSVSVAR__SHIFT) & A3XX_VPC_PACK_NUMNONPOSVSVAR__MASK;
}
 
static inline uint32_t REG_A3XX_VPC_VARYING_INTERP(uint32_t i0) { return 0x00002282 + 0x1*i0; }
 
static inline uint32_t REG_A3XX_VPC_VARYING_INTERP_MODE(uint32_t i0) { return 0x00002282 + 0x1*i0; }
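/* Each VPC_VARYING_INTERP_MODE register packs sixteen 2-bit interpolation-mode
 * fields (C0..CF), one per scalar varying component; the four instances starting
 * at 0x2282 together cover 16 vec4 varyings.
 */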
#define A3XX_VPC_VARYING_INTERP_MODE_C0__MASK 0x00000003
#define A3XX_VPC_VARYING_INTERP_MODE_C0__SHIFT 0
static inline uint32_t A3XX_VPC_VARYING_INTERP_MODE_C0(enum a3xx_intp_mode val)
{
return ((val) << A3XX_VPC_VARYING_INTERP_MODE_C0__SHIFT) & A3XX_VPC_VARYING_INTERP_MODE_C0__MASK;
}
#define A3XX_VPC_VARYING_INTERP_MODE_C1__MASK 0x0000000c
#define A3XX_VPC_VARYING_INTERP_MODE_C1__SHIFT 2
static inline uint32_t A3XX_VPC_VARYING_INTERP_MODE_C1(enum a3xx_intp_mode val)
{
return ((val) << A3XX_VPC_VARYING_INTERP_MODE_C1__SHIFT) & A3XX_VPC_VARYING_INTERP_MODE_C1__MASK;
}
#define A3XX_VPC_VARYING_INTERP_MODE_C2__MASK 0x00000030
#define A3XX_VPC_VARYING_INTERP_MODE_C2__SHIFT 4
static inline uint32_t A3XX_VPC_VARYING_INTERP_MODE_C2(enum a3xx_intp_mode val)
{
return ((val) << A3XX_VPC_VARYING_INTERP_MODE_C2__SHIFT) & A3XX_VPC_VARYING_INTERP_MODE_C2__MASK;
}
#define A3XX_VPC_VARYING_INTERP_MODE_C3__MASK 0x000000c0
#define A3XX_VPC_VARYING_INTERP_MODE_C3__SHIFT 6
static inline uint32_t A3XX_VPC_VARYING_INTERP_MODE_C3(enum a3xx_intp_mode val)
{
return ((val) << A3XX_VPC_VARYING_INTERP_MODE_C3__SHIFT) & A3XX_VPC_VARYING_INTERP_MODE_C3__MASK;
}
#define A3XX_VPC_VARYING_INTERP_MODE_C4__MASK 0x00000300
#define A3XX_VPC_VARYING_INTERP_MODE_C4__SHIFT 8
static inline uint32_t A3XX_VPC_VARYING_INTERP_MODE_C4(enum a3xx_intp_mode val)
{
return ((val) << A3XX_VPC_VARYING_INTERP_MODE_C4__SHIFT) & A3XX_VPC_VARYING_INTERP_MODE_C4__MASK;
}
#define A3XX_VPC_VARYING_INTERP_MODE_C5__MASK 0x00000c00
#define A3XX_VPC_VARYING_INTERP_MODE_C5__SHIFT 10
static inline uint32_t A3XX_VPC_VARYING_INTERP_MODE_C5(enum a3xx_intp_mode val)
{
return ((val) << A3XX_VPC_VARYING_INTERP_MODE_C5__SHIFT) & A3XX_VPC_VARYING_INTERP_MODE_C5__MASK;
}
#define A3XX_VPC_VARYING_INTERP_MODE_C6__MASK 0x00003000
#define A3XX_VPC_VARYING_INTERP_MODE_C6__SHIFT 12
static inline uint32_t A3XX_VPC_VARYING_INTERP_MODE_C6(enum a3xx_intp_mode val)
{
return ((val) << A3XX_VPC_VARYING_INTERP_MODE_C6__SHIFT) & A3XX_VPC_VARYING_INTERP_MODE_C6__MASK;
}
#define A3XX_VPC_VARYING_INTERP_MODE_C7__MASK 0x0000c000
#define A3XX_VPC_VARYING_INTERP_MODE_C7__SHIFT 14
static inline uint32_t A3XX_VPC_VARYING_INTERP_MODE_C7(enum a3xx_intp_mode val)
{
return ((val) << A3XX_VPC_VARYING_INTERP_MODE_C7__SHIFT) & A3XX_VPC_VARYING_INTERP_MODE_C7__MASK;
}
#define A3XX_VPC_VARYING_INTERP_MODE_C8__MASK 0x00030000
#define A3XX_VPC_VARYING_INTERP_MODE_C8__SHIFT 16
static inline uint32_t A3XX_VPC_VARYING_INTERP_MODE_C8(enum a3xx_intp_mode val)
{
return ((val) << A3XX_VPC_VARYING_INTERP_MODE_C8__SHIFT) & A3XX_VPC_VARYING_INTERP_MODE_C8__MASK;
}
#define A3XX_VPC_VARYING_INTERP_MODE_C9__MASK 0x000c0000
#define A3XX_VPC_VARYING_INTERP_MODE_C9__SHIFT 18
static inline uint32_t A3XX_VPC_VARYING_INTERP_MODE_C9(enum a3xx_intp_mode val)
{
return ((val) << A3XX_VPC_VARYING_INTERP_MODE_C9__SHIFT) & A3XX_VPC_VARYING_INTERP_MODE_C9__MASK;
}
#define A3XX_VPC_VARYING_INTERP_MODE_CA__MASK 0x00300000
#define A3XX_VPC_VARYING_INTERP_MODE_CA__SHIFT 20
static inline uint32_t A3XX_VPC_VARYING_INTERP_MODE_CA(enum a3xx_intp_mode val)
{
return ((val) << A3XX_VPC_VARYING_INTERP_MODE_CA__SHIFT) & A3XX_VPC_VARYING_INTERP_MODE_CA__MASK;
}
#define A3XX_VPC_VARYING_INTERP_MODE_CB__MASK 0x00c00000
#define A3XX_VPC_VARYING_INTERP_MODE_CB__SHIFT 22
static inline uint32_t A3XX_VPC_VARYING_INTERP_MODE_CB(enum a3xx_intp_mode val)
{
return ((val) << A3XX_VPC_VARYING_INTERP_MODE_CB__SHIFT) & A3XX_VPC_VARYING_INTERP_MODE_CB__MASK;
}
#define A3XX_VPC_VARYING_INTERP_MODE_CC__MASK 0x03000000
#define A3XX_VPC_VARYING_INTERP_MODE_CC__SHIFT 24
static inline uint32_t A3XX_VPC_VARYING_INTERP_MODE_CC(enum a3xx_intp_mode val)
{
return ((val) << A3XX_VPC_VARYING_INTERP_MODE_CC__SHIFT) & A3XX_VPC_VARYING_INTERP_MODE_CC__MASK;
}
#define A3XX_VPC_VARYING_INTERP_MODE_CD__MASK 0x0c000000
#define A3XX_VPC_VARYING_INTERP_MODE_CD__SHIFT 26
static inline uint32_t A3XX_VPC_VARYING_INTERP_MODE_CD(enum a3xx_intp_mode val)
{
return ((val) << A3XX_VPC_VARYING_INTERP_MODE_CD__SHIFT) & A3XX_VPC_VARYING_INTERP_MODE_CD__MASK;
}
#define A3XX_VPC_VARYING_INTERP_MODE_CE__MASK 0x30000000
#define A3XX_VPC_VARYING_INTERP_MODE_CE__SHIFT 28
static inline uint32_t A3XX_VPC_VARYING_INTERP_MODE_CE(enum a3xx_intp_mode val)
{
return ((val) << A3XX_VPC_VARYING_INTERP_MODE_CE__SHIFT) & A3XX_VPC_VARYING_INTERP_MODE_CE__MASK;
}
#define A3XX_VPC_VARYING_INTERP_MODE_CF__MASK 0xc0000000
#define A3XX_VPC_VARYING_INTERP_MODE_CF__SHIFT 30
static inline uint32_t A3XX_VPC_VARYING_INTERP_MODE_CF(enum a3xx_intp_mode val)
{
return ((val) << A3XX_VPC_VARYING_INTERP_MODE_CF__SHIFT) & A3XX_VPC_VARYING_INTERP_MODE_CF__MASK;
}
 
static inline uint32_t REG_A3XX_VPC_VARYING_PS_REPL(uint32_t i0) { return 0x00002286 + 0x1*i0; }
 
static inline uint32_t REG_A3XX_VPC_VARYING_PS_REPL_MODE(uint32_t i0) { return 0x00002286 + 0x1*i0; }
#define A3XX_VPC_VARYING_PS_REPL_MODE_C0__MASK 0x00000003
#define A3XX_VPC_VARYING_PS_REPL_MODE_C0__SHIFT 0
static inline uint32_t A3XX_VPC_VARYING_PS_REPL_MODE_C0(enum a3xx_repl_mode val)
{
return ((val) << A3XX_VPC_VARYING_PS_REPL_MODE_C0__SHIFT) & A3XX_VPC_VARYING_PS_REPL_MODE_C0__MASK;
}
#define A3XX_VPC_VARYING_PS_REPL_MODE_C1__MASK 0x0000000c
#define A3XX_VPC_VARYING_PS_REPL_MODE_C1__SHIFT 2
static inline uint32_t A3XX_VPC_VARYING_PS_REPL_MODE_C1(enum a3xx_repl_mode val)
{
return ((val) << A3XX_VPC_VARYING_PS_REPL_MODE_C1__SHIFT) & A3XX_VPC_VARYING_PS_REPL_MODE_C1__MASK;
}
#define A3XX_VPC_VARYING_PS_REPL_MODE_C2__MASK 0x00000030
#define A3XX_VPC_VARYING_PS_REPL_MODE_C2__SHIFT 4
static inline uint32_t A3XX_VPC_VARYING_PS_REPL_MODE_C2(enum a3xx_repl_mode val)
{
return ((val) << A3XX_VPC_VARYING_PS_REPL_MODE_C2__SHIFT) & A3XX_VPC_VARYING_PS_REPL_MODE_C2__MASK;
}
#define A3XX_VPC_VARYING_PS_REPL_MODE_C3__MASK 0x000000c0
#define A3XX_VPC_VARYING_PS_REPL_MODE_C3__SHIFT 6
static inline uint32_t A3XX_VPC_VARYING_PS_REPL_MODE_C3(enum a3xx_repl_mode val)
{
return ((val) << A3XX_VPC_VARYING_PS_REPL_MODE_C3__SHIFT) & A3XX_VPC_VARYING_PS_REPL_MODE_C3__MASK;
}
#define A3XX_VPC_VARYING_PS_REPL_MODE_C4__MASK 0x00000300
#define A3XX_VPC_VARYING_PS_REPL_MODE_C4__SHIFT 8
static inline uint32_t A3XX_VPC_VARYING_PS_REPL_MODE_C4(enum a3xx_repl_mode val)
{
return ((val) << A3XX_VPC_VARYING_PS_REPL_MODE_C4__SHIFT) & A3XX_VPC_VARYING_PS_REPL_MODE_C4__MASK;
}
#define A3XX_VPC_VARYING_PS_REPL_MODE_C5__MASK 0x00000c00
#define A3XX_VPC_VARYING_PS_REPL_MODE_C5__SHIFT 10
static inline uint32_t A3XX_VPC_VARYING_PS_REPL_MODE_C5(enum a3xx_repl_mode val)
{
return ((val) << A3XX_VPC_VARYING_PS_REPL_MODE_C5__SHIFT) & A3XX_VPC_VARYING_PS_REPL_MODE_C5__MASK;
}
#define A3XX_VPC_VARYING_PS_REPL_MODE_C6__MASK 0x00003000
#define A3XX_VPC_VARYING_PS_REPL_MODE_C6__SHIFT 12
static inline uint32_t A3XX_VPC_VARYING_PS_REPL_MODE_C6(enum a3xx_repl_mode val)
{
return ((val) << A3XX_VPC_VARYING_PS_REPL_MODE_C6__SHIFT) & A3XX_VPC_VARYING_PS_REPL_MODE_C6__MASK;
}
#define A3XX_VPC_VARYING_PS_REPL_MODE_C7__MASK 0x0000c000
#define A3XX_VPC_VARYING_PS_REPL_MODE_C7__SHIFT 14
static inline uint32_t A3XX_VPC_VARYING_PS_REPL_MODE_C7(enum a3xx_repl_mode val)
{
return ((val) << A3XX_VPC_VARYING_PS_REPL_MODE_C7__SHIFT) & A3XX_VPC_VARYING_PS_REPL_MODE_C7__MASK;
}
#define A3XX_VPC_VARYING_PS_REPL_MODE_C8__MASK 0x00030000
#define A3XX_VPC_VARYING_PS_REPL_MODE_C8__SHIFT 16
static inline uint32_t A3XX_VPC_VARYING_PS_REPL_MODE_C8(enum a3xx_repl_mode val)
{
return ((val) << A3XX_VPC_VARYING_PS_REPL_MODE_C8__SHIFT) & A3XX_VPC_VARYING_PS_REPL_MODE_C8__MASK;
}
#define A3XX_VPC_VARYING_PS_REPL_MODE_C9__MASK 0x000c0000
#define A3XX_VPC_VARYING_PS_REPL_MODE_C9__SHIFT 18
static inline uint32_t A3XX_VPC_VARYING_PS_REPL_MODE_C9(enum a3xx_repl_mode val)
{
return ((val) << A3XX_VPC_VARYING_PS_REPL_MODE_C9__SHIFT) & A3XX_VPC_VARYING_PS_REPL_MODE_C9__MASK;
}
#define A3XX_VPC_VARYING_PS_REPL_MODE_CA__MASK 0x00300000
#define A3XX_VPC_VARYING_PS_REPL_MODE_CA__SHIFT 20
static inline uint32_t A3XX_VPC_VARYING_PS_REPL_MODE_CA(enum a3xx_repl_mode val)
{
return ((val) << A3XX_VPC_VARYING_PS_REPL_MODE_CA__SHIFT) & A3XX_VPC_VARYING_PS_REPL_MODE_CA__MASK;
}
#define A3XX_VPC_VARYING_PS_REPL_MODE_CB__MASK 0x00c00000
#define A3XX_VPC_VARYING_PS_REPL_MODE_CB__SHIFT 22
static inline uint32_t A3XX_VPC_VARYING_PS_REPL_MODE_CB(enum a3xx_repl_mode val)
{
return ((val) << A3XX_VPC_VARYING_PS_REPL_MODE_CB__SHIFT) & A3XX_VPC_VARYING_PS_REPL_MODE_CB__MASK;
}
#define A3XX_VPC_VARYING_PS_REPL_MODE_CC__MASK 0x03000000
#define A3XX_VPC_VARYING_PS_REPL_MODE_CC__SHIFT 24
static inline uint32_t A3XX_VPC_VARYING_PS_REPL_MODE_CC(enum a3xx_repl_mode val)
{
return ((val) << A3XX_VPC_VARYING_PS_REPL_MODE_CC__SHIFT) & A3XX_VPC_VARYING_PS_REPL_MODE_CC__MASK;
}
#define A3XX_VPC_VARYING_PS_REPL_MODE_CD__MASK 0x0c000000
#define A3XX_VPC_VARYING_PS_REPL_MODE_CD__SHIFT 26
static inline uint32_t A3XX_VPC_VARYING_PS_REPL_MODE_CD(enum a3xx_repl_mode val)
{
return ((val) << A3XX_VPC_VARYING_PS_REPL_MODE_CD__SHIFT) & A3XX_VPC_VARYING_PS_REPL_MODE_CD__MASK;
}
#define A3XX_VPC_VARYING_PS_REPL_MODE_CE__MASK 0x30000000
#define A3XX_VPC_VARYING_PS_REPL_MODE_CE__SHIFT 28
static inline uint32_t A3XX_VPC_VARYING_PS_REPL_MODE_CE(enum a3xx_repl_mode val)
{
return ((val) << A3XX_VPC_VARYING_PS_REPL_MODE_CE__SHIFT) & A3XX_VPC_VARYING_PS_REPL_MODE_CE__MASK;
}
#define A3XX_VPC_VARYING_PS_REPL_MODE_CF__MASK 0xc0000000
#define A3XX_VPC_VARYING_PS_REPL_MODE_CF__SHIFT 30
static inline uint32_t A3XX_VPC_VARYING_PS_REPL_MODE_CF(enum a3xx_repl_mode val)
{
return ((val) << A3XX_VPC_VARYING_PS_REPL_MODE_CF__SHIFT) & A3XX_VPC_VARYING_PS_REPL_MODE_CF__MASK;
}
 
#define REG_A3XX_VPC_VARY_CYLWRAP_ENABLE_0 0x0000228a
 
#define REG_A3XX_VPC_VARY_CYLWRAP_ENABLE_1 0x0000228b
 
#define REG_A3XX_SP_SP_CTRL_REG 0x000022c0
#define A3XX_SP_SP_CTRL_REG_RESOLVE 0x00010000
#define A3XX_SP_SP_CTRL_REG_CONSTMODE__MASK 0x00040000
#define A3XX_SP_SP_CTRL_REG_CONSTMODE__SHIFT 18
static inline uint32_t A3XX_SP_SP_CTRL_REG_CONSTMODE(uint32_t val)
{
return ((val) << A3XX_SP_SP_CTRL_REG_CONSTMODE__SHIFT) & A3XX_SP_SP_CTRL_REG_CONSTMODE__MASK;
}
#define A3XX_SP_SP_CTRL_REG_BINNING 0x00080000
#define A3XX_SP_SP_CTRL_REG_SLEEPMODE__MASK 0x00300000
#define A3XX_SP_SP_CTRL_REG_SLEEPMODE__SHIFT 20
static inline uint32_t A3XX_SP_SP_CTRL_REG_SLEEPMODE(uint32_t val)
{
return ((val) << A3XX_SP_SP_CTRL_REG_SLEEPMODE__SHIFT) & A3XX_SP_SP_CTRL_REG_SLEEPMODE__MASK;
}
#define A3XX_SP_SP_CTRL_REG_L0MODE__MASK 0x00c00000
#define A3XX_SP_SP_CTRL_REG_L0MODE__SHIFT 22
static inline uint32_t A3XX_SP_SP_CTRL_REG_L0MODE(uint32_t val)
{
return ((val) << A3XX_SP_SP_CTRL_REG_L0MODE__SHIFT) & A3XX_SP_SP_CTRL_REG_L0MODE__MASK;
}
 
#define REG_A3XX_SP_VS_CTRL_REG0 0x000022c4
#define A3XX_SP_VS_CTRL_REG0_THREADMODE__MASK 0x00000001
#define A3XX_SP_VS_CTRL_REG0_THREADMODE__SHIFT 0
static inline uint32_t A3XX_SP_VS_CTRL_REG0_THREADMODE(enum a3xx_threadmode val)
{
return ((val) << A3XX_SP_VS_CTRL_REG0_THREADMODE__SHIFT) & A3XX_SP_VS_CTRL_REG0_THREADMODE__MASK;
}
#define A3XX_SP_VS_CTRL_REG0_INSTRBUFFERMODE__MASK 0x00000002
#define A3XX_SP_VS_CTRL_REG0_INSTRBUFFERMODE__SHIFT 1
static inline uint32_t A3XX_SP_VS_CTRL_REG0_INSTRBUFFERMODE(enum a3xx_instrbuffermode val)
{
return ((val) << A3XX_SP_VS_CTRL_REG0_INSTRBUFFERMODE__SHIFT) & A3XX_SP_VS_CTRL_REG0_INSTRBUFFERMODE__MASK;
}
#define A3XX_SP_VS_CTRL_REG0_CACHEINVALID 0x00000004
#define A3XX_SP_VS_CTRL_REG0_HALFREGFOOTPRINT__MASK 0x000003f0
#define A3XX_SP_VS_CTRL_REG0_HALFREGFOOTPRINT__SHIFT 4
static inline uint32_t A3XX_SP_VS_CTRL_REG0_HALFREGFOOTPRINT(uint32_t val)
{
return ((val) << A3XX_SP_VS_CTRL_REG0_HALFREGFOOTPRINT__SHIFT) & A3XX_SP_VS_CTRL_REG0_HALFREGFOOTPRINT__MASK;
}
#define A3XX_SP_VS_CTRL_REG0_FULLREGFOOTPRINT__MASK 0x0003fc00
#define A3XX_SP_VS_CTRL_REG0_FULLREGFOOTPRINT__SHIFT 10
static inline uint32_t A3XX_SP_VS_CTRL_REG0_FULLREGFOOTPRINT(uint32_t val)
{
return ((val) << A3XX_SP_VS_CTRL_REG0_FULLREGFOOTPRINT__SHIFT) & A3XX_SP_VS_CTRL_REG0_FULLREGFOOTPRINT__MASK;
}
#define A3XX_SP_VS_CTRL_REG0_INOUTREGOVERLAP__MASK 0x000c0000
#define A3XX_SP_VS_CTRL_REG0_INOUTREGOVERLAP__SHIFT 18
static inline uint32_t A3XX_SP_VS_CTRL_REG0_INOUTREGOVERLAP(uint32_t val)
{
return ((val) << A3XX_SP_VS_CTRL_REG0_INOUTREGOVERLAP__SHIFT) & A3XX_SP_VS_CTRL_REG0_INOUTREGOVERLAP__MASK;
}
#define A3XX_SP_VS_CTRL_REG0_THREADSIZE__MASK 0x00100000
#define A3XX_SP_VS_CTRL_REG0_THREADSIZE__SHIFT 20
static inline uint32_t A3XX_SP_VS_CTRL_REG0_THREADSIZE(enum a3xx_threadsize val)
{
return ((val) << A3XX_SP_VS_CTRL_REG0_THREADSIZE__SHIFT) & A3XX_SP_VS_CTRL_REG0_THREADSIZE__MASK;
}
#define A3XX_SP_VS_CTRL_REG0_SUPERTHREADMODE 0x00200000
#define A3XX_SP_VS_CTRL_REG0_PIXLODENABLE 0x00400000
#define A3XX_SP_VS_CTRL_REG0_COMPUTEMODE 0x00800000
#define A3XX_SP_VS_CTRL_REG0_LENGTH__MASK 0xff000000
#define A3XX_SP_VS_CTRL_REG0_LENGTH__SHIFT 24
static inline uint32_t A3XX_SP_VS_CTRL_REG0_LENGTH(uint32_t val)
{
return ((val) << A3XX_SP_VS_CTRL_REG0_LENGTH__SHIFT) & A3XX_SP_VS_CTRL_REG0_LENGTH__MASK;
}
 
#define REG_A3XX_SP_VS_CTRL_REG1 0x000022c5
#define A3XX_SP_VS_CTRL_REG1_CONSTLENGTH__MASK 0x000003ff
#define A3XX_SP_VS_CTRL_REG1_CONSTLENGTH__SHIFT 0
static inline uint32_t A3XX_SP_VS_CTRL_REG1_CONSTLENGTH(uint32_t val)
{
return ((val) << A3XX_SP_VS_CTRL_REG1_CONSTLENGTH__SHIFT) & A3XX_SP_VS_CTRL_REG1_CONSTLENGTH__MASK;
}
#define A3XX_SP_VS_CTRL_REG1_CONSTFOOTPRINT__MASK 0x000ffc00
#define A3XX_SP_VS_CTRL_REG1_CONSTFOOTPRINT__SHIFT 10
static inline uint32_t A3XX_SP_VS_CTRL_REG1_CONSTFOOTPRINT(uint32_t val)
{
return ((val) << A3XX_SP_VS_CTRL_REG1_CONSTFOOTPRINT__SHIFT) & A3XX_SP_VS_CTRL_REG1_CONSTFOOTPRINT__MASK;
}
#define A3XX_SP_VS_CTRL_REG1_INITIALOUTSTANDING__MASK 0x7f000000
#define A3XX_SP_VS_CTRL_REG1_INITIALOUTSTANDING__SHIFT 24
static inline uint32_t A3XX_SP_VS_CTRL_REG1_INITIALOUTSTANDING(uint32_t val)
{
return ((val) << A3XX_SP_VS_CTRL_REG1_INITIALOUTSTANDING__SHIFT) & A3XX_SP_VS_CTRL_REG1_INITIALOUTSTANDING__MASK;
}
 
#define REG_A3XX_SP_VS_PARAM_REG 0x000022c6
#define A3XX_SP_VS_PARAM_REG_POSREGID__MASK 0x000000ff
#define A3XX_SP_VS_PARAM_REG_POSREGID__SHIFT 0
static inline uint32_t A3XX_SP_VS_PARAM_REG_POSREGID(uint32_t val)
{
return ((val) << A3XX_SP_VS_PARAM_REG_POSREGID__SHIFT) & A3XX_SP_VS_PARAM_REG_POSREGID__MASK;
}
#define A3XX_SP_VS_PARAM_REG_PSIZEREGID__MASK 0x0000ff00
#define A3XX_SP_VS_PARAM_REG_PSIZEREGID__SHIFT 8
static inline uint32_t A3XX_SP_VS_PARAM_REG_PSIZEREGID(uint32_t val)
{
return ((val) << A3XX_SP_VS_PARAM_REG_PSIZEREGID__SHIFT) & A3XX_SP_VS_PARAM_REG_PSIZEREGID__MASK;
}
#define A3XX_SP_VS_PARAM_REG_TOTALVSOUTVAR__MASK 0xfff00000
#define A3XX_SP_VS_PARAM_REG_TOTALVSOUTVAR__SHIFT 20
static inline uint32_t A3XX_SP_VS_PARAM_REG_TOTALVSOUTVAR(uint32_t val)
{
return ((val) << A3XX_SP_VS_PARAM_REG_TOTALVSOUTVAR__SHIFT) & A3XX_SP_VS_PARAM_REG_TOTALVSOUTVAR__MASK;
}
 
static inline uint32_t REG_A3XX_SP_VS_OUT(uint32_t i0) { return 0x000022c7 + 0x1*i0; }
 
static inline uint32_t REG_A3XX_SP_VS_OUT_REG(uint32_t i0) { return 0x000022c7 + 0x1*i0; }
#define A3XX_SP_VS_OUT_REG_A_REGID__MASK 0x000001ff
#define A3XX_SP_VS_OUT_REG_A_REGID__SHIFT 0
static inline uint32_t A3XX_SP_VS_OUT_REG_A_REGID(uint32_t val)
{
return ((val) << A3XX_SP_VS_OUT_REG_A_REGID__SHIFT) & A3XX_SP_VS_OUT_REG_A_REGID__MASK;
}
#define A3XX_SP_VS_OUT_REG_A_COMPMASK__MASK 0x00001e00
#define A3XX_SP_VS_OUT_REG_A_COMPMASK__SHIFT 9
static inline uint32_t A3XX_SP_VS_OUT_REG_A_COMPMASK(uint32_t val)
{
return ((val) << A3XX_SP_VS_OUT_REG_A_COMPMASK__SHIFT) & A3XX_SP_VS_OUT_REG_A_COMPMASK__MASK;
}
#define A3XX_SP_VS_OUT_REG_B_REGID__MASK 0x01ff0000
#define A3XX_SP_VS_OUT_REG_B_REGID__SHIFT 16
static inline uint32_t A3XX_SP_VS_OUT_REG_B_REGID(uint32_t val)
{
return ((val) << A3XX_SP_VS_OUT_REG_B_REGID__SHIFT) & A3XX_SP_VS_OUT_REG_B_REGID__MASK;
}
#define A3XX_SP_VS_OUT_REG_B_COMPMASK__MASK 0x1e000000
#define A3XX_SP_VS_OUT_REG_B_COMPMASK__SHIFT 25
static inline uint32_t A3XX_SP_VS_OUT_REG_B_COMPMASK(uint32_t val)
{
return ((val) << A3XX_SP_VS_OUT_REG_B_COMPMASK__SHIFT) & A3XX_SP_VS_OUT_REG_B_COMPMASK__MASK;
}
 
static inline uint32_t REG_A3XX_SP_VS_VPC_DST(uint32_t i0) { return 0x000022d0 + 0x1*i0; }
 
static inline uint32_t REG_A3XX_SP_VS_VPC_DST_REG(uint32_t i0) { return 0x000022d0 + 0x1*i0; }
#define A3XX_SP_VS_VPC_DST_REG_OUTLOC0__MASK 0x000000ff
#define A3XX_SP_VS_VPC_DST_REG_OUTLOC0__SHIFT 0
static inline uint32_t A3XX_SP_VS_VPC_DST_REG_OUTLOC0(uint32_t val)
{
return ((val) << A3XX_SP_VS_VPC_DST_REG_OUTLOC0__SHIFT) & A3XX_SP_VS_VPC_DST_REG_OUTLOC0__MASK;
}
#define A3XX_SP_VS_VPC_DST_REG_OUTLOC1__MASK 0x0000ff00
#define A3XX_SP_VS_VPC_DST_REG_OUTLOC1__SHIFT 8
static inline uint32_t A3XX_SP_VS_VPC_DST_REG_OUTLOC1(uint32_t val)
{
return ((val) << A3XX_SP_VS_VPC_DST_REG_OUTLOC1__SHIFT) & A3XX_SP_VS_VPC_DST_REG_OUTLOC1__MASK;
}
#define A3XX_SP_VS_VPC_DST_REG_OUTLOC2__MASK 0x00ff0000
#define A3XX_SP_VS_VPC_DST_REG_OUTLOC2__SHIFT 16
static inline uint32_t A3XX_SP_VS_VPC_DST_REG_OUTLOC2(uint32_t val)
{
return ((val) << A3XX_SP_VS_VPC_DST_REG_OUTLOC2__SHIFT) & A3XX_SP_VS_VPC_DST_REG_OUTLOC2__MASK;
}
#define A3XX_SP_VS_VPC_DST_REG_OUTLOC3__MASK 0xff000000
#define A3XX_SP_VS_VPC_DST_REG_OUTLOC3__SHIFT 24
static inline uint32_t A3XX_SP_VS_VPC_DST_REG_OUTLOC3(uint32_t val)
{
return ((val) << A3XX_SP_VS_VPC_DST_REG_OUTLOC3__SHIFT) & A3XX_SP_VS_VPC_DST_REG_OUTLOC3__MASK;
}
 
#define REG_A3XX_SP_VS_OBJ_OFFSET_REG 0x000022d4
#define A3XX_SP_VS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET__MASK 0x01ff0000
#define A3XX_SP_VS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET__SHIFT 16
static inline uint32_t A3XX_SP_VS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET(uint32_t val)
{
return ((val) << A3XX_SP_VS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET__SHIFT) & A3XX_SP_VS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET__MASK;
}
#define A3XX_SP_VS_OBJ_OFFSET_REG_SHADEROBJOFFSET__MASK 0xfe000000
#define A3XX_SP_VS_OBJ_OFFSET_REG_SHADEROBJOFFSET__SHIFT 25
static inline uint32_t A3XX_SP_VS_OBJ_OFFSET_REG_SHADEROBJOFFSET(uint32_t val)
{
return ((val) << A3XX_SP_VS_OBJ_OFFSET_REG_SHADEROBJOFFSET__SHIFT) & A3XX_SP_VS_OBJ_OFFSET_REG_SHADEROBJOFFSET__MASK;
}
 
#define REG_A3XX_SP_VS_OBJ_START_REG 0x000022d5
 
#define REG_A3XX_SP_VS_PVT_MEM_PARAM_REG 0x000022d6
 
#define REG_A3XX_SP_VS_PVT_MEM_ADDR_REG 0x000022d7
 
#define REG_A3XX_SP_VS_PVT_MEM_SIZE_REG 0x000022d8
 
#define REG_A3XX_SP_VS_LENGTH_REG 0x000022df
#define A3XX_SP_VS_LENGTH_REG_SHADERLENGTH__MASK 0xffffffff
#define A3XX_SP_VS_LENGTH_REG_SHADERLENGTH__SHIFT 0
static inline uint32_t A3XX_SP_VS_LENGTH_REG_SHADERLENGTH(uint32_t val)
{
return ((val) << A3XX_SP_VS_LENGTH_REG_SHADERLENGTH__SHIFT) & A3XX_SP_VS_LENGTH_REG_SHADERLENGTH__MASK;
}
 
#define REG_A3XX_SP_FS_CTRL_REG0 0x000022e0
#define A3XX_SP_FS_CTRL_REG0_THREADMODE__MASK 0x00000001
#define A3XX_SP_FS_CTRL_REG0_THREADMODE__SHIFT 0
static inline uint32_t A3XX_SP_FS_CTRL_REG0_THREADMODE(enum a3xx_threadmode val)
{
return ((val) << A3XX_SP_FS_CTRL_REG0_THREADMODE__SHIFT) & A3XX_SP_FS_CTRL_REG0_THREADMODE__MASK;
}
#define A3XX_SP_FS_CTRL_REG0_INSTRBUFFERMODE__MASK 0x00000002
#define A3XX_SP_FS_CTRL_REG0_INSTRBUFFERMODE__SHIFT 1
static inline uint32_t A3XX_SP_FS_CTRL_REG0_INSTRBUFFERMODE(enum a3xx_instrbuffermode val)
{
return ((val) << A3XX_SP_FS_CTRL_REG0_INSTRBUFFERMODE__SHIFT) & A3XX_SP_FS_CTRL_REG0_INSTRBUFFERMODE__MASK;
}
#define A3XX_SP_FS_CTRL_REG0_CACHEINVALID 0x00000004
#define A3XX_SP_FS_CTRL_REG0_HALFREGFOOTPRINT__MASK 0x000003f0
#define A3XX_SP_FS_CTRL_REG0_HALFREGFOOTPRINT__SHIFT 4
static inline uint32_t A3XX_SP_FS_CTRL_REG0_HALFREGFOOTPRINT(uint32_t val)
{
return ((val) << A3XX_SP_FS_CTRL_REG0_HALFREGFOOTPRINT__SHIFT) & A3XX_SP_FS_CTRL_REG0_HALFREGFOOTPRINT__MASK;
}
#define A3XX_SP_FS_CTRL_REG0_FULLREGFOOTPRINT__MASK 0x0003fc00
#define A3XX_SP_FS_CTRL_REG0_FULLREGFOOTPRINT__SHIFT 10
static inline uint32_t A3XX_SP_FS_CTRL_REG0_FULLREGFOOTPRINT(uint32_t val)
{
return ((val) << A3XX_SP_FS_CTRL_REG0_FULLREGFOOTPRINT__SHIFT) & A3XX_SP_FS_CTRL_REG0_FULLREGFOOTPRINT__MASK;
}
#define A3XX_SP_FS_CTRL_REG0_INOUTREGOVERLAP__MASK 0x000c0000
#define A3XX_SP_FS_CTRL_REG0_INOUTREGOVERLAP__SHIFT 18
static inline uint32_t A3XX_SP_FS_CTRL_REG0_INOUTREGOVERLAP(uint32_t val)
{
return ((val) << A3XX_SP_FS_CTRL_REG0_INOUTREGOVERLAP__SHIFT) & A3XX_SP_FS_CTRL_REG0_INOUTREGOVERLAP__MASK;
}
#define A3XX_SP_FS_CTRL_REG0_THREADSIZE__MASK 0x00100000
#define A3XX_SP_FS_CTRL_REG0_THREADSIZE__SHIFT 20
static inline uint32_t A3XX_SP_FS_CTRL_REG0_THREADSIZE(enum a3xx_threadsize val)
{
return ((val) << A3XX_SP_FS_CTRL_REG0_THREADSIZE__SHIFT) & A3XX_SP_FS_CTRL_REG0_THREADSIZE__MASK;
}
#define A3XX_SP_FS_CTRL_REG0_SUPERTHREADMODE 0x00200000
#define A3XX_SP_FS_CTRL_REG0_PIXLODENABLE 0x00400000
#define A3XX_SP_FS_CTRL_REG0_COMPUTEMODE 0x00800000
#define A3XX_SP_FS_CTRL_REG0_LENGTH__MASK 0xff000000
#define A3XX_SP_FS_CTRL_REG0_LENGTH__SHIFT 24
static inline uint32_t A3XX_SP_FS_CTRL_REG0_LENGTH(uint32_t val)
{
return ((val) << A3XX_SP_FS_CTRL_REG0_LENGTH__SHIFT) & A3XX_SP_FS_CTRL_REG0_LENGTH__MASK;
}
 
#define REG_A3XX_SP_FS_CTRL_REG1 0x000022e1
#define A3XX_SP_FS_CTRL_REG1_CONSTLENGTH__MASK 0x000003ff
#define A3XX_SP_FS_CTRL_REG1_CONSTLENGTH__SHIFT 0
static inline uint32_t A3XX_SP_FS_CTRL_REG1_CONSTLENGTH(uint32_t val)
{
return ((val) << A3XX_SP_FS_CTRL_REG1_CONSTLENGTH__SHIFT) & A3XX_SP_FS_CTRL_REG1_CONSTLENGTH__MASK;
}
#define A3XX_SP_FS_CTRL_REG1_CONSTFOOTPRINT__MASK 0x000ffc00
#define A3XX_SP_FS_CTRL_REG1_CONSTFOOTPRINT__SHIFT 10
static inline uint32_t A3XX_SP_FS_CTRL_REG1_CONSTFOOTPRINT(uint32_t val)
{
return ((val) << A3XX_SP_FS_CTRL_REG1_CONSTFOOTPRINT__SHIFT) & A3XX_SP_FS_CTRL_REG1_CONSTFOOTPRINT__MASK;
}
#define A3XX_SP_FS_CTRL_REG1_INITIALOUTSTANDING__MASK 0x00f00000
#define A3XX_SP_FS_CTRL_REG1_INITIALOUTSTANDING__SHIFT 20
static inline uint32_t A3XX_SP_FS_CTRL_REG1_INITIALOUTSTANDING(uint32_t val)
{
return ((val) << A3XX_SP_FS_CTRL_REG1_INITIALOUTSTANDING__SHIFT) & A3XX_SP_FS_CTRL_REG1_INITIALOUTSTANDING__MASK;
}
#define A3XX_SP_FS_CTRL_REG1_HALFPRECVAROFFSET__MASK 0x3f000000
#define A3XX_SP_FS_CTRL_REG1_HALFPRECVAROFFSET__SHIFT 24
static inline uint32_t A3XX_SP_FS_CTRL_REG1_HALFPRECVAROFFSET(uint32_t val)
{
return ((val) << A3XX_SP_FS_CTRL_REG1_HALFPRECVAROFFSET__SHIFT) & A3XX_SP_FS_CTRL_REG1_HALFPRECVAROFFSET__MASK;
}
 
#define REG_A3XX_SP_FS_OBJ_OFFSET_REG 0x000022e2
#define A3XX_SP_FS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET__MASK 0x01ff0000
#define A3XX_SP_FS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET__SHIFT 16
static inline uint32_t A3XX_SP_FS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET(uint32_t val)
{
return ((val) << A3XX_SP_FS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET__SHIFT) & A3XX_SP_FS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET__MASK;
}
#define A3XX_SP_FS_OBJ_OFFSET_REG_SHADEROBJOFFSET__MASK 0xfe000000
#define A3XX_SP_FS_OBJ_OFFSET_REG_SHADEROBJOFFSET__SHIFT 25
static inline uint32_t A3XX_SP_FS_OBJ_OFFSET_REG_SHADEROBJOFFSET(uint32_t val)
{
return ((val) << A3XX_SP_FS_OBJ_OFFSET_REG_SHADEROBJOFFSET__SHIFT) & A3XX_SP_FS_OBJ_OFFSET_REG_SHADEROBJOFFSET__MASK;
}
 
#define REG_A3XX_SP_FS_OBJ_START_REG 0x000022e3
 
#define REG_A3XX_SP_FS_PVT_MEM_PARAM_REG 0x000022e4
 
#define REG_A3XX_SP_FS_PVT_MEM_ADDR_REG 0x000022e5
 
#define REG_A3XX_SP_FS_PVT_MEM_SIZE_REG 0x000022e6
 
#define REG_A3XX_SP_FS_FLAT_SHAD_MODE_REG_0 0x000022e8
 
#define REG_A3XX_SP_FS_FLAT_SHAD_MODE_REG_1 0x000022e9
 
#define REG_A3XX_SP_FS_OUTPUT_REG 0x000022ec
#define A3XX_SP_FS_OUTPUT_REG_MRT__MASK 0x00000003
#define A3XX_SP_FS_OUTPUT_REG_MRT__SHIFT 0
static inline uint32_t A3XX_SP_FS_OUTPUT_REG_MRT(uint32_t val)
{
return ((val) << A3XX_SP_FS_OUTPUT_REG_MRT__SHIFT) & A3XX_SP_FS_OUTPUT_REG_MRT__MASK;
}
#define A3XX_SP_FS_OUTPUT_REG_DEPTH_ENABLE 0x00000080
#define A3XX_SP_FS_OUTPUT_REG_DEPTH_REGID__MASK 0x0000ff00
#define A3XX_SP_FS_OUTPUT_REG_DEPTH_REGID__SHIFT 8
static inline uint32_t A3XX_SP_FS_OUTPUT_REG_DEPTH_REGID(uint32_t val)
{
return ((val) << A3XX_SP_FS_OUTPUT_REG_DEPTH_REGID__SHIFT) & A3XX_SP_FS_OUTPUT_REG_DEPTH_REGID__MASK;
}
 
static inline uint32_t REG_A3XX_SP_FS_MRT(uint32_t i0) { return 0x000022f0 + 0x1*i0; }
 
static inline uint32_t REG_A3XX_SP_FS_MRT_REG(uint32_t i0) { return 0x000022f0 + 0x1*i0; }
#define A3XX_SP_FS_MRT_REG_REGID__MASK 0x000000ff
#define A3XX_SP_FS_MRT_REG_REGID__SHIFT 0
static inline uint32_t A3XX_SP_FS_MRT_REG_REGID(uint32_t val)
{
return ((val) << A3XX_SP_FS_MRT_REG_REGID__SHIFT) & A3XX_SP_FS_MRT_REG_REGID__MASK;
}
#define A3XX_SP_FS_MRT_REG_HALF_PRECISION 0x00000100
#define A3XX_SP_FS_MRT_REG_SINT 0x00000400
#define A3XX_SP_FS_MRT_REG_UINT 0x00000800
 
static inline uint32_t REG_A3XX_SP_FS_IMAGE_OUTPUT(uint32_t i0) { return 0x000022f4 + 0x1*i0; }
 
static inline uint32_t REG_A3XX_SP_FS_IMAGE_OUTPUT_REG(uint32_t i0) { return 0x000022f4 + 0x1*i0; }
#define A3XX_SP_FS_IMAGE_OUTPUT_REG_MRTFORMAT__MASK 0x0000003f
#define A3XX_SP_FS_IMAGE_OUTPUT_REG_MRTFORMAT__SHIFT 0
static inline uint32_t A3XX_SP_FS_IMAGE_OUTPUT_REG_MRTFORMAT(enum a3xx_color_fmt val)
{
return ((val) << A3XX_SP_FS_IMAGE_OUTPUT_REG_MRTFORMAT__SHIFT) & A3XX_SP_FS_IMAGE_OUTPUT_REG_MRTFORMAT__MASK;
}
 
#define REG_A3XX_SP_FS_LENGTH_REG 0x000022ff
#define A3XX_SP_FS_LENGTH_REG_SHADERLENGTH__MASK 0xffffffff
#define A3XX_SP_FS_LENGTH_REG_SHADERLENGTH__SHIFT 0
static inline uint32_t A3XX_SP_FS_LENGTH_REG_SHADERLENGTH(uint32_t val)
{
return ((val) << A3XX_SP_FS_LENGTH_REG_SHADERLENGTH__SHIFT) & A3XX_SP_FS_LENGTH_REG_SHADERLENGTH__MASK;
}
 
#define REG_A3XX_PA_SC_AA_CONFIG 0x00002301
 
#define REG_A3XX_TPL1_TP_VS_TEX_OFFSET 0x00002340
#define A3XX_TPL1_TP_VS_TEX_OFFSET_SAMPLEROFFSET__MASK 0x000000ff
#define A3XX_TPL1_TP_VS_TEX_OFFSET_SAMPLEROFFSET__SHIFT 0
static inline uint32_t A3XX_TPL1_TP_VS_TEX_OFFSET_SAMPLEROFFSET(uint32_t val)
{
return ((val) << A3XX_TPL1_TP_VS_TEX_OFFSET_SAMPLEROFFSET__SHIFT) & A3XX_TPL1_TP_VS_TEX_OFFSET_SAMPLEROFFSET__MASK;
}
#define A3XX_TPL1_TP_VS_TEX_OFFSET_MEMOBJOFFSET__MASK 0x0000ff00
#define A3XX_TPL1_TP_VS_TEX_OFFSET_MEMOBJOFFSET__SHIFT 8
static inline uint32_t A3XX_TPL1_TP_VS_TEX_OFFSET_MEMOBJOFFSET(uint32_t val)
{
return ((val) << A3XX_TPL1_TP_VS_TEX_OFFSET_MEMOBJOFFSET__SHIFT) & A3XX_TPL1_TP_VS_TEX_OFFSET_MEMOBJOFFSET__MASK;
}
#define A3XX_TPL1_TP_VS_TEX_OFFSET_BASETABLEPTR__MASK 0xffff0000
#define A3XX_TPL1_TP_VS_TEX_OFFSET_BASETABLEPTR__SHIFT 16
static inline uint32_t A3XX_TPL1_TP_VS_TEX_OFFSET_BASETABLEPTR(uint32_t val)
{
return ((val) << A3XX_TPL1_TP_VS_TEX_OFFSET_BASETABLEPTR__SHIFT) & A3XX_TPL1_TP_VS_TEX_OFFSET_BASETABLEPTR__MASK;
}
 
#define REG_A3XX_TPL1_TP_VS_BORDER_COLOR_BASE_ADDR 0x00002341
 
#define REG_A3XX_TPL1_TP_FS_TEX_OFFSET 0x00002342
#define A3XX_TPL1_TP_FS_TEX_OFFSET_SAMPLEROFFSET__MASK 0x000000ff
#define A3XX_TPL1_TP_FS_TEX_OFFSET_SAMPLEROFFSET__SHIFT 0
static inline uint32_t A3XX_TPL1_TP_FS_TEX_OFFSET_SAMPLEROFFSET(uint32_t val)
{
return ((val) << A3XX_TPL1_TP_FS_TEX_OFFSET_SAMPLEROFFSET__SHIFT) & A3XX_TPL1_TP_FS_TEX_OFFSET_SAMPLEROFFSET__MASK;
}
#define A3XX_TPL1_TP_FS_TEX_OFFSET_MEMOBJOFFSET__MASK 0x0000ff00
#define A3XX_TPL1_TP_FS_TEX_OFFSET_MEMOBJOFFSET__SHIFT 8
static inline uint32_t A3XX_TPL1_TP_FS_TEX_OFFSET_MEMOBJOFFSET(uint32_t val)
{
return ((val) << A3XX_TPL1_TP_FS_TEX_OFFSET_MEMOBJOFFSET__SHIFT) & A3XX_TPL1_TP_FS_TEX_OFFSET_MEMOBJOFFSET__MASK;
}
#define A3XX_TPL1_TP_FS_TEX_OFFSET_BASETABLEPTR__MASK 0xffff0000
#define A3XX_TPL1_TP_FS_TEX_OFFSET_BASETABLEPTR__SHIFT 16
static inline uint32_t A3XX_TPL1_TP_FS_TEX_OFFSET_BASETABLEPTR(uint32_t val)
{
return ((val) << A3XX_TPL1_TP_FS_TEX_OFFSET_BASETABLEPTR__SHIFT) & A3XX_TPL1_TP_FS_TEX_OFFSET_BASETABLEPTR__MASK;
}
 
#define REG_A3XX_TPL1_TP_FS_BORDER_COLOR_BASE_ADDR 0x00002343
 
#define REG_A3XX_VBIF_CLKON 0x00003001
 
#define REG_A3XX_VBIF_FIXED_SORT_EN 0x0000300c
 
#define REG_A3XX_VBIF_FIXED_SORT_SEL0 0x0000300d
 
#define REG_A3XX_VBIF_FIXED_SORT_SEL1 0x0000300e
 
#define REG_A3XX_VBIF_ABIT_SORT 0x0000301c
 
#define REG_A3XX_VBIF_ABIT_SORT_CONF 0x0000301d
 
#define REG_A3XX_VBIF_GATE_OFF_WRREQ_EN 0x0000302a
 
#define REG_A3XX_VBIF_IN_RD_LIM_CONF0 0x0000302c
 
#define REG_A3XX_VBIF_IN_RD_LIM_CONF1 0x0000302d
 
#define REG_A3XX_VBIF_IN_WR_LIM_CONF0 0x00003030
 
#define REG_A3XX_VBIF_IN_WR_LIM_CONF1 0x00003031
 
#define REG_A3XX_VBIF_OUT_RD_LIM_CONF0 0x00003034
 
#define REG_A3XX_VBIF_OUT_WR_LIM_CONF0 0x00003035
 
#define REG_A3XX_VBIF_DDR_OUT_MAX_BURST 0x00003036
 
#define REG_A3XX_VBIF_ARB_CTL 0x0000303c
 
#define REG_A3XX_VBIF_ROUND_ROBIN_QOS_ARB 0x00003049
 
#define REG_A3XX_VBIF_OUT_AXI_AMEMTYPE_CONF0 0x00003058
 
#define REG_A3XX_VBIF_OUT_AXI_AOOO_EN 0x0000305e
 
#define REG_A3XX_VBIF_OUT_AXI_AOOO 0x0000305f
 
#define REG_A3XX_VBIF_PERF_CNT_EN 0x00003070
#define A3XX_VBIF_PERF_CNT_EN_CNT0 0x00000001
#define A3XX_VBIF_PERF_CNT_EN_CNT1 0x00000002
#define A3XX_VBIF_PERF_CNT_EN_PWRCNT0 0x00000004
#define A3XX_VBIF_PERF_CNT_EN_PWRCNT1 0x00000008
#define A3XX_VBIF_PERF_CNT_EN_PWRCNT2 0x00000010
 
#define REG_A3XX_VBIF_PERF_CNT_CLR 0x00003071
#define A3XX_VBIF_PERF_CNT_CLR_CNT0 0x00000001
#define A3XX_VBIF_PERF_CNT_CLR_CNT1 0x00000002
#define A3XX_VBIF_PERF_CNT_CLR_PWRCNT0 0x00000004
#define A3XX_VBIF_PERF_CNT_CLR_PWRCNT1 0x00000008
#define A3XX_VBIF_PERF_CNT_CLR_PWRCNT2 0x00000010
 
#define REG_A3XX_VBIF_PERF_CNT_SEL 0x00003072
 
#define REG_A3XX_VBIF_PERF_CNT0_LO 0x00003073
 
#define REG_A3XX_VBIF_PERF_CNT0_HI 0x00003074
 
#define REG_A3XX_VBIF_PERF_CNT1_LO 0x00003075
 
#define REG_A3XX_VBIF_PERF_CNT1_HI 0x00003076
 
#define REG_A3XX_VBIF_PERF_PWR_CNT0_LO 0x00003077
 
#define REG_A3XX_VBIF_PERF_PWR_CNT0_HI 0x00003078
 
#define REG_A3XX_VBIF_PERF_PWR_CNT1_LO 0x00003079
 
#define REG_A3XX_VBIF_PERF_PWR_CNT1_HI 0x0000307a
 
#define REG_A3XX_VBIF_PERF_PWR_CNT2_LO 0x0000307b
 
#define REG_A3XX_VBIF_PERF_PWR_CNT2_HI 0x0000307c
 
#define REG_A3XX_VSC_BIN_SIZE 0x00000c01
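/* The WIDTH/HEIGHT helpers below store the pixel size right-shifted by 5, which
 * suggests the hardware fields are in units of 32 pixels (bin dimensions are
 * expected to be multiples of 32).
 */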
#define A3XX_VSC_BIN_SIZE_WIDTH__MASK 0x0000001f
#define A3XX_VSC_BIN_SIZE_WIDTH__SHIFT 0
static inline uint32_t A3XX_VSC_BIN_SIZE_WIDTH(uint32_t val)
{
return ((val >> 5) << A3XX_VSC_BIN_SIZE_WIDTH__SHIFT) & A3XX_VSC_BIN_SIZE_WIDTH__MASK;
}
#define A3XX_VSC_BIN_SIZE_HEIGHT__MASK 0x000003e0
#define A3XX_VSC_BIN_SIZE_HEIGHT__SHIFT 5
static inline uint32_t A3XX_VSC_BIN_SIZE_HEIGHT(uint32_t val)
{
return ((val >> 5) << A3XX_VSC_BIN_SIZE_HEIGHT__SHIFT) & A3XX_VSC_BIN_SIZE_HEIGHT__MASK;
}
 
#define REG_A3XX_VSC_SIZE_ADDRESS 0x00000c02
 
static inline uint32_t REG_A3XX_VSC_PIPE(uint32_t i0) { return 0x00000c06 + 0x3*i0; }
 
static inline uint32_t REG_A3XX_VSC_PIPE_CONFIG(uint32_t i0) { return 0x00000c06 + 0x3*i0; }
#define A3XX_VSC_PIPE_CONFIG_X__MASK 0x000003ff
#define A3XX_VSC_PIPE_CONFIG_X__SHIFT 0
static inline uint32_t A3XX_VSC_PIPE_CONFIG_X(uint32_t val)
{
return ((val) << A3XX_VSC_PIPE_CONFIG_X__SHIFT) & A3XX_VSC_PIPE_CONFIG_X__MASK;
}
#define A3XX_VSC_PIPE_CONFIG_Y__MASK 0x000ffc00
#define A3XX_VSC_PIPE_CONFIG_Y__SHIFT 10
static inline uint32_t A3XX_VSC_PIPE_CONFIG_Y(uint32_t val)
{
return ((val) << A3XX_VSC_PIPE_CONFIG_Y__SHIFT) & A3XX_VSC_PIPE_CONFIG_Y__MASK;
}
#define A3XX_VSC_PIPE_CONFIG_W__MASK 0x00f00000
#define A3XX_VSC_PIPE_CONFIG_W__SHIFT 20
static inline uint32_t A3XX_VSC_PIPE_CONFIG_W(uint32_t val)
{
return ((val) << A3XX_VSC_PIPE_CONFIG_W__SHIFT) & A3XX_VSC_PIPE_CONFIG_W__MASK;
}
#define A3XX_VSC_PIPE_CONFIG_H__MASK 0x0f000000
#define A3XX_VSC_PIPE_CONFIG_H__SHIFT 24
static inline uint32_t A3XX_VSC_PIPE_CONFIG_H(uint32_t val)
{
return ((val) << A3XX_VSC_PIPE_CONFIG_H__SHIFT) & A3XX_VSC_PIPE_CONFIG_H__MASK;
}
 
static inline uint32_t REG_A3XX_VSC_PIPE_DATA_ADDRESS(uint32_t i0) { return 0x00000c07 + 0x3*i0; }
 
static inline uint32_t REG_A3XX_VSC_PIPE_DATA_LENGTH(uint32_t i0) { return 0x00000c08 + 0x3*i0; }
 
#define REG_A3XX_VSC_BIN_CONTROL 0x00000c3c
#define A3XX_VSC_BIN_CONTROL_BINNING_ENABLE 0x00000001
 
#define REG_A3XX_UNKNOWN_0C3D 0x00000c3d
 
#define REG_A3XX_PC_PERFCOUNTER0_SELECT 0x00000c48
 
#define REG_A3XX_PC_PERFCOUNTER1_SELECT 0x00000c49
 
#define REG_A3XX_PC_PERFCOUNTER2_SELECT 0x00000c4a
 
#define REG_A3XX_PC_PERFCOUNTER3_SELECT 0x00000c4b
 
#define REG_A3XX_GRAS_TSE_DEBUG_ECO 0x00000c81
 
#define REG_A3XX_GRAS_PERFCOUNTER0_SELECT 0x00000c88
 
#define REG_A3XX_GRAS_PERFCOUNTER1_SELECT 0x00000c89
 
#define REG_A3XX_GRAS_PERFCOUNTER2_SELECT 0x00000c8a
 
#define REG_A3XX_GRAS_PERFCOUNTER3_SELECT 0x00000c8b
 
static inline uint32_t REG_A3XX_GRAS_CL_USER_PLANE(uint32_t i0) { return 0x00000ca0 + 0x4*i0; }
 
static inline uint32_t REG_A3XX_GRAS_CL_USER_PLANE_X(uint32_t i0) { return 0x00000ca0 + 0x4*i0; }
 
static inline uint32_t REG_A3XX_GRAS_CL_USER_PLANE_Y(uint32_t i0) { return 0x00000ca1 + 0x4*i0; }
 
static inline uint32_t REG_A3XX_GRAS_CL_USER_PLANE_Z(uint32_t i0) { return 0x00000ca2 + 0x4*i0; }
 
static inline uint32_t REG_A3XX_GRAS_CL_USER_PLANE_W(uint32_t i0) { return 0x00000ca3 + 0x4*i0; }
 
#define REG_A3XX_RB_GMEM_BASE_ADDR 0x00000cc0
 
#define REG_A3XX_RB_DEBUG_ECO_CONTROLS_ADDR 0x00000cc1
 
#define REG_A3XX_RB_PERFCOUNTER0_SELECT 0x00000cc6
 
#define REG_A3XX_RB_PERFCOUNTER1_SELECT 0x00000cc7
 
#define REG_A3XX_RB_FRAME_BUFFER_DIMENSION 0x00000ce0
#define A3XX_RB_FRAME_BUFFER_DIMENSION_WIDTH__MASK 0x00003fff
#define A3XX_RB_FRAME_BUFFER_DIMENSION_WIDTH__SHIFT 0
static inline uint32_t A3XX_RB_FRAME_BUFFER_DIMENSION_WIDTH(uint32_t val)
{
return ((val) << A3XX_RB_FRAME_BUFFER_DIMENSION_WIDTH__SHIFT) & A3XX_RB_FRAME_BUFFER_DIMENSION_WIDTH__MASK;
}
#define A3XX_RB_FRAME_BUFFER_DIMENSION_HEIGHT__MASK 0x0fffc000
#define A3XX_RB_FRAME_BUFFER_DIMENSION_HEIGHT__SHIFT 14
static inline uint32_t A3XX_RB_FRAME_BUFFER_DIMENSION_HEIGHT(uint32_t val)
{
return ((val) << A3XX_RB_FRAME_BUFFER_DIMENSION_HEIGHT__SHIFT) & A3XX_RB_FRAME_BUFFER_DIMENSION_HEIGHT__MASK;
}
 
#define REG_A3XX_HLSQ_PERFCOUNTER0_SELECT 0x00000e00
 
#define REG_A3XX_HLSQ_PERFCOUNTER1_SELECT 0x00000e01
 
#define REG_A3XX_HLSQ_PERFCOUNTER2_SELECT 0x00000e02
 
#define REG_A3XX_HLSQ_PERFCOUNTER3_SELECT 0x00000e03
 
#define REG_A3XX_HLSQ_PERFCOUNTER4_SELECT 0x00000e04
 
#define REG_A3XX_HLSQ_PERFCOUNTER5_SELECT 0x00000e05
 
#define REG_A3XX_UNKNOWN_0E43 0x00000e43
 
#define REG_A3XX_VFD_PERFCOUNTER0_SELECT 0x00000e44
 
#define REG_A3XX_VFD_PERFCOUNTER1_SELECT 0x00000e45
 
#define REG_A3XX_VPC_VPC_DEBUG_RAM_SEL 0x00000e61
 
#define REG_A3XX_VPC_VPC_DEBUG_RAM_READ 0x00000e62
 
#define REG_A3XX_VPC_PERFCOUNTER0_SELECT 0x00000e64
 
#define REG_A3XX_VPC_PERFCOUNTER1_SELECT 0x00000e65
 
#define REG_A3XX_UCHE_CACHE_MODE_CONTROL_REG 0x00000e82
 
#define REG_A3XX_UCHE_PERFCOUNTER0_SELECT 0x00000e84
 
#define REG_A3XX_UCHE_PERFCOUNTER1_SELECT 0x00000e85
 
#define REG_A3XX_UCHE_PERFCOUNTER2_SELECT 0x00000e86
 
#define REG_A3XX_UCHE_PERFCOUNTER3_SELECT 0x00000e87
 
#define REG_A3XX_UCHE_PERFCOUNTER4_SELECT 0x00000e88
 
#define REG_A3XX_UCHE_PERFCOUNTER5_SELECT 0x00000e89
 
#define REG_A3XX_UCHE_CACHE_INVALIDATE0_REG 0x00000ea0
#define A3XX_UCHE_CACHE_INVALIDATE0_REG_ADDR__MASK 0x0fffffff
#define A3XX_UCHE_CACHE_INVALIDATE0_REG_ADDR__SHIFT 0
static inline uint32_t A3XX_UCHE_CACHE_INVALIDATE0_REG_ADDR(uint32_t val)
{
return ((val) << A3XX_UCHE_CACHE_INVALIDATE0_REG_ADDR__SHIFT) & A3XX_UCHE_CACHE_INVALIDATE0_REG_ADDR__MASK;
}
 
#define REG_A3XX_UCHE_CACHE_INVALIDATE1_REG 0x00000ea1
#define A3XX_UCHE_CACHE_INVALIDATE1_REG_ADDR__MASK 0x0fffffff
#define A3XX_UCHE_CACHE_INVALIDATE1_REG_ADDR__SHIFT 0
static inline uint32_t A3XX_UCHE_CACHE_INVALIDATE1_REG_ADDR(uint32_t val)
{
return ((val) << A3XX_UCHE_CACHE_INVALIDATE1_REG_ADDR__SHIFT) & A3XX_UCHE_CACHE_INVALIDATE1_REG_ADDR__MASK;
}
#define A3XX_UCHE_CACHE_INVALIDATE1_REG_OPCODE__MASK 0x30000000
#define A3XX_UCHE_CACHE_INVALIDATE1_REG_OPCODE__SHIFT 28
static inline uint32_t A3XX_UCHE_CACHE_INVALIDATE1_REG_OPCODE(enum a3xx_cache_opcode val)
{
return ((val) << A3XX_UCHE_CACHE_INVALIDATE1_REG_OPCODE__SHIFT) & A3XX_UCHE_CACHE_INVALIDATE1_REG_OPCODE__MASK;
}
#define A3XX_UCHE_CACHE_INVALIDATE1_REG_ENTIRE_CACHE 0x80000000
 
#define REG_A3XX_UNKNOWN_0EA6 0x00000ea6
 
#define REG_A3XX_SP_PERFCOUNTER0_SELECT 0x00000ec4
 
#define REG_A3XX_SP_PERFCOUNTER1_SELECT 0x00000ec5
 
#define REG_A3XX_SP_PERFCOUNTER2_SELECT 0x00000ec6
 
#define REG_A3XX_SP_PERFCOUNTER3_SELECT 0x00000ec7
 
#define REG_A3XX_SP_PERFCOUNTER4_SELECT 0x00000ec8
 
#define REG_A3XX_SP_PERFCOUNTER5_SELECT 0x00000ec9
 
#define REG_A3XX_SP_PERFCOUNTER6_SELECT 0x00000eca
 
#define REG_A3XX_SP_PERFCOUNTER7_SELECT 0x00000ecb
 
#define REG_A3XX_UNKNOWN_0EE0 0x00000ee0
 
#define REG_A3XX_UNKNOWN_0F03 0x00000f03
 
#define REG_A3XX_TP_PERFCOUNTER0_SELECT 0x00000f04
 
#define REG_A3XX_TP_PERFCOUNTER1_SELECT 0x00000f05
 
#define REG_A3XX_TP_PERFCOUNTER2_SELECT 0x00000f06
 
#define REG_A3XX_TP_PERFCOUNTER3_SELECT 0x00000f07
 
#define REG_A3XX_TP_PERFCOUNTER4_SELECT 0x00000f08
 
#define REG_A3XX_TP_PERFCOUNTER5_SELECT 0x00000f09
 
#define REG_A3XX_VGT_CL_INITIATOR 0x000021f0
 
#define REG_A3XX_VGT_EVENT_INITIATOR 0x000021f9
 
#define REG_A3XX_VGT_DRAW_INITIATOR 0x000021fc
#define A3XX_VGT_DRAW_INITIATOR_PRIM_TYPE__MASK 0x0000003f
#define A3XX_VGT_DRAW_INITIATOR_PRIM_TYPE__SHIFT 0
static inline uint32_t A3XX_VGT_DRAW_INITIATOR_PRIM_TYPE(enum pc_di_primtype val)
{
return ((val) << A3XX_VGT_DRAW_INITIATOR_PRIM_TYPE__SHIFT) & A3XX_VGT_DRAW_INITIATOR_PRIM_TYPE__MASK;
}
#define A3XX_VGT_DRAW_INITIATOR_SOURCE_SELECT__MASK 0x000000c0
#define A3XX_VGT_DRAW_INITIATOR_SOURCE_SELECT__SHIFT 6
static inline uint32_t A3XX_VGT_DRAW_INITIATOR_SOURCE_SELECT(enum pc_di_src_sel val)
{
return ((val) << A3XX_VGT_DRAW_INITIATOR_SOURCE_SELECT__SHIFT) & A3XX_VGT_DRAW_INITIATOR_SOURCE_SELECT__MASK;
}
#define A3XX_VGT_DRAW_INITIATOR_VIS_CULL__MASK 0x00000600
#define A3XX_VGT_DRAW_INITIATOR_VIS_CULL__SHIFT 9
static inline uint32_t A3XX_VGT_DRAW_INITIATOR_VIS_CULL(enum pc_di_vis_cull_mode val)
{
return ((val) << A3XX_VGT_DRAW_INITIATOR_VIS_CULL__SHIFT) & A3XX_VGT_DRAW_INITIATOR_VIS_CULL__MASK;
}
#define A3XX_VGT_DRAW_INITIATOR_INDEX_SIZE__MASK 0x00000800
#define A3XX_VGT_DRAW_INITIATOR_INDEX_SIZE__SHIFT 11
static inline uint32_t A3XX_VGT_DRAW_INITIATOR_INDEX_SIZE(enum pc_di_index_size val)
{
return ((val) << A3XX_VGT_DRAW_INITIATOR_INDEX_SIZE__SHIFT) & A3XX_VGT_DRAW_INITIATOR_INDEX_SIZE__MASK;
}
#define A3XX_VGT_DRAW_INITIATOR_NOT_EOP 0x00001000
#define A3XX_VGT_DRAW_INITIATOR_SMALL_INDEX 0x00002000
#define A3XX_VGT_DRAW_INITIATOR_PRE_DRAW_INITIATOR_ENABLE 0x00004000
#define A3XX_VGT_DRAW_INITIATOR_NUM_INSTANCES__MASK 0xff000000
#define A3XX_VGT_DRAW_INITIATOR_NUM_INSTANCES__SHIFT 24
static inline uint32_t A3XX_VGT_DRAW_INITIATOR_NUM_INSTANCES(uint32_t val)
{
return ((val) << A3XX_VGT_DRAW_INITIATOR_NUM_INSTANCES__SHIFT) & A3XX_VGT_DRAW_INITIATOR_NUM_INSTANCES__MASK;
}
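/* Illustrative sketch (not part of the generated header): the per-field helpers
 * of a register are OR'd together to form the 32-bit value written to it, e.g.
 *
 *   uint32_t di = A3XX_VGT_DRAW_INITIATOR_PRIM_TYPE(DI_PT_TRILIST) |
 *                 A3XX_VGT_DRAW_INITIATOR_SOURCE_SELECT(DI_SRC_SEL_AUTO_INDEX) |
 *                 A3XX_VGT_DRAW_INITIATOR_NUM_INSTANCES(1);
 *
 * where DI_PT_TRILIST / DI_SRC_SEL_AUTO_INDEX are assumed to come from the shared
 * adreno pm4 enum definitions (enum pc_di_primtype / enum pc_di_src_sel).
 */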
 
#define REG_A3XX_VGT_IMMED_DATA 0x000021fd
 
#define REG_A3XX_TEX_SAMP_0 0x00000000
#define A3XX_TEX_SAMP_0_MIPFILTER_LINEAR 0x00000002
#define A3XX_TEX_SAMP_0_XY_MAG__MASK 0x0000000c
#define A3XX_TEX_SAMP_0_XY_MAG__SHIFT 2
static inline uint32_t A3XX_TEX_SAMP_0_XY_MAG(enum a3xx_tex_filter val)
{
return ((val) << A3XX_TEX_SAMP_0_XY_MAG__SHIFT) & A3XX_TEX_SAMP_0_XY_MAG__MASK;
}
#define A3XX_TEX_SAMP_0_XY_MIN__MASK 0x00000030
#define A3XX_TEX_SAMP_0_XY_MIN__SHIFT 4
static inline uint32_t A3XX_TEX_SAMP_0_XY_MIN(enum a3xx_tex_filter val)
{
return ((val) << A3XX_TEX_SAMP_0_XY_MIN__SHIFT) & A3XX_TEX_SAMP_0_XY_MIN__MASK;
}
#define A3XX_TEX_SAMP_0_WRAP_S__MASK 0x000001c0
#define A3XX_TEX_SAMP_0_WRAP_S__SHIFT 6
static inline uint32_t A3XX_TEX_SAMP_0_WRAP_S(enum a3xx_tex_clamp val)
{
return ((val) << A3XX_TEX_SAMP_0_WRAP_S__SHIFT) & A3XX_TEX_SAMP_0_WRAP_S__MASK;
}
#define A3XX_TEX_SAMP_0_WRAP_T__MASK 0x00000e00
#define A3XX_TEX_SAMP_0_WRAP_T__SHIFT 9
static inline uint32_t A3XX_TEX_SAMP_0_WRAP_T(enum a3xx_tex_clamp val)
{
return ((val) << A3XX_TEX_SAMP_0_WRAP_T__SHIFT) & A3XX_TEX_SAMP_0_WRAP_T__MASK;
}
#define A3XX_TEX_SAMP_0_WRAP_R__MASK 0x00007000
#define A3XX_TEX_SAMP_0_WRAP_R__SHIFT 12
static inline uint32_t A3XX_TEX_SAMP_0_WRAP_R(enum a3xx_tex_clamp val)
{
return ((val) << A3XX_TEX_SAMP_0_WRAP_R__SHIFT) & A3XX_TEX_SAMP_0_WRAP_R__MASK;
}
#define A3XX_TEX_SAMP_0_ANISO__MASK 0x00038000
#define A3XX_TEX_SAMP_0_ANISO__SHIFT 15
static inline uint32_t A3XX_TEX_SAMP_0_ANISO(enum a3xx_tex_aniso val)
{
return ((val) << A3XX_TEX_SAMP_0_ANISO__SHIFT) & A3XX_TEX_SAMP_0_ANISO__MASK;
}
#define A3XX_TEX_SAMP_0_COMPARE_FUNC__MASK 0x00700000
#define A3XX_TEX_SAMP_0_COMPARE_FUNC__SHIFT 20
static inline uint32_t A3XX_TEX_SAMP_0_COMPARE_FUNC(enum adreno_compare_func val)
{
return ((val) << A3XX_TEX_SAMP_0_COMPARE_FUNC__SHIFT) & A3XX_TEX_SAMP_0_COMPARE_FUNC__MASK;
}
#define A3XX_TEX_SAMP_0_UNNORM_COORDS 0x80000000
 
#define REG_A3XX_TEX_SAMP_1 0x00000001
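/* The LOD fields below are fixed-point with 6 fractional bits: the helpers scale
 * the float LOD by 64.0 before packing (LOD_BIAS as a signed value, MIN_LOD and
 * MAX_LOD as unsigned).
 */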
#define A3XX_TEX_SAMP_1_LOD_BIAS__MASK 0x000007ff
#define A3XX_TEX_SAMP_1_LOD_BIAS__SHIFT 0
static inline uint32_t A3XX_TEX_SAMP_1_LOD_BIAS(float val)
{
return ((((int32_t)(val * 64.0))) << A3XX_TEX_SAMP_1_LOD_BIAS__SHIFT) & A3XX_TEX_SAMP_1_LOD_BIAS__MASK;
}
#define A3XX_TEX_SAMP_1_MAX_LOD__MASK 0x003ff000
#define A3XX_TEX_SAMP_1_MAX_LOD__SHIFT 12
static inline uint32_t A3XX_TEX_SAMP_1_MAX_LOD(float val)
{
return ((((uint32_t)(val * 64.0))) << A3XX_TEX_SAMP_1_MAX_LOD__SHIFT) & A3XX_TEX_SAMP_1_MAX_LOD__MASK;
}
#define A3XX_TEX_SAMP_1_MIN_LOD__MASK 0xffc00000
#define A3XX_TEX_SAMP_1_MIN_LOD__SHIFT 22
static inline uint32_t A3XX_TEX_SAMP_1_MIN_LOD(float val)
{
return ((((uint32_t)(val * 64.0))) << A3XX_TEX_SAMP_1_MIN_LOD__SHIFT) & A3XX_TEX_SAMP_1_MIN_LOD__MASK;
}
 
#define REG_A3XX_TEX_CONST_0 0x00000000
#define A3XX_TEX_CONST_0_TILED 0x00000001
#define A3XX_TEX_CONST_0_SRGB 0x00000004
#define A3XX_TEX_CONST_0_SWIZ_X__MASK 0x00000070
#define A3XX_TEX_CONST_0_SWIZ_X__SHIFT 4
static inline uint32_t A3XX_TEX_CONST_0_SWIZ_X(enum a3xx_tex_swiz val)
{
return ((val) << A3XX_TEX_CONST_0_SWIZ_X__SHIFT) & A3XX_TEX_CONST_0_SWIZ_X__MASK;
}
#define A3XX_TEX_CONST_0_SWIZ_Y__MASK 0x00000380
#define A3XX_TEX_CONST_0_SWIZ_Y__SHIFT 7
static inline uint32_t A3XX_TEX_CONST_0_SWIZ_Y(enum a3xx_tex_swiz val)
{
return ((val) << A3XX_TEX_CONST_0_SWIZ_Y__SHIFT) & A3XX_TEX_CONST_0_SWIZ_Y__MASK;
}
#define A3XX_TEX_CONST_0_SWIZ_Z__MASK 0x00001c00
#define A3XX_TEX_CONST_0_SWIZ_Z__SHIFT 10
static inline uint32_t A3XX_TEX_CONST_0_SWIZ_Z(enum a3xx_tex_swiz val)
{
return ((val) << A3XX_TEX_CONST_0_SWIZ_Z__SHIFT) & A3XX_TEX_CONST_0_SWIZ_Z__MASK;
}
#define A3XX_TEX_CONST_0_SWIZ_W__MASK 0x0000e000
#define A3XX_TEX_CONST_0_SWIZ_W__SHIFT 13
static inline uint32_t A3XX_TEX_CONST_0_SWIZ_W(enum a3xx_tex_swiz val)
{
return ((val) << A3XX_TEX_CONST_0_SWIZ_W__SHIFT) & A3XX_TEX_CONST_0_SWIZ_W__MASK;
}
#define A3XX_TEX_CONST_0_MIPLVLS__MASK 0x000f0000
#define A3XX_TEX_CONST_0_MIPLVLS__SHIFT 16
static inline uint32_t A3XX_TEX_CONST_0_MIPLVLS(uint32_t val)
{
return ((val) << A3XX_TEX_CONST_0_MIPLVLS__SHIFT) & A3XX_TEX_CONST_0_MIPLVLS__MASK;
}
#define A3XX_TEX_CONST_0_FMT__MASK 0x1fc00000
#define A3XX_TEX_CONST_0_FMT__SHIFT 22
static inline uint32_t A3XX_TEX_CONST_0_FMT(enum a3xx_tex_fmt val)
{
return ((val) << A3XX_TEX_CONST_0_FMT__SHIFT) & A3XX_TEX_CONST_0_FMT__MASK;
}
#define A3XX_TEX_CONST_0_NOCONVERT 0x20000000
#define A3XX_TEX_CONST_0_TYPE__MASK 0xc0000000
#define A3XX_TEX_CONST_0_TYPE__SHIFT 30
static inline uint32_t A3XX_TEX_CONST_0_TYPE(enum a3xx_tex_type val)
{
return ((val) << A3XX_TEX_CONST_0_TYPE__SHIFT) & A3XX_TEX_CONST_0_TYPE__MASK;
}
 
#define REG_A3XX_TEX_CONST_1 0x00000001
#define A3XX_TEX_CONST_1_HEIGHT__MASK 0x00003fff
#define A3XX_TEX_CONST_1_HEIGHT__SHIFT 0
static inline uint32_t A3XX_TEX_CONST_1_HEIGHT(uint32_t val)
{
return ((val) << A3XX_TEX_CONST_1_HEIGHT__SHIFT) & A3XX_TEX_CONST_1_HEIGHT__MASK;
}
#define A3XX_TEX_CONST_1_WIDTH__MASK 0x0fffc000
#define A3XX_TEX_CONST_1_WIDTH__SHIFT 14
static inline uint32_t A3XX_TEX_CONST_1_WIDTH(uint32_t val)
{
return ((val) << A3XX_TEX_CONST_1_WIDTH__SHIFT) & A3XX_TEX_CONST_1_WIDTH__MASK;
}
#define A3XX_TEX_CONST_1_FETCHSIZE__MASK 0xf0000000
#define A3XX_TEX_CONST_1_FETCHSIZE__SHIFT 28
static inline uint32_t A3XX_TEX_CONST_1_FETCHSIZE(enum a3xx_tex_fetchsize val)
{
return ((val) << A3XX_TEX_CONST_1_FETCHSIZE__SHIFT) & A3XX_TEX_CONST_1_FETCHSIZE__MASK;
}
 
#define REG_A3XX_TEX_CONST_2 0x00000002
#define A3XX_TEX_CONST_2_INDX__MASK 0x000000ff
#define A3XX_TEX_CONST_2_INDX__SHIFT 0
static inline uint32_t A3XX_TEX_CONST_2_INDX(uint32_t val)
{
return ((val) << A3XX_TEX_CONST_2_INDX__SHIFT) & A3XX_TEX_CONST_2_INDX__MASK;
}
#define A3XX_TEX_CONST_2_PITCH__MASK 0x3ffff000
#define A3XX_TEX_CONST_2_PITCH__SHIFT 12
static inline uint32_t A3XX_TEX_CONST_2_PITCH(uint32_t val)
{
return ((val) << A3XX_TEX_CONST_2_PITCH__SHIFT) & A3XX_TEX_CONST_2_PITCH__MASK;
}
#define A3XX_TEX_CONST_2_SWAP__MASK 0xc0000000
#define A3XX_TEX_CONST_2_SWAP__SHIFT 30
static inline uint32_t A3XX_TEX_CONST_2_SWAP(enum a3xx_color_swap val)
{
return ((val) << A3XX_TEX_CONST_2_SWAP__SHIFT) & A3XX_TEX_CONST_2_SWAP__MASK;
}
 
#define REG_A3XX_TEX_CONST_3 0x00000003
#define A3XX_TEX_CONST_3_LAYERSZ1__MASK 0x00007fff
#define A3XX_TEX_CONST_3_LAYERSZ1__SHIFT 0
static inline uint32_t A3XX_TEX_CONST_3_LAYERSZ1(uint32_t val)
{
return ((val >> 12) << A3XX_TEX_CONST_3_LAYERSZ1__SHIFT) & A3XX_TEX_CONST_3_LAYERSZ1__MASK;
}
#define A3XX_TEX_CONST_3_DEPTH__MASK 0x0ffe0000
#define A3XX_TEX_CONST_3_DEPTH__SHIFT 17
static inline uint32_t A3XX_TEX_CONST_3_DEPTH(uint32_t val)
{
return ((val) << A3XX_TEX_CONST_3_DEPTH__SHIFT) & A3XX_TEX_CONST_3_DEPTH__MASK;
}
#define A3XX_TEX_CONST_3_LAYERSZ2__MASK 0xf0000000
#define A3XX_TEX_CONST_3_LAYERSZ2__SHIFT 28
static inline uint32_t A3XX_TEX_CONST_3_LAYERSZ2(uint32_t val)
{
return ((val >> 12) << A3XX_TEX_CONST_3_LAYERSZ2__SHIFT) & A3XX_TEX_CONST_3_LAYERSZ2__MASK;
}
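 
/* Unlike most of the helpers in this file, the LAYERSZ helpers above take a
* size in bytes and store it right-shifted by 12, i.e. the hardware field
* appears to hold the per-layer size in 4 KiB units: for example a layer of
* 0x28000 bytes ends up as 0x28 in the LAYERSZ1 field.
*/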
 
 
#endif /* A3XX_XML */
/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/freedreno/a3xx/fd3_blend.c
0,0 → 1,135
/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */
 
/*
* Copyright (C) 2013 Rob Clark <robclark@freedesktop.org>
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Authors:
* Rob Clark <robclark@freedesktop.org>
*/
 
#include "pipe/p_state.h"
#include "util/u_blend.h"
#include "util/u_string.h"
#include "util/u_memory.h"
 
#include "fd3_blend.h"
#include "fd3_context.h"
#include "fd3_format.h"
 
 
static enum a3xx_rb_blend_opcode
blend_func(unsigned func)
{
switch (func) {
case PIPE_BLEND_ADD:
return BLEND_DST_PLUS_SRC;
case PIPE_BLEND_MIN:
return BLEND_MIN_DST_SRC;
case PIPE_BLEND_MAX:
return BLEND_MAX_DST_SRC;
case PIPE_BLEND_SUBTRACT:
return BLEND_SRC_MINUS_DST;
case PIPE_BLEND_REVERSE_SUBTRACT:
return BLEND_DST_MINUS_SRC;
default:
DBG("invalid blend func: %x", func);
return 0;
}
}
 
void *
fd3_blend_state_create(struct pipe_context *pctx,
const struct pipe_blend_state *cso)
{
struct fd3_blend_stateobj *so;
enum a3xx_rop_code rop = ROP_COPY;
bool reads_dest = false;
int i;
 
if (cso->logicop_enable) {
rop = cso->logicop_func; /* maps 1:1 */
 
switch (cso->logicop_func) {
case PIPE_LOGICOP_NOR:
case PIPE_LOGICOP_AND_INVERTED:
case PIPE_LOGICOP_AND_REVERSE:
case PIPE_LOGICOP_INVERT:
case PIPE_LOGICOP_XOR:
case PIPE_LOGICOP_NAND:
case PIPE_LOGICOP_AND:
case PIPE_LOGICOP_EQUIV:
case PIPE_LOGICOP_NOOP:
case PIPE_LOGICOP_OR_INVERTED:
case PIPE_LOGICOP_OR_REVERSE:
case PIPE_LOGICOP_OR:
reads_dest = true;
break;
}
}
 
so = CALLOC_STRUCT(fd3_blend_stateobj);
if (!so)
return NULL;
 
so->base = *cso;
 
for (i = 0; i < ARRAY_SIZE(so->rb_mrt); i++) {
const struct pipe_rt_blend_state *rt;
if (cso->independent_blend_enable)
rt = &cso->rt[i];
else
rt = &cso->rt[0];
 
so->rb_mrt[i].blend_control_rgb =
A3XX_RB_MRT_BLEND_CONTROL_RGB_SRC_FACTOR(fd_blend_factor(rt->rgb_src_factor)) |
A3XX_RB_MRT_BLEND_CONTROL_RGB_BLEND_OPCODE(blend_func(rt->rgb_func)) |
A3XX_RB_MRT_BLEND_CONTROL_RGB_DEST_FACTOR(fd_blend_factor(rt->rgb_dst_factor));
 
so->rb_mrt[i].blend_control_alpha =
A3XX_RB_MRT_BLEND_CONTROL_ALPHA_SRC_FACTOR(fd_blend_factor(rt->alpha_src_factor)) |
A3XX_RB_MRT_BLEND_CONTROL_ALPHA_BLEND_OPCODE(blend_func(rt->alpha_func)) |
A3XX_RB_MRT_BLEND_CONTROL_ALPHA_DEST_FACTOR(fd_blend_factor(rt->alpha_dst_factor));
 
so->rb_mrt[i].blend_control_no_alpha_rgb =
A3XX_RB_MRT_BLEND_CONTROL_RGB_SRC_FACTOR(fd_blend_factor(util_blend_dst_alpha_to_one(rt->rgb_src_factor))) |
A3XX_RB_MRT_BLEND_CONTROL_RGB_BLEND_OPCODE(blend_func(rt->rgb_func)) |
A3XX_RB_MRT_BLEND_CONTROL_RGB_DEST_FACTOR(fd_blend_factor(util_blend_dst_alpha_to_one(rt->rgb_dst_factor)));
 
so->rb_mrt[i].control =
A3XX_RB_MRT_CONTROL_ROP_CODE(rop) |
A3XX_RB_MRT_CONTROL_COMPONENT_ENABLE(rt->colormask);
 
if (rt->blend_enable)
so->rb_mrt[i].control |=
A3XX_RB_MRT_CONTROL_READ_DEST_ENABLE |
A3XX_RB_MRT_CONTROL_BLEND |
A3XX_RB_MRT_CONTROL_BLEND2;
 
if (reads_dest)
so->rb_mrt[i].control |= A3XX_RB_MRT_CONTROL_READ_DEST_ENABLE;
 
if (cso->dither)
so->rb_mrt[i].control |= A3XX_RB_MRT_CONTROL_DITHER_MODE(DITHER_ALWAYS);
}
 
return so;
}
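 
/* State trackers reach this constructor through the standard gallium hooks
* (see fd3_context_create() wiring up pctx->create_blend_state further down
* in fd3_context.c). A minimal sketch of creating and binding a conventional
* src-alpha blend CSO, with field values chosen purely for illustration:
*
* struct pipe_blend_state cso = {
* .rt[0] = {
* .blend_enable = 1,
* .rgb_func = PIPE_BLEND_ADD,
* .rgb_src_factor = PIPE_BLENDFACTOR_SRC_ALPHA,
* .rgb_dst_factor = PIPE_BLENDFACTOR_INV_SRC_ALPHA,
* .alpha_func = PIPE_BLEND_ADD,
* .alpha_src_factor = PIPE_BLENDFACTOR_ONE,
* .alpha_dst_factor = PIPE_BLENDFACTOR_INV_SRC_ALPHA,
* .colormask = PIPE_MASK_RGBA,
* },
* };
* void *so = pctx->create_blend_state(pctx, &cso);
* pctx->bind_blend_state(pctx, so);
*/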
/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/freedreno/a3xx/fd3_blend.h
0,0 → 1,57
/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */
 
/*
* Copyright (C) 2013 Rob Clark <robclark@freedesktop.org>
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Authors:
* Rob Clark <robclark@freedesktop.org>
*/
 
#ifndef FD3_BLEND_H_
#define FD3_BLEND_H_
 
#include "pipe/p_state.h"
#include "pipe/p_context.h"
 
struct fd3_blend_stateobj {
struct pipe_blend_state base;
struct {
/* Blend control bits for color if there is an alpha channel */
uint32_t blend_control_rgb;
/* Blend control bits for color if there is no alpha channel */
uint32_t blend_control_no_alpha_rgb;
/* Blend control bits for alpha channel */
uint32_t blend_control_alpha;
uint32_t control;
} rb_mrt[4];
};
 
static INLINE struct fd3_blend_stateobj *
fd3_blend_stateobj(struct pipe_blend_state *blend)
{
return (struct fd3_blend_stateobj *)blend;
}
 
void * fd3_blend_state_create(struct pipe_context *pctx,
const struct pipe_blend_state *cso);
 
#endif /* FD3_BLEND_H_ */
/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/freedreno/a3xx/fd3_context.c
0,0 → 1,177
/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */
 
/*
* Copyright (C) 2013 Rob Clark <robclark@freedesktop.org>
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Authors:
* Rob Clark <robclark@freedesktop.org>
*/
 
 
#include "fd3_context.h"
#include "fd3_blend.h"
#include "fd3_draw.h"
#include "fd3_emit.h"
#include "fd3_gmem.h"
#include "fd3_program.h"
#include "fd3_query.h"
#include "fd3_rasterizer.h"
#include "fd3_texture.h"
#include "fd3_zsa.h"
 
static void
fd3_context_destroy(struct pipe_context *pctx)
{
struct fd3_context *fd3_ctx = fd3_context(fd_context(pctx));
 
util_dynarray_fini(&fd3_ctx->rbrc_patches);
 
fd_bo_del(fd3_ctx->vs_pvt_mem);
fd_bo_del(fd3_ctx->fs_pvt_mem);
fd_bo_del(fd3_ctx->vsc_size_mem);
 
pctx->delete_vertex_elements_state(pctx, fd3_ctx->solid_vbuf_state.vtx);
pctx->delete_vertex_elements_state(pctx, fd3_ctx->blit_vbuf_state.vtx);
 
pipe_resource_reference(&fd3_ctx->solid_vbuf, NULL);
pipe_resource_reference(&fd3_ctx->blit_texcoord_vbuf, NULL);
 
u_upload_destroy(fd3_ctx->border_color_uploader);
 
fd_context_destroy(pctx);
}
 
/* TODO we could combine a few of these small buffers (solid_vbuf,
* blit_texcoord_vbuf, and vsc_size_mem) into a single buffer and
* save a tiny bit of memory.
*/
 
static struct pipe_resource *
create_solid_vertexbuf(struct pipe_context *pctx)
{
static const float init_shader_const[] = {
-1.000000, +1.000000, +1.000000,
+1.000000, -1.000000, +1.000000,
};
struct pipe_resource *prsc = pipe_buffer_create(pctx->screen,
PIPE_BIND_CUSTOM, PIPE_USAGE_IMMUTABLE, sizeof(init_shader_const));
pipe_buffer_write(pctx, prsc, 0,
sizeof(init_shader_const), init_shader_const);
return prsc;
}
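 
/* The six floats above are just two vertices, (-1, +1, +1) and (+1, -1, +1):
* opposite corners of a full-screen rectangle in clip space. They are bound
* through solid_vbuf_state (R32G32B32_FLOAT, 12 byte stride, set up further
* down) and drawn as DI_PT_RECTLIST with a vertex count of 2 by the clear
* path (see fd3_clear() in fd3_draw.c).
*/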
 
static struct pipe_resource *
create_blit_texcoord_vertexbuf(struct pipe_context *pctx)
{
struct pipe_resource *prsc = pipe_buffer_create(pctx->screen,
PIPE_BIND_CUSTOM, PIPE_USAGE_DYNAMIC, 16);
return prsc;
}
 
static const uint8_t primtypes[PIPE_PRIM_MAX] = {
[PIPE_PRIM_POINTS] = DI_PT_POINTLIST_A3XX,
[PIPE_PRIM_LINES] = DI_PT_LINELIST,
[PIPE_PRIM_LINE_STRIP] = DI_PT_LINESTRIP,
[PIPE_PRIM_LINE_LOOP] = DI_PT_LINELOOP,
[PIPE_PRIM_TRIANGLES] = DI_PT_TRILIST,
[PIPE_PRIM_TRIANGLE_STRIP] = DI_PT_TRISTRIP,
[PIPE_PRIM_TRIANGLE_FAN] = DI_PT_TRIFAN,
};
 
struct pipe_context *
fd3_context_create(struct pipe_screen *pscreen, void *priv)
{
struct fd_screen *screen = fd_screen(pscreen);
struct fd3_context *fd3_ctx = CALLOC_STRUCT(fd3_context);
struct pipe_context *pctx;
 
if (!fd3_ctx)
return NULL;
 
pctx = &fd3_ctx->base.base;
 
fd3_ctx->base.dev = fd_device_ref(screen->dev);
fd3_ctx->base.screen = fd_screen(pscreen);
 
pctx->destroy = fd3_context_destroy;
pctx->create_blend_state = fd3_blend_state_create;
pctx->create_rasterizer_state = fd3_rasterizer_state_create;
pctx->create_depth_stencil_alpha_state = fd3_zsa_state_create;
 
fd3_draw_init(pctx);
fd3_gmem_init(pctx);
fd3_texture_init(pctx);
fd3_prog_init(pctx);
 
pctx = fd_context_init(&fd3_ctx->base, pscreen, primtypes, priv);
if (!pctx)
return NULL;
 
util_dynarray_init(&fd3_ctx->rbrc_patches);
 
fd3_ctx->vs_pvt_mem = fd_bo_new(screen->dev, 0x2000,
DRM_FREEDRENO_GEM_TYPE_KMEM);
 
fd3_ctx->fs_pvt_mem = fd_bo_new(screen->dev, 0x2000,
DRM_FREEDRENO_GEM_TYPE_KMEM);
 
fd3_ctx->vsc_size_mem = fd_bo_new(screen->dev, 0x1000,
DRM_FREEDRENO_GEM_TYPE_KMEM);
 
fd3_ctx->solid_vbuf = create_solid_vertexbuf(pctx);
fd3_ctx->blit_texcoord_vbuf = create_blit_texcoord_vertexbuf(pctx);
 
/* setup solid_vbuf_state: */
fd3_ctx->solid_vbuf_state.vtx = pctx->create_vertex_elements_state(
pctx, 1, (struct pipe_vertex_element[]){{
.vertex_buffer_index = 0,
.src_offset = 0,
.src_format = PIPE_FORMAT_R32G32B32_FLOAT,
}});
fd3_ctx->solid_vbuf_state.vertexbuf.count = 1;
fd3_ctx->solid_vbuf_state.vertexbuf.vb[0].stride = 12;
fd3_ctx->solid_vbuf_state.vertexbuf.vb[0].buffer = fd3_ctx->solid_vbuf;
 
/* setup blit_vbuf_state: */
fd3_ctx->blit_vbuf_state.vtx = pctx->create_vertex_elements_state(
pctx, 2, (struct pipe_vertex_element[]){{
.vertex_buffer_index = 0,
.src_offset = 0,
.src_format = PIPE_FORMAT_R32G32_FLOAT,
}, {
.vertex_buffer_index = 1,
.src_offset = 0,
.src_format = PIPE_FORMAT_R32G32B32_FLOAT,
}});
fd3_ctx->blit_vbuf_state.vertexbuf.count = 2;
fd3_ctx->blit_vbuf_state.vertexbuf.vb[0].stride = 8;
fd3_ctx->blit_vbuf_state.vertexbuf.vb[0].buffer = fd3_ctx->blit_texcoord_vbuf;
fd3_ctx->blit_vbuf_state.vertexbuf.vb[1].stride = 12;
fd3_ctx->blit_vbuf_state.vertexbuf.vb[1].buffer = fd3_ctx->solid_vbuf;
 
fd3_query_context_init(pctx);
 
fd3_ctx->border_color_uploader = u_upload_create(pctx, 4096,
2 * PIPE_MAX_SAMPLERS * BORDERCOLOR_SIZE, 0);
 
return pctx;
}
/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/freedreno/a3xx/fd3_context.h
0,0 → 1,127
/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */
 
/*
* Copyright (C) 2013 Rob Clark <robclark@freedesktop.org>
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Authors:
* Rob Clark <robclark@freedesktop.org>
*/
 
#ifndef FD3_CONTEXT_H_
#define FD3_CONTEXT_H_
 
#include "util/u_upload_mgr.h"
 
#include "freedreno_drmif.h"
 
#include "freedreno_context.h"
 
#include "ir3_shader.h"
 
 
struct fd3_context {
struct fd_context base;
 
/* Keep track of writes to RB_RENDER_CONTROL which need to be patched
* once we know whether or not to use GMEM, and GMEM tile pitch.
*/
struct util_dynarray rbrc_patches;
 
struct fd_bo *vs_pvt_mem, *fs_pvt_mem;
 
/* This only needs to be 4 * num_of_pipes bytes (ie. 32 bytes). We
* could combine it with another allocation.
*/
struct fd_bo *vsc_size_mem;
 
/* vertex buf used for clear/gmem->mem vertices, and mem->gmem
* vertices:
*/
struct pipe_resource *solid_vbuf;
 
/* vertex buf used for mem->gmem tex coords:
*/
struct pipe_resource *blit_texcoord_vbuf;
 
/* vertex state for solid_vbuf:
* - solid_vbuf / 12 / R32G32B32_FLOAT
*/
struct fd_vertex_state solid_vbuf_state;
 
/* vertex state for blit_prog:
* - blit_texcoord_vbuf / 8 / R32G32_FLOAT
* - solid_vbuf / 12 / R32G32B32_FLOAT
*/
struct fd_vertex_state blit_vbuf_state;
 
 
/*
* Border color layout *appears* to be arrays of 0x40-byte
* elements, with frag shader elements starting at (16 x 0x40).
* But at some point I should probably experiment more with
* samplers in vertex shaders to be sure. Unclear about why
* there is this offset when there are separate VS and FS base
* addr regs.
*
* The first 8 bytes of each entry are the requested border
* color in fp16. Unclear about the rest.. could be used for
* other formats, or could simply be for aligning the pitch
* to 32 pixels.
*/
#define BORDERCOLOR_SIZE 0x40
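 
/* So, taking the layout above at face value, the border color entry for
* fragment sampler i starts at (16 + i) * BORDERCOLOR_SIZE from the uploaded
* base; emit_textures() in fd3_emit.c does exactly this pointer arithmetic
* when filling the entries in.
*/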
 
struct u_upload_mgr *border_color_uploader;
struct pipe_resource *border_color_buf;
 
/* true if *any* bits are set in {v,f}saturate_{s,t,r} */
bool vsaturate, fsaturate;
 
/* bitmask of samplers which need coords clamped for vertex
* shader:
*/
unsigned vsaturate_s, vsaturate_t, vsaturate_r;
 
/* bitmask of samplers which need coords clamped for frag
* shader:
*/
unsigned fsaturate_s, fsaturate_t, fsaturate_r;
 
/* bitmask of integer texture samplers */
uint16_t vinteger_s, finteger_s;
 
/* some state changes require a different shader variant. Keep
* track of this so we know when we need to re-emit shader state
* due to variant change. See fixup_shader_state()
*/
struct ir3_shader_key last_key;
};
 
static INLINE struct fd3_context *
fd3_context(struct fd_context *ctx)
{
return (struct fd3_context *)ctx;
}
 
struct pipe_context *
fd3_context_create(struct pipe_screen *pscreen, void *priv);
 
#endif /* FD3_CONTEXT_H_ */
/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/freedreno/a3xx/fd3_draw.c
0,0 → 1,377
/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */
 
/*
* Copyright (C) 2013 Rob Clark <robclark@freedesktop.org>
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Authors:
* Rob Clark <robclark@freedesktop.org>
*/
 
#include "pipe/p_state.h"
#include "util/u_string.h"
#include "util/u_memory.h"
#include "util/u_prim.h"
#include "util/u_format.h"
 
#include "freedreno_state.h"
#include "freedreno_resource.h"
 
#include "fd3_draw.h"
#include "fd3_context.h"
#include "fd3_emit.h"
#include "fd3_program.h"
#include "fd3_format.h"
#include "fd3_zsa.h"
 
static inline uint32_t
add_sat(uint32_t a, int32_t b)
{
int64_t ret = (uint64_t)a + (int64_t)b;
if (ret > ~0U)
return ~0U;
if (ret < 0)
return 0;
return (uint32_t)ret;
}
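 
/* e.g. add_sat(3, -10) returns 0 and add_sat(0xfffffff0, 0x100) returns
* 0xffffffff; this keeps the VFD_INDEX_MIN/MAX values emitted below sane when
* index_bias pushes the index range past either end of the 32-bit space.
*/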
 
static void
draw_impl(struct fd_context *ctx, struct fd_ringbuffer *ring,
struct fd3_emit *emit)
{
const struct pipe_draw_info *info = emit->info;
enum pc_di_primtype primtype = ctx->primtypes[info->mode];
 
fd3_emit_state(ctx, ring, emit);
 
if (emit->dirty & (FD_DIRTY_VTXBUF | FD_DIRTY_VTXSTATE))
fd3_emit_vertex_bufs(ring, emit);
 
OUT_PKT0(ring, REG_A3XX_PC_VERTEX_REUSE_BLOCK_CNTL, 1);
OUT_RING(ring, 0x0000000b); /* PC_VERTEX_REUSE_BLOCK_CNTL */
 
OUT_PKT0(ring, REG_A3XX_VFD_INDEX_MIN, 4);
OUT_RING(ring, add_sat(info->min_index, info->index_bias)); /* VFD_INDEX_MIN */
OUT_RING(ring, add_sat(info->max_index, info->index_bias)); /* VFD_INDEX_MAX */
OUT_RING(ring, info->start_instance); /* VFD_INSTANCEID_OFFSET */
OUT_RING(ring, info->indexed ? info->index_bias : info->start); /* VFD_INDEX_OFFSET */
 
OUT_PKT0(ring, REG_A3XX_PC_RESTART_INDEX, 1);
OUT_RING(ring, info->primitive_restart ? /* PC_RESTART_INDEX */
info->restart_index : 0xffffffff);
 
if (ctx->rasterizer && ctx->rasterizer->point_size_per_vertex &&
info->mode == PIPE_PRIM_POINTS)
primtype = DI_PT_POINTLIST_A2XX;
 
fd_draw_emit(ctx, ring,
primtype,
emit->key.binning_pass ? IGNORE_VISIBILITY : USE_VISIBILITY,
info);
}
 
/* fixup dirty shader state in case some "unrelated" (from the state-
* tracker's perspective) state change causes us to switch to a
* different variant.
*/
static void
fixup_shader_state(struct fd_context *ctx, struct ir3_shader_key *key)
{
struct fd3_context *fd3_ctx = fd3_context(ctx);
struct ir3_shader_key *last_key = &fd3_ctx->last_key;
 
if (!ir3_shader_key_equal(last_key, key)) {
ctx->dirty |= FD_DIRTY_PROG;
 
if (last_key->has_per_samp || key->has_per_samp) {
if ((last_key->vsaturate_s != key->vsaturate_s) ||
(last_key->vsaturate_t != key->vsaturate_t) ||
(last_key->vsaturate_r != key->vsaturate_r) ||
(last_key->vinteger_s != key->vinteger_s))
ctx->prog.dirty |= FD_SHADER_DIRTY_VP;
 
if ((last_key->fsaturate_s != key->fsaturate_s) ||
(last_key->fsaturate_t != key->fsaturate_t) ||
(last_key->fsaturate_r != key->fsaturate_r) ||
(last_key->finteger_s != key->finteger_s))
ctx->prog.dirty |= FD_SHADER_DIRTY_FP;
}
 
if (last_key->color_two_side != key->color_two_side)
ctx->prog.dirty |= FD_SHADER_DIRTY_FP;
 
if (last_key->half_precision != key->half_precision)
ctx->prog.dirty |= FD_SHADER_DIRTY_FP;
 
fd3_ctx->last_key = *key;
}
}
 
static void
fd3_draw_vbo(struct fd_context *ctx, const struct pipe_draw_info *info)
{
struct fd3_context *fd3_ctx = fd3_context(ctx);
struct fd3_emit emit = {
.vtx = &ctx->vtx,
.prog = &ctx->prog,
.info = info,
.key = {
/* do binning pass first: */
.binning_pass = true,
.color_two_side = ctx->rasterizer ? ctx->rasterizer->light_twoside : false,
// TODO set .half_precision based on render target format,
// ie. float16 and smaller use half, float32 use full..
.half_precision = !!(fd_mesa_debug & FD_DBG_FRAGHALF),
.has_per_samp = (fd3_ctx->fsaturate || fd3_ctx->vsaturate ||
fd3_ctx->vinteger_s || fd3_ctx->finteger_s),
.vsaturate_s = fd3_ctx->vsaturate_s,
.vsaturate_t = fd3_ctx->vsaturate_t,
.vsaturate_r = fd3_ctx->vsaturate_r,
.fsaturate_s = fd3_ctx->fsaturate_s,
.fsaturate_t = fd3_ctx->fsaturate_t,
.fsaturate_r = fd3_ctx->fsaturate_r,
.vinteger_s = fd3_ctx->vinteger_s,
.finteger_s = fd3_ctx->finteger_s,
},
.rasterflat = ctx->rasterizer && ctx->rasterizer->flatshade,
.sprite_coord_enable = ctx->rasterizer ? ctx->rasterizer->sprite_coord_enable : 0,
.sprite_coord_mode = ctx->rasterizer ? ctx->rasterizer->sprite_coord_mode : false,
};
unsigned dirty;
 
fixup_shader_state(ctx, &emit.key);
 
dirty = ctx->dirty;
emit.dirty = dirty & ~(FD_DIRTY_BLEND);
draw_impl(ctx, ctx->binning_ring, &emit);
 
/* and now regular (non-binning) pass: */
emit.key.binning_pass = false;
emit.dirty = dirty;
emit.vp = NULL; /* we changed key so need to refetch vp */
draw_impl(ctx, ctx->ring, &emit);
}
 
/* clear operations ignore viewport state, so we need to reset it
* based on framebuffer state:
*/
static void
reset_viewport(struct fd_ringbuffer *ring, struct pipe_framebuffer_state *pfb)
{
float half_width = pfb->width * 0.5f;
float half_height = pfb->height * 0.5f;
 
OUT_PKT0(ring, REG_A3XX_GRAS_CL_VPORT_XOFFSET, 4);
OUT_RING(ring, A3XX_GRAS_CL_VPORT_XOFFSET(half_width - 0.5));
OUT_RING(ring, A3XX_GRAS_CL_VPORT_XSCALE(half_width));
OUT_RING(ring, A3XX_GRAS_CL_VPORT_YOFFSET(half_height - 0.5));
OUT_RING(ring, A3XX_GRAS_CL_VPORT_YSCALE(-half_height));
}
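 
/* For example, a 640x480 framebuffer yields XOFFSET=319.5, XSCALE=320,
* YOFFSET=239.5, YSCALE=-240, so clip-space x/y in [-1, +1] map to window
* coords [-0.5, 639.5] x [479.5, -0.5] (note the Y flip), i.e. the solid-fill
* rect covers the entire render target.
*/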
 
/* binning pass cmds for a clear:
* NOTE: newer blob drivers don't use binning for clear, which is probably
* preferable since the vertex count is low. However that doesn't seem to
* actually work for me. Not sure if it depends on support for a clear
* pass (rather than using the solid-fill shader), or something else that
* the newer blob is doing differently. Once that is figured out, we can
* remove fd3_clear_binning().
*/
static void
fd3_clear_binning(struct fd_context *ctx, unsigned dirty)
{
struct fd3_context *fd3_ctx = fd3_context(ctx);
struct fd_ringbuffer *ring = ctx->binning_ring;
struct fd3_emit emit = {
.vtx = &fd3_ctx->solid_vbuf_state,
.prog = &ctx->solid_prog,
.key = {
.binning_pass = true,
.half_precision = true,
},
.dirty = dirty,
};
 
fd3_emit_state(ctx, ring, &emit);
fd3_emit_vertex_bufs(ring, &emit);
reset_viewport(ring, &ctx->framebuffer);
 
OUT_PKT0(ring, REG_A3XX_PC_PRIM_VTX_CNTL, 1);
OUT_RING(ring, A3XX_PC_PRIM_VTX_CNTL_STRIDE_IN_VPC(0) |
A3XX_PC_PRIM_VTX_CNTL_POLYMODE_FRONT_PTYPE(PC_DRAW_TRIANGLES) |
A3XX_PC_PRIM_VTX_CNTL_POLYMODE_BACK_PTYPE(PC_DRAW_TRIANGLES) |
A3XX_PC_PRIM_VTX_CNTL_PROVOKING_VTX_LAST);
OUT_PKT0(ring, REG_A3XX_VFD_INDEX_MIN, 4);
OUT_RING(ring, 0); /* VFD_INDEX_MIN */
OUT_RING(ring, 2); /* VFD_INDEX_MAX */
OUT_RING(ring, 0); /* VFD_INSTANCEID_OFFSET */
OUT_RING(ring, 0); /* VFD_INDEX_OFFSET */
OUT_PKT0(ring, REG_A3XX_PC_RESTART_INDEX, 1);
OUT_RING(ring, 0xffffffff); /* PC_RESTART_INDEX */
 
fd_event_write(ctx, ring, PERFCOUNTER_STOP);
 
fd_draw(ctx, ring, DI_PT_RECTLIST, IGNORE_VISIBILITY,
DI_SRC_SEL_AUTO_INDEX, 2, 0, INDEX_SIZE_IGN, 0, 0, NULL);
}
 
static void
fd3_clear(struct fd_context *ctx, unsigned buffers,
const union pipe_color_union *color, double depth, unsigned stencil)
{
struct fd3_context *fd3_ctx = fd3_context(ctx);
struct pipe_framebuffer_state *pfb = &ctx->framebuffer;
struct fd_ringbuffer *ring = ctx->ring;
unsigned dirty = ctx->dirty;
unsigned i;
struct fd3_emit emit = {
.vtx = &fd3_ctx->solid_vbuf_state,
.prog = &ctx->solid_prog,
.key = {
.half_precision = (fd3_half_precision(pfb->cbufs[0]) &&
fd3_half_precision(pfb->cbufs[1]) &&
fd3_half_precision(pfb->cbufs[2]) &&
fd3_half_precision(pfb->cbufs[3])),
},
};
 
dirty &= FD_DIRTY_FRAMEBUFFER | FD_DIRTY_SCISSOR;
dirty |= FD_DIRTY_PROG;
emit.dirty = dirty;
 
fd3_clear_binning(ctx, dirty);
 
/* emit generic state now: */
fd3_emit_state(ctx, ring, &emit);
reset_viewport(ring, &ctx->framebuffer);
 
OUT_PKT0(ring, REG_A3XX_RB_BLEND_ALPHA, 1);
OUT_RING(ring, A3XX_RB_BLEND_ALPHA_UINT(0xff) |
A3XX_RB_BLEND_ALPHA_FLOAT(1.0));
 
OUT_PKT0(ring, REG_A3XX_RB_RENDER_CONTROL, 1);
OUT_RINGP(ring, A3XX_RB_RENDER_CONTROL_ALPHA_TEST_FUNC(FUNC_NEVER),
&fd3_ctx->rbrc_patches);
 
if (buffers & PIPE_CLEAR_DEPTH) {
OUT_PKT0(ring, REG_A3XX_RB_DEPTH_CONTROL, 1);
OUT_RING(ring, A3XX_RB_DEPTH_CONTROL_Z_WRITE_ENABLE |
A3XX_RB_DEPTH_CONTROL_Z_ENABLE |
A3XX_RB_DEPTH_CONTROL_ZFUNC(FUNC_ALWAYS));
 
fd_wfi(ctx, ring);
OUT_PKT0(ring, REG_A3XX_GRAS_CL_VPORT_ZOFFSET, 2);
OUT_RING(ring, A3XX_GRAS_CL_VPORT_ZOFFSET(0.0));
OUT_RING(ring, A3XX_GRAS_CL_VPORT_ZSCALE(depth));
ctx->dirty |= FD_DIRTY_VIEWPORT;
} else {
OUT_PKT0(ring, REG_A3XX_RB_DEPTH_CONTROL, 1);
OUT_RING(ring, A3XX_RB_DEPTH_CONTROL_ZFUNC(FUNC_NEVER));
}
 
if (buffers & PIPE_CLEAR_STENCIL) {
OUT_PKT0(ring, REG_A3XX_RB_STENCILREFMASK, 2);
OUT_RING(ring, A3XX_RB_STENCILREFMASK_STENCILREF(stencil) |
A3XX_RB_STENCILREFMASK_STENCILMASK(stencil) |
A3XX_RB_STENCILREFMASK_STENCILWRITEMASK(0xff));
OUT_RING(ring, A3XX_RB_STENCILREFMASK_STENCILREF(0) |
A3XX_RB_STENCILREFMASK_STENCILMASK(0) |
0xff000000 | // XXX ???
A3XX_RB_STENCILREFMASK_STENCILWRITEMASK(0xff));
 
OUT_PKT0(ring, REG_A3XX_RB_STENCIL_CONTROL, 1);
OUT_RING(ring, A3XX_RB_STENCIL_CONTROL_STENCIL_ENABLE |
A3XX_RB_STENCIL_CONTROL_FUNC(FUNC_ALWAYS) |
A3XX_RB_STENCIL_CONTROL_FAIL(STENCIL_KEEP) |
A3XX_RB_STENCIL_CONTROL_ZPASS(STENCIL_REPLACE) |
A3XX_RB_STENCIL_CONTROL_ZFAIL(STENCIL_KEEP) |
A3XX_RB_STENCIL_CONTROL_FUNC_BF(FUNC_NEVER) |
A3XX_RB_STENCIL_CONTROL_FAIL_BF(STENCIL_KEEP) |
A3XX_RB_STENCIL_CONTROL_ZPASS_BF(STENCIL_KEEP) |
A3XX_RB_STENCIL_CONTROL_ZFAIL_BF(STENCIL_KEEP));
} else {
OUT_PKT0(ring, REG_A3XX_RB_STENCILREFMASK, 2);
OUT_RING(ring, A3XX_RB_STENCILREFMASK_STENCILREF(0) |
A3XX_RB_STENCILREFMASK_STENCILMASK(0) |
A3XX_RB_STENCILREFMASK_STENCILWRITEMASK(0));
OUT_RING(ring, A3XX_RB_STENCILREFMASK_BF_STENCILREF(0) |
A3XX_RB_STENCILREFMASK_BF_STENCILMASK(0) |
A3XX_RB_STENCILREFMASK_BF_STENCILWRITEMASK(0));
 
OUT_PKT0(ring, REG_A3XX_RB_STENCIL_CONTROL, 1);
OUT_RING(ring, A3XX_RB_STENCIL_CONTROL_FUNC(FUNC_NEVER) |
A3XX_RB_STENCIL_CONTROL_FAIL(STENCIL_KEEP) |
A3XX_RB_STENCIL_CONTROL_ZPASS(STENCIL_KEEP) |
A3XX_RB_STENCIL_CONTROL_ZFAIL(STENCIL_KEEP) |
A3XX_RB_STENCIL_CONTROL_FUNC_BF(FUNC_NEVER) |
A3XX_RB_STENCIL_CONTROL_FAIL_BF(STENCIL_KEEP) |
A3XX_RB_STENCIL_CONTROL_ZPASS_BF(STENCIL_KEEP) |
A3XX_RB_STENCIL_CONTROL_ZFAIL_BF(STENCIL_KEEP));
}
 
for (i = 0; i < 4; i++) {
OUT_PKT0(ring, REG_A3XX_RB_MRT_CONTROL(i), 1);
OUT_RING(ring, A3XX_RB_MRT_CONTROL_ROP_CODE(ROP_COPY) |
A3XX_RB_MRT_CONTROL_DITHER_MODE(DITHER_ALWAYS) |
COND(buffers & (PIPE_CLEAR_COLOR0 << i),
A3XX_RB_MRT_CONTROL_COMPONENT_ENABLE(0xf)));
 
OUT_PKT0(ring, REG_A3XX_RB_MRT_BLEND_CONTROL(i), 1);
OUT_RING(ring, A3XX_RB_MRT_BLEND_CONTROL_RGB_SRC_FACTOR(FACTOR_ONE) |
A3XX_RB_MRT_BLEND_CONTROL_RGB_BLEND_OPCODE(BLEND_DST_PLUS_SRC) |
A3XX_RB_MRT_BLEND_CONTROL_RGB_DEST_FACTOR(FACTOR_ZERO) |
A3XX_RB_MRT_BLEND_CONTROL_ALPHA_SRC_FACTOR(FACTOR_ONE) |
A3XX_RB_MRT_BLEND_CONTROL_ALPHA_BLEND_OPCODE(BLEND_DST_PLUS_SRC) |
A3XX_RB_MRT_BLEND_CONTROL_ALPHA_DEST_FACTOR(FACTOR_ZERO));
}
 
OUT_PKT0(ring, REG_A3XX_GRAS_SU_MODE_CONTROL, 1);
OUT_RING(ring, A3XX_GRAS_SU_MODE_CONTROL_LINEHALFWIDTH(0));
 
fd3_emit_vertex_bufs(ring, &emit);
 
fd3_emit_constant(ring, SB_FRAG_SHADER, 0, 0, 4, color->ui, NULL);
 
OUT_PKT0(ring, REG_A3XX_PC_PRIM_VTX_CNTL, 1);
OUT_RING(ring, A3XX_PC_PRIM_VTX_CNTL_STRIDE_IN_VPC(0) |
A3XX_PC_PRIM_VTX_CNTL_POLYMODE_FRONT_PTYPE(PC_DRAW_TRIANGLES) |
A3XX_PC_PRIM_VTX_CNTL_POLYMODE_BACK_PTYPE(PC_DRAW_TRIANGLES) |
A3XX_PC_PRIM_VTX_CNTL_PROVOKING_VTX_LAST);
OUT_PKT0(ring, REG_A3XX_VFD_INDEX_MIN, 4);
OUT_RING(ring, 0); /* VFD_INDEX_MIN */
OUT_RING(ring, 2); /* VFD_INDEX_MAX */
OUT_RING(ring, 0); /* VFD_INSTANCEID_OFFSET */
OUT_RING(ring, 0); /* VFD_INDEX_OFFSET */
OUT_PKT0(ring, REG_A3XX_PC_RESTART_INDEX, 1);
OUT_RING(ring, 0xffffffff); /* PC_RESTART_INDEX */
 
fd_event_write(ctx, ring, PERFCOUNTER_STOP);
 
fd_draw(ctx, ring, DI_PT_RECTLIST, USE_VISIBILITY,
DI_SRC_SEL_AUTO_INDEX, 2, 0, INDEX_SIZE_IGN, 0, 0, NULL);
}
 
void
fd3_draw_init(struct pipe_context *pctx)
{
struct fd_context *ctx = fd_context(pctx);
ctx->draw_vbo = fd3_draw_vbo;
ctx->clear = fd3_clear;
}
/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/freedreno/a3xx/fd3_draw.h
0,0 → 1,38
/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */
 
/*
* Copyright (C) 2013 Rob Clark <robclark@freedesktop.org>
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Authors:
* Rob Clark <robclark@freedesktop.org>
*/
 
#ifndef FD3_DRAW_H_
#define FD3_DRAW_H_
 
#include "pipe/p_context.h"
 
#include "freedreno_draw.h"
 
void fd3_draw_init(struct pipe_context *pctx);
 
#endif /* FD3_DRAW_H_ */
/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/freedreno/a3xx/fd3_emit.c
0,0 → 1,932
/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */
 
/*
* Copyright (C) 2013 Rob Clark <robclark@freedesktop.org>
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Authors:
* Rob Clark <robclark@freedesktop.org>
*/
 
#include "pipe/p_state.h"
#include "util/u_string.h"
#include "util/u_memory.h"
#include "util/u_helpers.h"
#include "util/u_format.h"
 
#include "freedreno_resource.h"
 
#include "fd3_emit.h"
#include "fd3_blend.h"
#include "fd3_context.h"
#include "fd3_program.h"
#include "fd3_rasterizer.h"
#include "fd3_texture.h"
#include "fd3_format.h"
#include "fd3_zsa.h"
 
/* regid: base const register
* prsc or dwords: buffer containing constant values
* sizedwords: size of const value buffer
*/
void
fd3_emit_constant(struct fd_ringbuffer *ring,
enum adreno_state_block sb,
uint32_t regid, uint32_t offset, uint32_t sizedwords,
const uint32_t *dwords, struct pipe_resource *prsc)
{
uint32_t i, sz;
enum adreno_state_src src;
 
if (prsc) {
sz = 0;
src = SS_INDIRECT;
} else {
sz = sizedwords;
src = SS_DIRECT;
}
 
OUT_PKT3(ring, CP_LOAD_STATE, 2 + sz);
OUT_RING(ring, CP_LOAD_STATE_0_DST_OFF(regid/2) |
CP_LOAD_STATE_0_STATE_SRC(src) |
CP_LOAD_STATE_0_STATE_BLOCK(sb) |
CP_LOAD_STATE_0_NUM_UNIT(sizedwords/2));
if (prsc) {
struct fd_bo *bo = fd_resource(prsc)->bo;
OUT_RELOC(ring, bo, offset,
CP_LOAD_STATE_1_STATE_TYPE(ST_CONSTANTS), 0);
} else {
OUT_RING(ring, CP_LOAD_STATE_1_EXT_SRC_ADDR(0) |
CP_LOAD_STATE_1_STATE_TYPE(ST_CONSTANTS));
dwords = (uint32_t *)&((uint8_t *)dwords)[offset];
}
for (i = 0; i < sz; i++) {
OUT_RING(ring, dwords[i]);
}
}
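 
/* Typical direct usage (this exact call appears in fd3_clear() in
* fd3_draw.c), loading one vec4 of clear color into the fragment shader
* constant file starting at register c0:
*
* fd3_emit_constant(ring, SB_FRAG_SHADER, 0, 0, 4, color->ui, NULL);
*
* Passing a non-NULL prsc instead selects SS_INDIRECT, so a reloc to the
* buffer object is emitted rather than the inline dwords.
*/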
 
static void
emit_constants(struct fd_ringbuffer *ring,
enum adreno_state_block sb,
struct fd_constbuf_stateobj *constbuf,
struct ir3_shader_variant *shader,
bool emit_immediates)
{
uint32_t enabled_mask = constbuf->enabled_mask;
uint32_t max_const;
int i;
 
// XXX TODO only emit dirty consts.. but we need to keep track of whether
// they are clobbered by a clear, gmem2mem, or mem2gmem..
constbuf->dirty_mask = enabled_mask;
 
/* in particular, with binning shader we may end up with unused
* consts, ie. we could end up w/ constlen that is smaller
* than first_immediate. In that case truncate the user consts
* early to avoid HLSQ lockup caused by writing too many consts
*/
max_const = MIN2(shader->first_driver_param, shader->constlen);
 
/* emit user constants: */
if (enabled_mask & 1) {
const unsigned index = 0;
struct pipe_constant_buffer *cb = &constbuf->cb[index];
unsigned size = align(cb->buffer_size, 4) / 4; /* size in dwords */
 
// I expect that size should be a multiple of vec4's:
assert(size == align(size, 4));
 
/* and even if the start of the const buffer is before
* first_immediate, the end may not be:
*/
size = MIN2(size, 4 * max_const);
 
if (size && constbuf->dirty_mask & (1 << index)) {
fd3_emit_constant(ring, sb, 0,
cb->buffer_offset, size,
cb->user_buffer, cb->buffer);
constbuf->dirty_mask &= ~(1 << index);
}
 
enabled_mask &= ~(1 << index);
}
 
if (shader->constlen > shader->first_driver_param) {
uint32_t params = MIN2(4, shader->constlen - shader->first_driver_param);
/* emit ubos: */
OUT_PKT3(ring, CP_LOAD_STATE, 2 + params * 4);
OUT_RING(ring, CP_LOAD_STATE_0_DST_OFF(shader->first_driver_param * 2) |
CP_LOAD_STATE_0_STATE_SRC(SS_DIRECT) |
CP_LOAD_STATE_0_STATE_BLOCK(sb) |
CP_LOAD_STATE_0_NUM_UNIT(params * 2));
OUT_RING(ring, CP_LOAD_STATE_1_EXT_SRC_ADDR(0) |
CP_LOAD_STATE_1_STATE_TYPE(ST_CONSTANTS));
 
for (i = 1; i <= params * 4; i++) {
struct pipe_constant_buffer *cb = &constbuf->cb[i];
assert(!cb->user_buffer);
if ((enabled_mask & (1 << i)) && cb->buffer)
OUT_RELOC(ring, fd_resource(cb->buffer)->bo, cb->buffer_offset, 0, 0);
else
OUT_RING(ring, 0xbad00000 | ((i - 1) << 16));
}
}
 
/* emit shader immediates: */
if (shader && emit_immediates) {
int size = shader->immediates_count;
uint32_t base = shader->first_immediate;
 
/* truncate size to avoid writing constants that shader
* does not use:
*/
size = MIN2(size + base, shader->constlen) - base;
 
/* convert out of vec4: */
base *= 4;
size *= 4;
 
if (size > 0) {
fd3_emit_constant(ring, sb, base,
0, size, shader->immediates[0].val, NULL);
}
}
}
 
#define VERT_TEX_OFF 0
#define FRAG_TEX_OFF 16
#define BASETABLE_SZ A3XX_MAX_MIP_LEVELS
 
static void
emit_textures(struct fd_context *ctx, struct fd_ringbuffer *ring,
enum adreno_state_block sb, struct fd_texture_stateobj *tex)
{
static const unsigned tex_off[] = {
[SB_VERT_TEX] = VERT_TEX_OFF,
[SB_FRAG_TEX] = FRAG_TEX_OFF,
};
static const enum adreno_state_block mipaddr[] = {
[SB_VERT_TEX] = SB_VERT_MIPADDR,
[SB_FRAG_TEX] = SB_FRAG_MIPADDR,
};
static const uint32_t bcolor_reg[] = {
[SB_VERT_TEX] = REG_A3XX_TPL1_TP_VS_BORDER_COLOR_BASE_ADDR,
[SB_FRAG_TEX] = REG_A3XX_TPL1_TP_FS_BORDER_COLOR_BASE_ADDR,
};
struct fd3_context *fd3_ctx = fd3_context(ctx);
unsigned i, j, off;
void *ptr;
 
u_upload_alloc(fd3_ctx->border_color_uploader,
0, 2 * PIPE_MAX_SAMPLERS * BORDERCOLOR_SIZE, &off,
&fd3_ctx->border_color_buf,
&ptr);
 
if (tex->num_samplers > 0) {
/* output sampler state: */
OUT_PKT3(ring, CP_LOAD_STATE, 2 + (2 * tex->num_samplers));
OUT_RING(ring, CP_LOAD_STATE_0_DST_OFF(tex_off[sb]) |
CP_LOAD_STATE_0_STATE_SRC(SS_DIRECT) |
CP_LOAD_STATE_0_STATE_BLOCK(sb) |
CP_LOAD_STATE_0_NUM_UNIT(tex->num_samplers));
OUT_RING(ring, CP_LOAD_STATE_1_STATE_TYPE(ST_SHADER) |
CP_LOAD_STATE_1_EXT_SRC_ADDR(0));
for (i = 0; i < tex->num_samplers; i++) {
static const struct fd3_sampler_stateobj dummy_sampler = {};
const struct fd3_sampler_stateobj *sampler = tex->samplers[i] ?
fd3_sampler_stateobj(tex->samplers[i]) :
&dummy_sampler;
uint16_t *bcolor = (uint16_t *)((uint8_t *)ptr +
(BORDERCOLOR_SIZE * tex_off[sb]) +
(BORDERCOLOR_SIZE * i));
uint32_t *bcolor32 = (uint32_t *)&bcolor[16];
 
/*
* XXX HACK ALERT XXX
*
* The border colors need to be swizzled in a particular
* format-dependent order. Even though samplers don't know about
* formats, we can assume that with a GL state tracker, there's a
* 1:1 correspondence between sampler and texture. Take advantage
* of that knowledge.
*/
if (i < tex->num_textures && tex->textures[i]) {
const struct util_format_description *desc =
util_format_description(tex->textures[i]->format);
for (j = 0; j < 4; j++) {
if (desc->swizzle[j] >= 4)
continue;
 
const struct util_format_channel_description *chan =
&desc->channel[desc->swizzle[j]];
int size = chan->size;
 
/* The Z16 texture format we use seems to look in the
* 32-bit border color slots
*/
if (desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS)
size = 32;
 
/* Formats like R11G11B10 or RGB9_E5 don't specify
* per-channel sizes properly.
*/
if (desc->layout == UTIL_FORMAT_LAYOUT_OTHER)
size = 16;
 
if (chan->pure_integer && size > 16)
bcolor32[desc->swizzle[j] + 4] =
sampler->base.border_color.i[j];
else if (size > 16)
bcolor32[desc->swizzle[j]] =
fui(sampler->base.border_color.f[j]);
else if (chan->pure_integer)
bcolor[desc->swizzle[j] + 8] =
sampler->base.border_color.i[j];
else
bcolor[desc->swizzle[j]] =
util_float_to_half(sampler->base.border_color.f[j]);
}
}
 
OUT_RING(ring, sampler->texsamp0);
OUT_RING(ring, sampler->texsamp1);
}
}
 
if (tex->num_textures > 0) {
/* emit texture state: */
OUT_PKT3(ring, CP_LOAD_STATE, 2 + (4 * tex->num_textures));
OUT_RING(ring, CP_LOAD_STATE_0_DST_OFF(tex_off[sb]) |
CP_LOAD_STATE_0_STATE_SRC(SS_DIRECT) |
CP_LOAD_STATE_0_STATE_BLOCK(sb) |
CP_LOAD_STATE_0_NUM_UNIT(tex->num_textures));
OUT_RING(ring, CP_LOAD_STATE_1_STATE_TYPE(ST_CONSTANTS) |
CP_LOAD_STATE_1_EXT_SRC_ADDR(0));
for (i = 0; i < tex->num_textures; i++) {
static const struct fd3_pipe_sampler_view dummy_view = {};
const struct fd3_pipe_sampler_view *view = tex->textures[i] ?
fd3_pipe_sampler_view(tex->textures[i]) :
&dummy_view;
OUT_RING(ring, view->texconst0);
OUT_RING(ring, view->texconst1);
OUT_RING(ring, view->texconst2 |
A3XX_TEX_CONST_2_INDX(BASETABLE_SZ * i));
OUT_RING(ring, view->texconst3);
}
 
/* emit mipaddrs: */
OUT_PKT3(ring, CP_LOAD_STATE, 2 + (BASETABLE_SZ * tex->num_textures));
OUT_RING(ring, CP_LOAD_STATE_0_DST_OFF(BASETABLE_SZ * tex_off[sb]) |
CP_LOAD_STATE_0_STATE_SRC(SS_DIRECT) |
CP_LOAD_STATE_0_STATE_BLOCK(mipaddr[sb]) |
CP_LOAD_STATE_0_NUM_UNIT(BASETABLE_SZ * tex->num_textures));
OUT_RING(ring, CP_LOAD_STATE_1_STATE_TYPE(ST_CONSTANTS) |
CP_LOAD_STATE_1_EXT_SRC_ADDR(0));
for (i = 0; i < tex->num_textures; i++) {
static const struct fd3_pipe_sampler_view dummy_view = {
.base.u.tex.first_level = 1,
};
const struct fd3_pipe_sampler_view *view = tex->textures[i] ?
fd3_pipe_sampler_view(tex->textures[i]) :
&dummy_view;
struct fd_resource *rsc = fd_resource(view->base.texture);
unsigned start = view->base.u.tex.first_level;
unsigned end = view->base.u.tex.last_level;
 
for (j = 0; j < (end - start + 1); j++) {
struct fd_resource_slice *slice =
fd_resource_slice(rsc, j + start);
OUT_RELOC(ring, rsc->bo, slice->offset, 0, 0);
}
 
/* pad the remaining entries w/ null: */
for (; j < BASETABLE_SZ; j++) {
OUT_RING(ring, 0x00000000);
}
}
}
 
OUT_PKT0(ring, bcolor_reg[sb], 1);
OUT_RELOC(ring, fd_resource(fd3_ctx->border_color_buf)->bo, off, 0, 0);
 
u_upload_unmap(fd3_ctx->border_color_uploader);
}
 
/* emit texture state for mem->gmem restore operation.. eventually it would
* be good to get rid of this and use normal CSO/etc state for more of these
* special cases, but for now the compiler is not sufficient..
*
* Also, for using normal state, not quite sure how to handle the special
* case format (fd3_gmem_restore_format()) stuff for restoring depth/stencil.
*/
void
fd3_emit_gmem_restore_tex(struct fd_ringbuffer *ring,
struct pipe_surface **psurf,
int bufs)
{
int i, j;
 
/* output sampler state: */
OUT_PKT3(ring, CP_LOAD_STATE, 2 + 2 * bufs);
OUT_RING(ring, CP_LOAD_STATE_0_DST_OFF(FRAG_TEX_OFF) |
CP_LOAD_STATE_0_STATE_SRC(SS_DIRECT) |
CP_LOAD_STATE_0_STATE_BLOCK(SB_FRAG_TEX) |
CP_LOAD_STATE_0_NUM_UNIT(bufs));
OUT_RING(ring, CP_LOAD_STATE_1_STATE_TYPE(ST_SHADER) |
CP_LOAD_STATE_1_EXT_SRC_ADDR(0));
for (i = 0; i < bufs; i++) {
OUT_RING(ring, A3XX_TEX_SAMP_0_XY_MAG(A3XX_TEX_NEAREST) |
A3XX_TEX_SAMP_0_XY_MIN(A3XX_TEX_NEAREST) |
A3XX_TEX_SAMP_0_WRAP_S(A3XX_TEX_CLAMP_TO_EDGE) |
A3XX_TEX_SAMP_0_WRAP_T(A3XX_TEX_CLAMP_TO_EDGE) |
A3XX_TEX_SAMP_0_WRAP_R(A3XX_TEX_REPEAT));
OUT_RING(ring, 0x00000000);
}
 
/* emit texture state: */
OUT_PKT3(ring, CP_LOAD_STATE, 2 + 4 * bufs);
OUT_RING(ring, CP_LOAD_STATE_0_DST_OFF(FRAG_TEX_OFF) |
CP_LOAD_STATE_0_STATE_SRC(SS_DIRECT) |
CP_LOAD_STATE_0_STATE_BLOCK(SB_FRAG_TEX) |
CP_LOAD_STATE_0_NUM_UNIT(bufs));
OUT_RING(ring, CP_LOAD_STATE_1_STATE_TYPE(ST_CONSTANTS) |
CP_LOAD_STATE_1_EXT_SRC_ADDR(0));
for (i = 0; i < bufs; i++) {
if (!psurf[i]) {
OUT_RING(ring, A3XX_TEX_CONST_0_TYPE(A3XX_TEX_2D) |
A3XX_TEX_CONST_0_SWIZ_X(A3XX_TEX_ONE) |
A3XX_TEX_CONST_0_SWIZ_Y(A3XX_TEX_ONE) |
A3XX_TEX_CONST_0_SWIZ_Z(A3XX_TEX_ONE) |
A3XX_TEX_CONST_0_SWIZ_W(A3XX_TEX_ONE));
OUT_RING(ring, 0x00000000);
OUT_RING(ring, A3XX_TEX_CONST_2_INDX(BASETABLE_SZ * i));
OUT_RING(ring, 0x00000000);
continue;
}
 
struct fd_resource *rsc = fd_resource(psurf[i]->texture);
enum pipe_format format = fd3_gmem_restore_format(psurf[i]->format);
/* The restore blit_zs shader expects stencil in sampler 0, and depth
* in sampler 1
*/
if (rsc->stencil && i == 0) {
rsc = rsc->stencil;
format = fd3_gmem_restore_format(rsc->base.b.format);
}
 
unsigned lvl = psurf[i]->u.tex.level;
struct fd_resource_slice *slice = fd_resource_slice(rsc, lvl);
 
debug_assert(psurf[i]->u.tex.first_layer == psurf[i]->u.tex.last_layer);
 
OUT_RING(ring, A3XX_TEX_CONST_0_FMT(fd3_pipe2tex(format)) |
A3XX_TEX_CONST_0_TYPE(A3XX_TEX_2D) |
fd3_tex_swiz(format, PIPE_SWIZZLE_RED, PIPE_SWIZZLE_GREEN,
PIPE_SWIZZLE_BLUE, PIPE_SWIZZLE_ALPHA));
OUT_RING(ring, A3XX_TEX_CONST_1_FETCHSIZE(TFETCH_DISABLE) |
A3XX_TEX_CONST_1_WIDTH(psurf[i]->width) |
A3XX_TEX_CONST_1_HEIGHT(psurf[i]->height));
OUT_RING(ring, A3XX_TEX_CONST_2_PITCH(slice->pitch * rsc->cpp) |
A3XX_TEX_CONST_2_INDX(BASETABLE_SZ * i));
OUT_RING(ring, 0x00000000);
}
 
/* emit mipaddrs: */
OUT_PKT3(ring, CP_LOAD_STATE, 2 + BASETABLE_SZ * bufs);
OUT_RING(ring, CP_LOAD_STATE_0_DST_OFF(BASETABLE_SZ * FRAG_TEX_OFF) |
CP_LOAD_STATE_0_STATE_SRC(SS_DIRECT) |
CP_LOAD_STATE_0_STATE_BLOCK(SB_FRAG_MIPADDR) |
CP_LOAD_STATE_0_NUM_UNIT(BASETABLE_SZ * bufs));
OUT_RING(ring, CP_LOAD_STATE_1_STATE_TYPE(ST_CONSTANTS) |
CP_LOAD_STATE_1_EXT_SRC_ADDR(0));
for (i = 0; i < bufs; i++) {
if (psurf[i]) {
struct fd_resource *rsc = fd_resource(psurf[i]->texture);
/* Matches above logic for blit_zs shader */
if (rsc->stencil && i == 0)
rsc = rsc->stencil;
unsigned lvl = psurf[i]->u.tex.level;
uint32_t offset = fd_resource_offset(rsc, lvl, psurf[i]->u.tex.first_layer);
OUT_RELOC(ring, rsc->bo, offset, 0, 0);
} else {
OUT_RING(ring, 0x00000000);
}
 
/* pad the remaining entries w/ null: */
for (j = 1; j < BASETABLE_SZ; j++) {
OUT_RING(ring, 0x00000000);
}
}
}
 
void
fd3_emit_vertex_bufs(struct fd_ringbuffer *ring, struct fd3_emit *emit)
{
int32_t i, j, last = -1;
uint32_t total_in = 0;
const struct fd_vertex_state *vtx = emit->vtx;
struct ir3_shader_variant *vp = fd3_emit_get_vp(emit);
unsigned vertex_regid = regid(63, 0), instance_regid = regid(63, 0);
 
for (i = 0; i < vp->inputs_count; i++) {
uint8_t semantic = sem2name(vp->inputs[i].semantic);
if (semantic == TGSI_SEMANTIC_VERTEXID_NOBASE)
vertex_regid = vp->inputs[i].regid;
else if (semantic == TGSI_SEMANTIC_INSTANCEID)
instance_regid = vp->inputs[i].regid;
else if (i < vtx->vtx->num_elements && vp->inputs[i].compmask)
last = i;
}
 
/* hw doesn't like to be configured for zero vbo's, it seems: */
if (vtx->vtx->num_elements == 0 &&
vertex_regid == regid(63, 0) &&
instance_regid == regid(63, 0))
return;
 
for (i = 0, j = 0; i <= last; i++) {
assert(sem2name(vp->inputs[i].semantic) == 0);
if (vp->inputs[i].compmask) {
struct pipe_vertex_element *elem = &vtx->vtx->pipe[i];
const struct pipe_vertex_buffer *vb =
&vtx->vertexbuf.vb[elem->vertex_buffer_index];
struct fd_resource *rsc = fd_resource(vb->buffer);
enum pipe_format pfmt = elem->src_format;
enum a3xx_vtx_fmt fmt = fd3_pipe2vtx(pfmt);
bool switchnext = (i != last) ||
vertex_regid != regid(63, 0) ||
instance_regid != regid(63, 0);
bool isint = util_format_is_pure_integer(pfmt);
uint32_t fs = util_format_get_blocksize(pfmt);
 
debug_assert(fmt != ~0);
 
OUT_PKT0(ring, REG_A3XX_VFD_FETCH(j), 2);
OUT_RING(ring, A3XX_VFD_FETCH_INSTR_0_FETCHSIZE(fs - 1) |
A3XX_VFD_FETCH_INSTR_0_BUFSTRIDE(vb->stride) |
COND(switchnext, A3XX_VFD_FETCH_INSTR_0_SWITCHNEXT) |
A3XX_VFD_FETCH_INSTR_0_INDEXCODE(j) |
COND(elem->instance_divisor, A3XX_VFD_FETCH_INSTR_0_INSTANCED) |
A3XX_VFD_FETCH_INSTR_0_STEPRATE(MAX2(1, elem->instance_divisor)));
OUT_RELOC(ring, rsc->bo, vb->buffer_offset + elem->src_offset, 0, 0);
 
OUT_PKT0(ring, REG_A3XX_VFD_DECODE_INSTR(j), 1);
OUT_RING(ring, A3XX_VFD_DECODE_INSTR_CONSTFILL |
A3XX_VFD_DECODE_INSTR_WRITEMASK(vp->inputs[i].compmask) |
A3XX_VFD_DECODE_INSTR_FORMAT(fmt) |
A3XX_VFD_DECODE_INSTR_SWAP(fd3_pipe2swap(pfmt)) |
A3XX_VFD_DECODE_INSTR_REGID(vp->inputs[i].regid) |
A3XX_VFD_DECODE_INSTR_SHIFTCNT(fs) |
A3XX_VFD_DECODE_INSTR_LASTCOMPVALID |
COND(isint, A3XX_VFD_DECODE_INSTR_INT) |
COND(switchnext, A3XX_VFD_DECODE_INSTR_SWITCHNEXT));
 
total_in += vp->inputs[i].ncomp;
j++;
}
}
 
OUT_PKT0(ring, REG_A3XX_VFD_CONTROL_0, 2);
OUT_RING(ring, A3XX_VFD_CONTROL_0_TOTALATTRTOVS(total_in) |
A3XX_VFD_CONTROL_0_PACKETSIZE(2) |
A3XX_VFD_CONTROL_0_STRMDECINSTRCNT(j) |
A3XX_VFD_CONTROL_0_STRMFETCHINSTRCNT(j));
OUT_RING(ring, A3XX_VFD_CONTROL_1_MAXSTORAGE(1) | // XXX
A3XX_VFD_CONTROL_1_REGID4VTX(vertex_regid) |
A3XX_VFD_CONTROL_1_REGID4INST(instance_regid));
}
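 
/* As a concrete illustration: the solid_vbuf attribute set up in
* fd3_context.c (PIPE_FORMAT_R32G32B32_FLOAT with a 12 byte stride) would be
* fetched here with fs = util_format_get_blocksize() = 12, giving
* A3XX_VFD_FETCH_INSTR_0_FETCHSIZE(11), _BUFSTRIDE(12) and
* A3XX_VFD_DECODE_INSTR_SHIFTCNT(12), with the format coming from
* fd3_pipe2vtx(PIPE_FORMAT_R32G32B32_FLOAT).
*/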
 
void
fd3_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring,
struct fd3_emit *emit)
{
struct ir3_shader_variant *vp = fd3_emit_get_vp(emit);
struct ir3_shader_variant *fp = fd3_emit_get_fp(emit);
uint32_t dirty = emit->dirty;
 
emit_marker(ring, 5);
 
if (dirty & FD_DIRTY_SAMPLE_MASK) {
OUT_PKT0(ring, REG_A3XX_RB_MSAA_CONTROL, 1);
OUT_RING(ring, A3XX_RB_MSAA_CONTROL_DISABLE |
A3XX_RB_MSAA_CONTROL_SAMPLES(MSAA_ONE) |
A3XX_RB_MSAA_CONTROL_SAMPLE_MASK(ctx->sample_mask));
}
 
if ((dirty & (FD_DIRTY_ZSA | FD_DIRTY_PROG)) && !emit->key.binning_pass) {
uint32_t val = fd3_zsa_stateobj(ctx->zsa)->rb_render_control;
 
val |= COND(fp->frag_face, A3XX_RB_RENDER_CONTROL_FACENESS);
val |= COND(fp->frag_coord, A3XX_RB_RENDER_CONTROL_XCOORD |
A3XX_RB_RENDER_CONTROL_YCOORD |
A3XX_RB_RENDER_CONTROL_ZCOORD |
A3XX_RB_RENDER_CONTROL_WCOORD);
 
/* I suppose if we needed to (which I don't *think* we need
* to), we could emit this for binning pass too. But we
* would need to keep a different patch-list for binning
* vs render pass.
*/
 
OUT_PKT0(ring, REG_A3XX_RB_RENDER_CONTROL, 1);
OUT_RINGP(ring, val, &fd3_context(ctx)->rbrc_patches);
}
 
if (dirty & (FD_DIRTY_ZSA | FD_DIRTY_STENCIL_REF)) {
struct fd3_zsa_stateobj *zsa = fd3_zsa_stateobj(ctx->zsa);
struct pipe_stencil_ref *sr = &ctx->stencil_ref;
 
OUT_PKT0(ring, REG_A3XX_RB_ALPHA_REF, 1);
OUT_RING(ring, zsa->rb_alpha_ref);
 
OUT_PKT0(ring, REG_A3XX_RB_STENCIL_CONTROL, 1);
OUT_RING(ring, zsa->rb_stencil_control);
 
OUT_PKT0(ring, REG_A3XX_RB_STENCILREFMASK, 2);
OUT_RING(ring, zsa->rb_stencilrefmask |
A3XX_RB_STENCILREFMASK_STENCILREF(sr->ref_value[0]));
OUT_RING(ring, zsa->rb_stencilrefmask_bf |
A3XX_RB_STENCILREFMASK_BF_STENCILREF(sr->ref_value[1]));
}
 
if (dirty & (FD_DIRTY_ZSA | FD_DIRTY_PROG)) {
uint32_t val = fd3_zsa_stateobj(ctx->zsa)->rb_depth_control;
if (fp->writes_pos) {
val |= A3XX_RB_DEPTH_CONTROL_FRAG_WRITES_Z;
val |= A3XX_RB_DEPTH_CONTROL_EARLY_Z_DISABLE;
}
if (fp->has_kill) {
val |= A3XX_RB_DEPTH_CONTROL_EARLY_Z_DISABLE;
}
OUT_PKT0(ring, REG_A3XX_RB_DEPTH_CONTROL, 1);
OUT_RING(ring, val);
}
 
if (dirty & FD_DIRTY_RASTERIZER) {
struct fd3_rasterizer_stateobj *rasterizer =
fd3_rasterizer_stateobj(ctx->rasterizer);
 
OUT_PKT0(ring, REG_A3XX_GRAS_SU_MODE_CONTROL, 1);
OUT_RING(ring, rasterizer->gras_su_mode_control);
 
OUT_PKT0(ring, REG_A3XX_GRAS_SU_POINT_MINMAX, 2);
OUT_RING(ring, rasterizer->gras_su_point_minmax);
OUT_RING(ring, rasterizer->gras_su_point_size);
 
OUT_PKT0(ring, REG_A3XX_GRAS_SU_POLY_OFFSET_SCALE, 2);
OUT_RING(ring, rasterizer->gras_su_poly_offset_scale);
OUT_RING(ring, rasterizer->gras_su_poly_offset_offset);
}
 
if (dirty & (FD_DIRTY_RASTERIZER | FD_DIRTY_PROG)) {
uint32_t val = fd3_rasterizer_stateobj(ctx->rasterizer)
->gras_cl_clip_cntl;
val |= COND(fp->writes_pos, A3XX_GRAS_CL_CLIP_CNTL_ZCLIP_DISABLE);
val |= COND(fp->frag_coord, A3XX_GRAS_CL_CLIP_CNTL_ZCOORD |
A3XX_GRAS_CL_CLIP_CNTL_WCOORD);
OUT_PKT0(ring, REG_A3XX_GRAS_CL_CLIP_CNTL, 1);
OUT_RING(ring, val);
}
 
/* NOTE: since primitive_restart is not actually part of any
* state object, we need to make sure that we always emit
* PRIM_VTX_CNTL.. either that or be more clever and detect
* when it changes.
*/
if (emit->info) {
const struct pipe_draw_info *info = emit->info;
uint32_t val = fd3_rasterizer_stateobj(ctx->rasterizer)
->pc_prim_vtx_cntl;
 
if (!emit->key.binning_pass) {
uint32_t stride_in_vpc = align(fp->total_in, 4) / 4;
if (stride_in_vpc > 0)
stride_in_vpc = MAX2(stride_in_vpc, 2);
val |= A3XX_PC_PRIM_VTX_CNTL_STRIDE_IN_VPC(stride_in_vpc);
}
 
if (info->indexed && info->primitive_restart) {
val |= A3XX_PC_PRIM_VTX_CNTL_PRIMITIVE_RESTART;
}
 
val |= COND(vp->writes_psize, A3XX_PC_PRIM_VTX_CNTL_PSIZE);
 
OUT_PKT0(ring, REG_A3XX_PC_PRIM_VTX_CNTL, 1);
OUT_RING(ring, val);
}
 
if (dirty & FD_DIRTY_SCISSOR) {
struct pipe_scissor_state *scissor = fd_context_get_scissor(ctx);
 
OUT_PKT0(ring, REG_A3XX_GRAS_SC_WINDOW_SCISSOR_TL, 2);
OUT_RING(ring, A3XX_GRAS_SC_WINDOW_SCISSOR_TL_X(scissor->minx) |
A3XX_GRAS_SC_WINDOW_SCISSOR_TL_Y(scissor->miny));
OUT_RING(ring, A3XX_GRAS_SC_WINDOW_SCISSOR_BR_X(scissor->maxx - 1) |
A3XX_GRAS_SC_WINDOW_SCISSOR_BR_Y(scissor->maxy - 1));
 
ctx->max_scissor.minx = MIN2(ctx->max_scissor.minx, scissor->minx);
ctx->max_scissor.miny = MIN2(ctx->max_scissor.miny, scissor->miny);
ctx->max_scissor.maxx = MAX2(ctx->max_scissor.maxx, scissor->maxx);
ctx->max_scissor.maxy = MAX2(ctx->max_scissor.maxy, scissor->maxy);
}
 
if (dirty & FD_DIRTY_VIEWPORT) {
fd_wfi(ctx, ring);
OUT_PKT0(ring, REG_A3XX_GRAS_CL_VPORT_XOFFSET, 6);
OUT_RING(ring, A3XX_GRAS_CL_VPORT_XOFFSET(ctx->viewport.translate[0] - 0.5));
OUT_RING(ring, A3XX_GRAS_CL_VPORT_XSCALE(ctx->viewport.scale[0]));
OUT_RING(ring, A3XX_GRAS_CL_VPORT_YOFFSET(ctx->viewport.translate[1] - 0.5));
OUT_RING(ring, A3XX_GRAS_CL_VPORT_YSCALE(ctx->viewport.scale[1]));
OUT_RING(ring, A3XX_GRAS_CL_VPORT_ZOFFSET(ctx->viewport.translate[2]));
OUT_RING(ring, A3XX_GRAS_CL_VPORT_ZSCALE(ctx->viewport.scale[2]));
}
 
if (dirty & (FD_DIRTY_PROG | FD_DIRTY_FRAMEBUFFER)) {
struct pipe_framebuffer_state *pfb = &ctx->framebuffer;
fd3_program_emit(ring, emit, pfb->nr_cbufs, pfb->cbufs);
}
 
/* TODO we should not need this or fd_wfi() before emit_constants():
*/
OUT_PKT3(ring, CP_EVENT_WRITE, 1);
OUT_RING(ring, HLSQ_FLUSH);
 
if ((dirty & (FD_DIRTY_PROG | FD_DIRTY_CONSTBUF)) &&
/* evil hack to deal sanely with clear path: */
(emit->prog == &ctx->prog)) {
fd_wfi(ctx, ring);
emit_constants(ring, SB_VERT_SHADER,
&ctx->constbuf[PIPE_SHADER_VERTEX],
vp, emit->prog->dirty & FD_SHADER_DIRTY_VP);
if (!emit->key.binning_pass) {
emit_constants(ring, SB_FRAG_SHADER,
&ctx->constbuf[PIPE_SHADER_FRAGMENT],
fp, emit->prog->dirty & FD_SHADER_DIRTY_FP);
}
}
 
/* emit driver params every time */
if (emit->info && emit->prog == &ctx->prog) {
uint32_t vertex_params[4] = {
emit->info->indexed ? emit->info->index_bias : emit->info->start,
0,
0,
0
};
if (vp->constlen >= vp->first_driver_param + 4) {
fd3_emit_constant(ring, SB_VERT_SHADER,
(vp->first_driver_param + 4) * 4,
0, 4, vertex_params, NULL);
}
}
 
if ((dirty & (FD_DIRTY_BLEND | FD_DIRTY_FRAMEBUFFER)) && ctx->blend) {
struct fd3_blend_stateobj *blend = fd3_blend_stateobj(ctx->blend);
uint32_t i;
 
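/* per-MRT fixups based on the bound color buffer format: integer
* formats cannot blend, so force ROP_COPY; formats without alpha use
* the no-alpha RGB blend factors; and low-precision (<8 bit) components
* with a partial colormask need dest-read enabled:
*/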
for (i = 0; i < ARRAY_SIZE(blend->rb_mrt); i++) {
enum pipe_format format = pipe_surface_format(ctx->framebuffer.cbufs[i]);
const struct util_format_description *desc =
util_format_description(format);
bool is_float = util_format_is_float(format);
bool is_int = util_format_is_pure_integer(format);
bool has_alpha = util_format_has_alpha(format);
uint32_t control = blend->rb_mrt[i].control;
uint32_t blend_control = blend->rb_mrt[i].blend_control_alpha;
 
if (is_int) {
control &= (A3XX_RB_MRT_CONTROL_COMPONENT_ENABLE__MASK |
A3XX_RB_MRT_CONTROL_DITHER_MODE__MASK);
control |= A3XX_RB_MRT_CONTROL_ROP_CODE(ROP_COPY);
}
 
if (format == PIPE_FORMAT_NONE)
control &= ~A3XX_RB_MRT_CONTROL_COMPONENT_ENABLE__MASK;
 
if (has_alpha) {
blend_control |= blend->rb_mrt[i].blend_control_rgb;
} else {
blend_control |= blend->rb_mrt[i].blend_control_no_alpha_rgb;
control &= ~A3XX_RB_MRT_CONTROL_BLEND2;
}
 
if (format && util_format_get_component_bits(
format, UTIL_FORMAT_COLORSPACE_RGB, 0) < 8) {
const struct pipe_rt_blend_state *rt;
if (ctx->blend->independent_blend_enable)
rt = &ctx->blend->rt[i];
else
rt = &ctx->blend->rt[0];
 
if (!util_format_colormask_full(desc, rt->colormask))
control |= A3XX_RB_MRT_CONTROL_READ_DEST_ENABLE;
}
 
OUT_PKT0(ring, REG_A3XX_RB_MRT_CONTROL(i), 1);
OUT_RING(ring, control);
 
OUT_PKT0(ring, REG_A3XX_RB_MRT_BLEND_CONTROL(i), 1);
OUT_RING(ring, blend_control |
COND(!is_float, A3XX_RB_MRT_BLEND_CONTROL_CLAMP_ENABLE));
}
}
 
if (dirty & FD_DIRTY_BLEND_COLOR) {
struct pipe_blend_color *bcolor = &ctx->blend_color;
OUT_PKT0(ring, REG_A3XX_RB_BLEND_RED, 4);
OUT_RING(ring, A3XX_RB_BLEND_RED_UINT(bcolor->color[0] * 255.0) |
A3XX_RB_BLEND_RED_FLOAT(bcolor->color[0]));
OUT_RING(ring, A3XX_RB_BLEND_GREEN_UINT(bcolor->color[1] * 255.0) |
A3XX_RB_BLEND_GREEN_FLOAT(bcolor->color[1]));
OUT_RING(ring, A3XX_RB_BLEND_BLUE_UINT(bcolor->color[2] * 255.0) |
A3XX_RB_BLEND_BLUE_FLOAT(bcolor->color[2]));
OUT_RING(ring, A3XX_RB_BLEND_ALPHA_UINT(bcolor->color[3] * 255.0) |
A3XX_RB_BLEND_ALPHA_FLOAT(bcolor->color[3]));
}
 
if (dirty & (FD_DIRTY_VERTTEX | FD_DIRTY_FRAGTEX))
fd_wfi(ctx, ring);
 
if (dirty & FD_DIRTY_VERTTEX) {
if (vp->has_samp)
emit_textures(ctx, ring, SB_VERT_TEX, &ctx->verttex);
else
dirty &= ~FD_DIRTY_VERTTEX;
}
 
if (dirty & FD_DIRTY_FRAGTEX) {
if (fp->has_samp)
emit_textures(ctx, ring, SB_FRAG_TEX, &ctx->fragtex);
else
dirty &= ~FD_DIRTY_FRAGTEX;
}
 
ctx->dirty &= ~dirty;
}
 
/* emit setup at the beginning of a new cmdstream buffer (don't rely on
* previous state; there could have been a context switch between ioctls):
*/
void
fd3_emit_restore(struct fd_context *ctx)
{
struct fd3_context *fd3_ctx = fd3_context(ctx);
struct fd_ringbuffer *ring = ctx->ring;
int i;
 
if (ctx->screen->gpu_id == 320) {
OUT_PKT3(ring, CP_REG_RMW, 3);
OUT_RING(ring, REG_A3XX_RBBM_CLOCK_CTL);
OUT_RING(ring, 0xfffcffff);
OUT_RING(ring, 0x00000000);
}
 
fd_wfi(ctx, ring);
OUT_PKT3(ring, CP_INVALIDATE_STATE, 1);
OUT_RING(ring, 0x00007fff);
 
OUT_PKT0(ring, REG_A3XX_SP_VS_PVT_MEM_PARAM_REG, 3);
OUT_RING(ring, 0x08000001); /* SP_VS_PVT_MEM_CTRL_REG */
OUT_RELOC(ring, fd3_ctx->vs_pvt_mem, 0,0,0); /* SP_VS_PVT_MEM_ADDR_REG */
OUT_RING(ring, 0x00000000); /* SP_VS_PVT_MEM_SIZE_REG */
 
OUT_PKT0(ring, REG_A3XX_SP_FS_PVT_MEM_PARAM_REG, 3);
OUT_RING(ring, 0x08000001); /* SP_FS_PVT_MEM_CTRL_REG */
OUT_RELOC(ring, fd3_ctx->fs_pvt_mem, 0,0,0); /* SP_FS_PVT_MEM_ADDR_REG */
OUT_RING(ring, 0x00000000); /* SP_FS_PVT_MEM_SIZE_REG */
 
OUT_PKT0(ring, REG_A3XX_PC_VERTEX_REUSE_BLOCK_CNTL, 1);
OUT_RING(ring, 0x0000000b); /* PC_VERTEX_REUSE_BLOCK_CNTL */
 
OUT_PKT0(ring, REG_A3XX_GRAS_SC_CONTROL, 1);
OUT_RING(ring, A3XX_GRAS_SC_CONTROL_RENDER_MODE(RB_RENDERING_PASS) |
A3XX_GRAS_SC_CONTROL_MSAA_SAMPLES(MSAA_ONE) |
A3XX_GRAS_SC_CONTROL_RASTER_MODE(0));
 
OUT_PKT0(ring, REG_A3XX_RB_MSAA_CONTROL, 2);
OUT_RING(ring, A3XX_RB_MSAA_CONTROL_DISABLE |
A3XX_RB_MSAA_CONTROL_SAMPLES(MSAA_ONE) |
A3XX_RB_MSAA_CONTROL_SAMPLE_MASK(0xffff));
OUT_RING(ring, 0x00000000); /* RB_ALPHA_REF */
 
OUT_PKT0(ring, REG_A3XX_GRAS_CL_GB_CLIP_ADJ, 1);
OUT_RING(ring, A3XX_GRAS_CL_GB_CLIP_ADJ_HORZ(0) |
A3XX_GRAS_CL_GB_CLIP_ADJ_VERT(0));
 
OUT_PKT0(ring, REG_A3XX_GRAS_TSE_DEBUG_ECO, 1);
OUT_RING(ring, 0x00000001); /* GRAS_TSE_DEBUG_ECO */
 
OUT_PKT0(ring, REG_A3XX_TPL1_TP_VS_TEX_OFFSET, 1);
OUT_RING(ring, A3XX_TPL1_TP_VS_TEX_OFFSET_SAMPLEROFFSET(VERT_TEX_OFF) |
A3XX_TPL1_TP_VS_TEX_OFFSET_MEMOBJOFFSET(VERT_TEX_OFF) |
A3XX_TPL1_TP_VS_TEX_OFFSET_BASETABLEPTR(BASETABLE_SZ * VERT_TEX_OFF));
 
OUT_PKT0(ring, REG_A3XX_TPL1_TP_FS_TEX_OFFSET, 1);
OUT_RING(ring, A3XX_TPL1_TP_FS_TEX_OFFSET_SAMPLEROFFSET(FRAG_TEX_OFF) |
A3XX_TPL1_TP_FS_TEX_OFFSET_MEMOBJOFFSET(FRAG_TEX_OFF) |
A3XX_TPL1_TP_FS_TEX_OFFSET_BASETABLEPTR(BASETABLE_SZ * FRAG_TEX_OFF));
 
OUT_PKT0(ring, REG_A3XX_VPC_VARY_CYLWRAP_ENABLE_0, 2);
OUT_RING(ring, 0x00000000); /* VPC_VARY_CYLWRAP_ENABLE_0 */
OUT_RING(ring, 0x00000000); /* VPC_VARY_CYLWRAP_ENABLE_1 */
 
OUT_PKT0(ring, REG_A3XX_UNKNOWN_0E43, 1);
OUT_RING(ring, 0x00000001); /* UNKNOWN_0E43 */
 
OUT_PKT0(ring, REG_A3XX_UNKNOWN_0F03, 1);
OUT_RING(ring, 0x00000001); /* UNKNOWN_0F03 */
 
OUT_PKT0(ring, REG_A3XX_UNKNOWN_0EE0, 1);
OUT_RING(ring, 0x00000003); /* UNKNOWN_0EE0 */
 
OUT_PKT0(ring, REG_A3XX_UNKNOWN_0C3D, 1);
OUT_RING(ring, 0x00000001); /* UNKNOWN_0C3D */
 
OUT_PKT0(ring, REG_A3XX_HLSQ_PERFCOUNTER0_SELECT, 1);
OUT_RING(ring, 0x00000000); /* HLSQ_PERFCOUNTER0_SELECT */
 
OUT_PKT0(ring, REG_A3XX_HLSQ_CONST_VSPRESV_RANGE_REG, 2);
OUT_RING(ring, A3XX_HLSQ_CONST_VSPRESV_RANGE_REG_STARTENTRY(0) |
A3XX_HLSQ_CONST_VSPRESV_RANGE_REG_ENDENTRY(0));
OUT_RING(ring, A3XX_HLSQ_CONST_FSPRESV_RANGE_REG_STARTENTRY(0) |
A3XX_HLSQ_CONST_FSPRESV_RANGE_REG_ENDENTRY(0));
 
OUT_PKT0(ring, REG_A3XX_UCHE_CACHE_INVALIDATE0_REG, 2);
OUT_RING(ring, A3XX_UCHE_CACHE_INVALIDATE0_REG_ADDR(0));
OUT_RING(ring, A3XX_UCHE_CACHE_INVALIDATE1_REG_ADDR(0) |
A3XX_UCHE_CACHE_INVALIDATE1_REG_OPCODE(INVALIDATE) |
A3XX_UCHE_CACHE_INVALIDATE1_REG_ENTIRE_CACHE);
 
OUT_PKT0(ring, REG_A3XX_GRAS_CL_CLIP_CNTL, 1);
OUT_RING(ring, 0x00000000); /* GRAS_CL_CLIP_CNTL */
 
OUT_PKT0(ring, REG_A3XX_GRAS_SU_POINT_MINMAX, 2);
OUT_RING(ring, 0xffc00010); /* GRAS_SU_POINT_MINMAX */
OUT_RING(ring, 0x00000008); /* GRAS_SU_POINT_SIZE */
 
OUT_PKT0(ring, REG_A3XX_PC_RESTART_INDEX, 1);
OUT_RING(ring, 0xffffffff); /* PC_RESTART_INDEX */
 
OUT_PKT0(ring, REG_A3XX_RB_WINDOW_OFFSET, 1);
OUT_RING(ring, A3XX_RB_WINDOW_OFFSET_X(0) |
A3XX_RB_WINDOW_OFFSET_Y(0));
 
OUT_PKT0(ring, REG_A3XX_RB_BLEND_RED, 4);
OUT_RING(ring, A3XX_RB_BLEND_RED_UINT(0) |
A3XX_RB_BLEND_RED_FLOAT(0.0));
OUT_RING(ring, A3XX_RB_BLEND_GREEN_UINT(0) |
A3XX_RB_BLEND_GREEN_FLOAT(0.0));
OUT_RING(ring, A3XX_RB_BLEND_BLUE_UINT(0) |
A3XX_RB_BLEND_BLUE_FLOAT(0.0));
OUT_RING(ring, A3XX_RB_BLEND_ALPHA_UINT(0xff) |
A3XX_RB_BLEND_ALPHA_FLOAT(1.0));
 
for (i = 0; i < 6; i++) {
OUT_PKT0(ring, REG_A3XX_GRAS_CL_USER_PLANE(i), 4);
OUT_RING(ring, 0x00000000); /* GRAS_CL_USER_PLANE[i].X */
OUT_RING(ring, 0x00000000); /* GRAS_CL_USER_PLANE[i].Y */
OUT_RING(ring, 0x00000000); /* GRAS_CL_USER_PLANE[i].Z */
OUT_RING(ring, 0x00000000); /* GRAS_CL_USER_PLANE[i].W */
}
 
OUT_PKT0(ring, REG_A3XX_PC_VSTREAM_CONTROL, 1);
OUT_RING(ring, 0x00000000);
 
fd_event_write(ctx, ring, CACHE_FLUSH);
 
if (is_a3xx_p0(ctx->screen)) {
OUT_PKT3(ring, CP_DRAW_INDX, 3);
OUT_RING(ring, 0x00000000);
OUT_RING(ring, DRAW(1, DI_SRC_SEL_AUTO_INDEX,
INDEX_SIZE_IGN, IGNORE_VISIBILITY, 0));
OUT_RING(ring, 0); /* NumIndices */
}
 
OUT_PKT3(ring, CP_NOP, 4);
OUT_RING(ring, 0x00000000);
OUT_RING(ring, 0x00000000);
OUT_RING(ring, 0x00000000);
OUT_RING(ring, 0x00000000);
 
fd_wfi(ctx, ring);
 
ctx->needs_rb_fbd = true;
}
/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/freedreno/a3xx/fd3_emit.h
0,0 → 1,93
/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */
 
/*
* Copyright (C) 2013 Rob Clark <robclark@freedesktop.org>
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Authors:
* Rob Clark <robclark@freedesktop.org>
*/
 
#ifndef FD3_EMIT_H
#define FD3_EMIT_H
 
#include "pipe/p_context.h"
 
#include "freedreno_context.h"
#include "fd3_format.h"
#include "fd3_program.h"
#include "ir3_shader.h"
 
struct fd_ringbuffer;
enum adreno_state_block;
 
void fd3_emit_constant(struct fd_ringbuffer *ring,
enum adreno_state_block sb,
uint32_t regid, uint32_t offset, uint32_t sizedwords,
const uint32_t *dwords, struct pipe_resource *prsc);
 
void fd3_emit_gmem_restore_tex(struct fd_ringbuffer *ring,
struct pipe_surface **psurf, int bufs);
 
/* grouped together emit-state for prog/vertex/state emit: */
struct fd3_emit {
const struct fd_vertex_state *vtx;
const struct fd_program_stateobj *prog;
const struct pipe_draw_info *info;
struct ir3_shader_key key;
uint32_t dirty;
 
uint32_t sprite_coord_enable;
bool sprite_coord_mode;
bool rasterflat;
 
/* cached to avoid repeated lookups of same variants: */
struct ir3_shader_variant *vp, *fp;
};
 
static inline struct ir3_shader_variant *
fd3_emit_get_vp(struct fd3_emit *emit)
{
if (!emit->vp) {
struct fd3_shader_stateobj *so = emit->prog->vp;
emit->vp = ir3_shader_variant(so->shader, emit->key);
}
return emit->vp;
}
 
static inline struct ir3_shader_variant *
fd3_emit_get_fp(struct fd3_emit *emit)
{
if (!emit->fp) {
struct fd3_shader_stateobj *so = emit->prog->fp;
emit->fp = ir3_shader_variant(so->shader, emit->key);
}
return emit->fp;
}
 
void fd3_emit_vertex_bufs(struct fd_ringbuffer *ring, struct fd3_emit *emit);
 
void fd3_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring,
struct fd3_emit *emit);
 
void fd3_emit_restore(struct fd_context *ctx);
 
#endif /* FD3_EMIT_H */
/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/freedreno/a3xx/fd3_format.c
0,0 → 1,384
/*
* Copyright (C) 2013 Rob Clark <robclark@freedesktop.org>
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
*/
 
#include "pipe/p_defines.h"
#include "util/u_format.h"
 
#include "fd3_format.h"
 
/* Specifies the table of all the formats and their features. Also supplies
* the helpers that look up various data in those tables.
*/
 
struct fd3_format {
enum a3xx_vtx_fmt vtx;
enum a3xx_tex_fmt tex;
enum a3xx_color_fmt rb;
enum a3xx_color_swap swap;
boolean present;
};
 
#define RB_NONE ~0
 
/* vertex + texture */
#define VT(pipe, fmt, rbfmt, swapfmt) \
[PIPE_FORMAT_ ## pipe] = { \
.present = 1, \
.vtx = VFMT_ ## fmt, \
.tex = TFMT_ ## fmt, \
.rb = RB_ ## rbfmt, \
.swap = swapfmt \
}
 
/* texture-only */
#define _T(pipe, fmt, rbfmt, swapfmt) \
[PIPE_FORMAT_ ## pipe] = { \
.present = 1, \
.vtx = ~0, \
.tex = TFMT_ ## fmt, \
.rb = RB_ ## rbfmt, \
.swap = swapfmt \
}
 
/* vertex-only */
#define V_(pipe, fmt, rbfmt, swapfmt) \
[PIPE_FORMAT_ ## pipe] = { \
.present = 1, \
.vtx = VFMT_ ## fmt, \
.tex = ~0, \
.rb = RB_ ## rbfmt, \
.swap = swapfmt \
}
 
static struct fd3_format formats[PIPE_FORMAT_COUNT] = {
/* 8-bit */
VT(R8_UNORM, 8_UNORM, R8_UNORM, WZYX),
VT(R8_SNORM, 8_SNORM, NONE, WZYX),
VT(R8_UINT, 8_UINT, R8_UINT, WZYX),
VT(R8_SINT, 8_SINT, R8_SINT, WZYX),
V_(R8_USCALED, 8_UINT, NONE, WZYX),
V_(R8_SSCALED, 8_UINT, NONE, WZYX),
 
_T(A8_UNORM, 8_UNORM, A8_UNORM, WZYX),
_T(L8_UNORM, 8_UNORM, R8_UNORM, WZYX),
_T(I8_UNORM, 8_UNORM, NONE, WZYX),
 
_T(A8_UINT, 8_UINT, NONE, WZYX),
_T(A8_SINT, 8_SINT, NONE, WZYX),
_T(L8_UINT, 8_UINT, NONE, WZYX),
_T(L8_SINT, 8_SINT, NONE, WZYX),
_T(I8_UINT, 8_UINT, NONE, WZYX),
_T(I8_SINT, 8_SINT, NONE, WZYX),
 
_T(S8_UINT, 8_UINT, R8_UNORM, WZYX),
 
/* 16-bit */
VT(R16_UNORM, 16_UNORM, NONE, WZYX),
VT(R16_SNORM, 16_SNORM, NONE, WZYX),
VT(R16_UINT, 16_UINT, R16_UINT, WZYX),
VT(R16_SINT, 16_SINT, R16_SINT, WZYX),
V_(R16_USCALED, 16_UINT, NONE, WZYX),
V_(R16_SSCALED, 16_UINT, NONE, WZYX),
VT(R16_FLOAT, 16_FLOAT, R16_FLOAT,WZYX),
 
_T(A16_UINT, 16_UINT, NONE, WZYX),
_T(A16_SINT, 16_SINT, NONE, WZYX),
_T(L16_UINT, 16_UINT, NONE, WZYX),
_T(L16_SINT, 16_SINT, NONE, WZYX),
_T(I16_UINT, 16_UINT, NONE, WZYX),
_T(I16_SINT, 16_SINT, NONE, WZYX),
 
VT(R8G8_UNORM, 8_8_UNORM, R8G8_UNORM, WZYX),
VT(R8G8_SNORM, 8_8_SNORM, R8G8_SNORM, WZYX),
VT(R8G8_UINT, 8_8_UINT, NONE, WZYX),
VT(R8G8_SINT, 8_8_SINT, NONE, WZYX),
V_(R8G8_USCALED, 8_8_UINT, NONE, WZYX),
V_(R8G8_SSCALED, 8_8_SINT, NONE, WZYX),
 
_T(L8A8_UINT, 8_8_UINT, NONE, WZYX),
_T(L8A8_SINT, 8_8_SINT, NONE, WZYX),
 
_T(Z16_UNORM, Z16_UNORM, R8G8_UNORM, WZYX),
_T(B5G6R5_UNORM, 5_6_5_UNORM, R5G6B5_UNORM, WXYZ),
_T(B5G5R5A1_UNORM, 5_5_5_1_UNORM, R5G5B5A1_UNORM, WXYZ),
_T(B5G5R5X1_UNORM, 5_5_5_1_UNORM, R5G5B5A1_UNORM, WXYZ),
_T(B4G4R4A4_UNORM, 4_4_4_4_UNORM, R4G4B4A4_UNORM, WXYZ),
 
/* 24-bit */
V_(R8G8B8_UNORM, 8_8_8_UNORM, NONE, WZYX),
V_(R8G8B8_SNORM, 8_8_8_SNORM, NONE, WZYX),
V_(R8G8B8_UINT, 8_8_8_UINT, NONE, WZYX),
V_(R8G8B8_SINT, 8_8_8_SINT, NONE, WZYX),
V_(R8G8B8_USCALED, 8_8_8_UINT, NONE, WZYX),
V_(R8G8B8_SSCALED, 8_8_8_SINT, NONE, WZYX),
 
/* 32-bit */
VT(R32_UINT, 32_UINT, R32_UINT, WZYX),
VT(R32_SINT, 32_SINT, R32_SINT, WZYX),
V_(R32_USCALED, 32_UINT, NONE, WZYX),
V_(R32_SSCALED, 32_UINT, NONE, WZYX),
VT(R32_FLOAT, 32_FLOAT, R32_FLOAT,WZYX),
V_(R32_FIXED, 32_FIXED, NONE, WZYX),
 
_T(A32_UINT, 32_UINT, NONE, WZYX),
_T(A32_SINT, 32_SINT, NONE, WZYX),
_T(L32_UINT, 32_UINT, NONE, WZYX),
_T(L32_SINT, 32_SINT, NONE, WZYX),
_T(I32_UINT, 32_UINT, NONE, WZYX),
_T(I32_SINT, 32_SINT, NONE, WZYX),
 
VT(R16G16_UNORM, 16_16_UNORM, NONE, WZYX),
VT(R16G16_SNORM, 16_16_SNORM, NONE, WZYX),
VT(R16G16_UINT, 16_16_UINT, R16G16_UINT, WZYX),
VT(R16G16_SINT, 16_16_SINT, R16G16_SINT, WZYX),
V_(R16G16_USCALED, 16_16_UINT, NONE, WZYX),
V_(R16G16_SSCALED, 16_16_SINT, NONE, WZYX),
VT(R16G16_FLOAT, 16_16_FLOAT, R16G16_FLOAT,WZYX),
 
_T(L16A16_UINT, 16_16_UINT, NONE, WZYX),
_T(L16A16_SINT, 16_16_SINT, NONE, WZYX),
 
VT(R8G8B8A8_UNORM, 8_8_8_8_UNORM, R8G8B8A8_UNORM, WZYX),
_T(R8G8B8X8_UNORM, 8_8_8_8_UNORM, R8G8B8A8_UNORM, WZYX),
_T(R8G8B8A8_SRGB, 8_8_8_8_UNORM, R8G8B8A8_UNORM, WZYX),
_T(R8G8B8X8_SRGB, 8_8_8_8_UNORM, R8G8B8A8_UNORM, WZYX),
VT(R8G8B8A8_SNORM, 8_8_8_8_SNORM, R8G8B8A8_SNORM, WZYX),
VT(R8G8B8A8_UINT, 8_8_8_8_UINT, R8G8B8A8_UINT, WZYX),
VT(R8G8B8A8_SINT, 8_8_8_8_SINT, R8G8B8A8_SINT, WZYX),
V_(R8G8B8A8_USCALED, 8_8_8_8_UINT, NONE, WZYX),
V_(R8G8B8A8_SSCALED, 8_8_8_8_SINT, NONE, WZYX),
 
VT(B8G8R8A8_UNORM, 8_8_8_8_UNORM, R8G8B8A8_UNORM, WXYZ),
_T(B8G8R8X8_UNORM, 8_8_8_8_UNORM, R8G8B8A8_UNORM, WXYZ),
VT(B8G8R8A8_SRGB, 8_8_8_8_UNORM, R8G8B8A8_UNORM, WXYZ),
_T(B8G8R8X8_SRGB, 8_8_8_8_UNORM, R8G8B8A8_UNORM, WXYZ),
 
VT(A8B8G8R8_UNORM, 8_8_8_8_UNORM, R8G8B8A8_UNORM, XYZW),
_T(X8B8G8R8_UNORM, 8_8_8_8_UNORM, R8G8B8A8_UNORM, XYZW),
_T(A8B8G8R8_SRGB, 8_8_8_8_UNORM, R8G8B8A8_UNORM, XYZW),
_T(X8B8G8R8_SRGB, 8_8_8_8_UNORM, R8G8B8A8_UNORM, XYZW),
 
VT(A8R8G8B8_UNORM, 8_8_8_8_UNORM, R8G8B8A8_UNORM, ZYXW),
_T(X8R8G8B8_UNORM, 8_8_8_8_UNORM, R8G8B8A8_UNORM, ZYXW),
_T(A8R8G8B8_SRGB, 8_8_8_8_UNORM, R8G8B8A8_UNORM, ZYXW),
_T(X8R8G8B8_SRGB, 8_8_8_8_UNORM, R8G8B8A8_UNORM, ZYXW),
 
VT(R10G10B10A2_UNORM, 10_10_10_2_UNORM, R10G10B10A2_UNORM, WZYX),
VT(B10G10R10A2_UNORM, 10_10_10_2_UNORM, R10G10B10A2_UNORM, WXYZ),
_T(B10G10R10X2_UNORM, 10_10_10_2_UNORM, R10G10B10A2_UNORM, WXYZ),
V_(R10G10B10A2_SNORM, 10_10_10_2_SNORM, NONE, WZYX),
V_(R10G10B10A2_UINT, 10_10_10_2_UINT, NONE, WZYX),
V_(R10G10B10A2_USCALED, 10_10_10_2_UINT, NONE, WZYX),
V_(R10G10B10A2_SSCALED, 10_10_10_2_SINT, NONE, WZYX),
 
_T(R11G11B10_FLOAT, 11_11_10_FLOAT, R11G11B10_FLOAT, WZYX),
_T(R9G9B9E5_FLOAT, 9_9_9_E5_FLOAT, NONE, WZYX),
 
_T(Z24X8_UNORM, X8Z24_UNORM, R8G8B8A8_UNORM, WZYX),
_T(Z24_UNORM_S8_UINT, X8Z24_UNORM, R8G8B8A8_UNORM, WZYX),
_T(Z32_FLOAT, Z32_FLOAT, R8G8B8A8_UNORM, WZYX),
_T(Z32_FLOAT_S8X24_UINT, Z32_FLOAT,R8G8B8A8_UNORM, WZYX),
 
/* 48-bit */
V_(R16G16B16_UNORM, 16_16_16_UNORM, NONE, WZYX),
V_(R16G16B16_SNORM, 16_16_16_SNORM, NONE, WZYX),
V_(R16G16B16_UINT, 16_16_16_UINT, NONE, WZYX),
V_(R16G16B16_SINT, 16_16_16_SINT, NONE, WZYX),
V_(R16G16B16_USCALED, 16_16_16_UINT, NONE, WZYX),
V_(R16G16B16_SSCALED, 16_16_16_SINT, NONE, WZYX),
V_(R16G16B16_FLOAT, 16_16_16_FLOAT, NONE, WZYX),
 
/* 64-bit */
VT(R16G16B16A16_UNORM, 16_16_16_16_UNORM, NONE, WZYX),
VT(R16G16B16A16_SNORM, 16_16_16_16_SNORM, NONE, WZYX),
VT(R16G16B16A16_UINT, 16_16_16_16_UINT, R16G16B16A16_UINT, WZYX),
_T(R16G16B16X16_UINT, 16_16_16_16_UINT, R16G16B16A16_UINT, WZYX),
VT(R16G16B16A16_SINT, 16_16_16_16_SINT, R16G16B16A16_SINT, WZYX),
_T(R16G16B16X16_SINT, 16_16_16_16_SINT, R16G16B16A16_SINT, WZYX),
V_(R16G16B16A16_USCALED, 16_16_16_16_UINT, NONE, WZYX),
V_(R16G16B16A16_SSCALED, 16_16_16_16_SINT, NONE, WZYX),
VT(R16G16B16A16_FLOAT, 16_16_16_16_FLOAT, R16G16B16A16_FLOAT, WZYX),
_T(R16G16B16X16_FLOAT, 16_16_16_16_FLOAT, R16G16B16A16_FLOAT, WZYX),
 
VT(R32G32_UINT, 32_32_UINT, R32G32_UINT, WZYX),
VT(R32G32_SINT, 32_32_SINT, R32G32_SINT, WZYX),
V_(R32G32_USCALED, 32_32_UINT, NONE, WZYX),
V_(R32G32_SSCALED, 32_32_SINT, NONE, WZYX),
VT(R32G32_FLOAT, 32_32_FLOAT, R32G32_FLOAT,WZYX),
V_(R32G32_FIXED, 32_32_FIXED, NONE, WZYX),
 
_T(L32A32_UINT, 32_32_UINT, NONE, WZYX),
_T(L32A32_SINT, 32_32_SINT, NONE, WZYX),
 
/* 96-bit */
V_(R32G32B32_UINT, 32_32_32_UINT, NONE, WZYX),
V_(R32G32B32_SINT, 32_32_32_SINT, NONE, WZYX),
V_(R32G32B32_USCALED, 32_32_32_UINT, NONE, WZYX),
V_(R32G32B32_SSCALED, 32_32_32_SINT, NONE, WZYX),
V_(R32G32B32_FLOAT, 32_32_32_FLOAT, NONE, WZYX),
V_(R32G32B32_FIXED, 32_32_32_FIXED, NONE, WZYX),
 
/* 128-bit */
VT(R32G32B32A32_UINT, 32_32_32_32_UINT, R32G32B32A32_UINT, WZYX),
_T(R32G32B32X32_UINT, 32_32_32_32_UINT, R32G32B32A32_UINT, WZYX),
VT(R32G32B32A32_SINT, 32_32_32_32_SINT, R32G32B32A32_SINT, WZYX),
_T(R32G32B32X32_SINT, 32_32_32_32_SINT, R32G32B32A32_SINT, WZYX),
V_(R32G32B32A32_USCALED, 32_32_32_32_UINT, NONE, WZYX),
V_(R32G32B32A32_SSCALED, 32_32_32_32_SINT, NONE, WZYX),
VT(R32G32B32A32_FLOAT, 32_32_32_32_FLOAT, R32G32B32A32_FLOAT, WZYX),
_T(R32G32B32X32_FLOAT, 32_32_32_32_FLOAT, R32G32B32A32_FLOAT, WZYX),
V_(R32G32B32A32_FIXED, 32_32_32_32_FIXED, NONE, WZYX),
 
/* compressed */
_T(ETC1_RGB8, ETC1, NONE, WZYX),
_T(ETC2_RGB8, ETC2_RGB8, NONE, WZYX),
_T(ETC2_SRGB8, ETC2_RGB8, NONE, WZYX),
_T(ETC2_RGB8A1, ETC2_RGB8A1, NONE, WZYX),
_T(ETC2_SRGB8A1, ETC2_RGB8A1, NONE, WZYX),
_T(ETC2_RGBA8, ETC2_RGBA8, NONE, WZYX),
_T(ETC2_SRGBA8, ETC2_RGBA8, NONE, WZYX),
_T(ETC2_R11_UNORM, ETC2_R11_UNORM, NONE, WZYX),
_T(ETC2_R11_SNORM, ETC2_R11_SNORM, NONE, WZYX),
_T(ETC2_RG11_UNORM, ETC2_RG11_UNORM, NONE, WZYX),
_T(ETC2_RG11_SNORM, ETC2_RG11_SNORM, NONE, WZYX),
};
 
enum a3xx_vtx_fmt
fd3_pipe2vtx(enum pipe_format format)
{
if (!formats[format].present)
return ~0;
return formats[format].vtx;
}
 
enum a3xx_tex_fmt
fd3_pipe2tex(enum pipe_format format)
{
if (!formats[format].present)
return ~0;
return formats[format].tex;
}
 
enum a3xx_color_fmt
fd3_pipe2color(enum pipe_format format)
{
if (!formats[format].present)
return ~0;
return formats[format].rb;
}
 
enum a3xx_color_swap
fd3_pipe2swap(enum pipe_format format)
{
if (!formats[format].present)
return WZYX;
return formats[format].swap;
}
 
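/* map a format's block size in bits to the TEX fetch-size enum: */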
enum a3xx_tex_fetchsize
fd3_pipe2fetchsize(enum pipe_format format)
{
if (format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT)
format = PIPE_FORMAT_Z32_FLOAT;
switch (util_format_get_blocksizebits(format)) {
case 8: return TFETCH_1_BYTE;
case 16: return TFETCH_2_BYTE;
case 32: return TFETCH_4_BYTE;
case 64: return TFETCH_8_BYTE;
case 128: return TFETCH_16_BYTE;
default:
debug_printf("Unknown block size for format %s: %d\n",
util_format_name(format),
util_format_get_blocksizebits(format));
return TFETCH_DISABLE;
}
}
 
/* we need to special-case the depth/stencil restore a bit, because we are
* using the texture sampler to blit into the depth/stencil buffer, *not*
* into a color buffer. Otherwise fd3_tex_swiz() will do the wrong thing,
* as it assumes that you are sampling into a normal render target.
*/
enum pipe_format
fd3_gmem_restore_format(enum pipe_format format)
{
switch (format) {
case PIPE_FORMAT_Z24X8_UNORM:
case PIPE_FORMAT_Z24_UNORM_S8_UINT:
return PIPE_FORMAT_R8G8B8A8_UNORM;
case PIPE_FORMAT_Z16_UNORM:
return PIPE_FORMAT_R8G8_UNORM;
case PIPE_FORMAT_S8_UINT:
return PIPE_FORMAT_R8_UNORM;
default:
return format;
}
}
 
enum a3xx_color_fmt
fd3_fs_output_format(enum pipe_format format)
{
if (util_format_is_srgb(format))
return RB_R16G16B16A16_FLOAT;
switch (format) {
case PIPE_FORMAT_R16_FLOAT:
case PIPE_FORMAT_R16G16_FLOAT:
case PIPE_FORMAT_R11G11B10_FLOAT:
return RB_R16G16B16A16_FLOAT;
default:
return fd3_pipe2color(format);
}
}
 
static inline enum a3xx_tex_swiz
tex_swiz(unsigned swiz)
{
switch (swiz) {
default:
case PIPE_SWIZZLE_RED: return A3XX_TEX_X;
case PIPE_SWIZZLE_GREEN: return A3XX_TEX_Y;
case PIPE_SWIZZLE_BLUE: return A3XX_TEX_Z;
case PIPE_SWIZZLE_ALPHA: return A3XX_TEX_W;
case PIPE_SWIZZLE_ZERO: return A3XX_TEX_ZERO;
case PIPE_SWIZZLE_ONE: return A3XX_TEX_ONE;
}
}
 
uint32_t
fd3_tex_swiz(enum pipe_format format, unsigned swizzle_r, unsigned swizzle_g,
unsigned swizzle_b, unsigned swizzle_a)
{
const struct util_format_description *desc =
util_format_description(format);
unsigned char swiz[4] = {
swizzle_r, swizzle_g, swizzle_b, swizzle_a,
}, rswiz[4];
 
util_format_compose_swizzles(desc->swizzle, swiz, rswiz);
 
return A3XX_TEX_CONST_0_SWIZ_X(tex_swiz(rswiz[0])) |
A3XX_TEX_CONST_0_SWIZ_Y(tex_swiz(rswiz[1])) |
A3XX_TEX_CONST_0_SWIZ_Z(tex_swiz(rswiz[2])) |
A3XX_TEX_CONST_0_SWIZ_W(tex_swiz(rswiz[3]));
}
/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/freedreno/a3xx/fd3_format.h
0,0 → 1,67
/*
* Copyright (C) 2013 Rob Clark <robclark@freedesktop.org>
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
*/
 
#ifndef FD3_FORMAT_H_
#define FD3_FORMAT_H_
 
#include "util/u_format.h"
#include "freedreno_util.h"
 
#include "a3xx.xml.h"
 
enum a3xx_vtx_fmt fd3_pipe2vtx(enum pipe_format format);
enum a3xx_tex_fmt fd3_pipe2tex(enum pipe_format format);
enum a3xx_tex_fetchsize fd3_pipe2fetchsize(enum pipe_format format);
enum a3xx_color_fmt fd3_pipe2color(enum pipe_format format);
enum pipe_format fd3_gmem_restore_format(enum pipe_format format);
enum a3xx_color_fmt fd3_fs_output_format(enum pipe_format format);
enum a3xx_color_swap fd3_pipe2swap(enum pipe_format format);
 
uint32_t fd3_tex_swiz(enum pipe_format format, unsigned swizzle_r,
unsigned swizzle_g, unsigned swizzle_b, unsigned swizzle_a);
 
static INLINE bool
fd3_half_precision(const struct pipe_surface *surface)
{
enum pipe_format format;
if (!surface)
return true;
 
format = surface->format;
 
/* colors are provided in consts, which go through cov.f32f16, which will
* break these values
*/
if (util_format_is_pure_integer(format))
return false;
 
/* avoid losing precision on 32-bit float formats */
if (util_format_is_float(format) &&
util_format_get_component_bits(format, UTIL_FORMAT_COLORSPACE_RGB, 0) == 32)
return false;
 
return true;
}
 
#endif /* FD3_FORMAT_H_ */
/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/freedreno/a3xx/fd3_gmem.c
0,0 → 1,1057
/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */
 
/*
* Copyright (C) 2013 Rob Clark <robclark@freedesktop.org>
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Authors:
* Rob Clark <robclark@freedesktop.org>
*/
 
#include "pipe/p_state.h"
#include "util/u_string.h"
#include "util/u_memory.h"
#include "util/u_inlines.h"
#include "util/u_format.h"
 
#include "freedreno_draw.h"
#include "freedreno_state.h"
#include "freedreno_resource.h"
 
#include "fd3_gmem.h"
#include "fd3_context.h"
#include "fd3_emit.h"
#include "fd3_program.h"
#include "fd3_format.h"
#include "fd3_zsa.h"
 
static void
emit_mrt(struct fd_ringbuffer *ring, unsigned nr_bufs,
struct pipe_surface **bufs, uint32_t *bases, uint32_t bin_w,
bool decode_srgb)
{
enum a3xx_tile_mode tile_mode;
unsigned i;
 
if (bin_w) {
tile_mode = TILE_32X32;
} else {
tile_mode = LINEAR;
}
 
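/* emit RB_MRT_BUF_INFO/BUF_BASE and SP_FS_IMAGE_OUTPUT_REG for each of
* the four MRT slots.  When bin_w is set we are rendering into tiled
* GMEM, so the buffer base is a GMEM offset; otherwise we relocate
* directly to the resource's bo in system memory:
*/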
for (i = 0; i < 4; i++) {
enum pipe_format pformat = 0;
enum a3xx_color_fmt format = 0;
enum a3xx_color_swap swap = WZYX;
bool srgb = false;
struct fd_resource *rsc = NULL;
struct fd_resource_slice *slice = NULL;
uint32_t stride = 0;
uint32_t base = 0;
uint32_t offset = 0;
 
if ((i < nr_bufs) && bufs[i]) {
struct pipe_surface *psurf = bufs[i];
 
rsc = fd_resource(psurf->texture);
pformat = psurf->format;
/* In case we're drawing to Z32F_S8, the "color" actually goes to
* the stencil
*/
if (rsc->stencil) {
rsc = rsc->stencil;
pformat = rsc->base.b.format;
bases++;
}
slice = fd_resource_slice(rsc, psurf->u.tex.level);
format = fd3_pipe2color(pformat);
swap = fd3_pipe2swap(pformat);
if (decode_srgb)
srgb = util_format_is_srgb(pformat);
else
pformat = util_format_linear(pformat);
 
debug_assert(psurf->u.tex.first_layer == psurf->u.tex.last_layer);
 
offset = fd_resource_offset(rsc, psurf->u.tex.level,
psurf->u.tex.first_layer);
 
if (bin_w) {
stride = bin_w * rsc->cpp;
 
if (bases) {
base = bases[i];
}
} else {
stride = slice->pitch * rsc->cpp;
}
} else if (i < nr_bufs && bases) {
base = bases[i];
}
 
OUT_PKT0(ring, REG_A3XX_RB_MRT_BUF_INFO(i), 2);
OUT_RING(ring, A3XX_RB_MRT_BUF_INFO_COLOR_FORMAT(format) |
A3XX_RB_MRT_BUF_INFO_COLOR_TILE_MODE(tile_mode) |
A3XX_RB_MRT_BUF_INFO_COLOR_BUF_PITCH(stride) |
A3XX_RB_MRT_BUF_INFO_COLOR_SWAP(swap) |
COND(srgb, A3XX_RB_MRT_BUF_INFO_COLOR_SRGB));
if (bin_w || (i >= nr_bufs) || !bufs[i]) {
OUT_RING(ring, A3XX_RB_MRT_BUF_BASE_COLOR_BUF_BASE(base));
} else {
OUT_RELOCW(ring, rsc->bo, offset, 0, -1);
}
 
OUT_PKT0(ring, REG_A3XX_SP_FS_IMAGE_OUTPUT_REG(i), 1);
OUT_RING(ring, COND((i < nr_bufs) && bufs[i],
A3XX_SP_FS_IMAGE_OUTPUT_REG_MRTFORMAT(
fd3_fs_output_format(pformat))));
}
}
 
static bool
use_hw_binning(struct fd_context *ctx)
{
struct fd_gmem_stateobj *gmem = &ctx->gmem;
 
/* workaround: combining scissor optimization and hw binning
* seems problematic. Seems like we end up with a mismatch
* between binning pass and rendering pass, wrt. where the hw
* thinks the vertices belong. And the blob driver doesn't
* seem to implement anything like scissor optimization, so
* not entirely sure what I might be missing.
*
* But scissor optimization is mainly for window managers,
* which don't have many vertices (and therefore don't
* benefit much from the binning pass).
*
* So for now just disable binning if scissor optimization is
* used.
*/
if (gmem->minx || gmem->miny)
return false;
 
return fd_binning_enabled && ((gmem->nbins_x * gmem->nbins_y) > 2);
}
 
/* workaround for (hlsq?) lockup with hw binning on a3xx patchlevel 0 */
static void update_vsc_pipe(struct fd_context *ctx);
static void
emit_binning_workaround(struct fd_context *ctx)
{
struct fd3_context *fd3_ctx = fd3_context(ctx);
struct fd_gmem_stateobj *gmem = &ctx->gmem;
struct fd_ringbuffer *ring = ctx->ring;
struct fd3_emit emit = {
.vtx = &fd3_ctx->solid_vbuf_state,
.prog = &ctx->solid_prog,
.key = {
.half_precision = true,
},
};
 
OUT_PKT0(ring, REG_A3XX_RB_MODE_CONTROL, 2);
OUT_RING(ring, A3XX_RB_MODE_CONTROL_RENDER_MODE(RB_RESOLVE_PASS) |
A3XX_RB_MODE_CONTROL_MARB_CACHE_SPLIT_MODE |
A3XX_RB_MODE_CONTROL_MRT(0));
OUT_RING(ring, A3XX_RB_RENDER_CONTROL_BIN_WIDTH(32) |
A3XX_RB_RENDER_CONTROL_DISABLE_COLOR_PIPE |
A3XX_RB_RENDER_CONTROL_ALPHA_TEST_FUNC(FUNC_NEVER));
 
OUT_PKT0(ring, REG_A3XX_RB_COPY_CONTROL, 4);
OUT_RING(ring, A3XX_RB_COPY_CONTROL_MSAA_RESOLVE(MSAA_ONE) |
A3XX_RB_COPY_CONTROL_MODE(0) |
A3XX_RB_COPY_CONTROL_GMEM_BASE(0));
OUT_RELOCW(ring, fd_resource(fd3_ctx->solid_vbuf)->bo, 0x20, 0, -1); /* RB_COPY_DEST_BASE */
OUT_RING(ring, A3XX_RB_COPY_DEST_PITCH_PITCH(128));
OUT_RING(ring, A3XX_RB_COPY_DEST_INFO_TILE(LINEAR) |
A3XX_RB_COPY_DEST_INFO_FORMAT(RB_R8G8B8A8_UNORM) |
A3XX_RB_COPY_DEST_INFO_SWAP(WZYX) |
A3XX_RB_COPY_DEST_INFO_COMPONENT_ENABLE(0xf) |
A3XX_RB_COPY_DEST_INFO_ENDIAN(ENDIAN_NONE));
 
OUT_PKT0(ring, REG_A3XX_GRAS_SC_CONTROL, 1);
OUT_RING(ring, A3XX_GRAS_SC_CONTROL_RENDER_MODE(RB_RESOLVE_PASS) |
A3XX_GRAS_SC_CONTROL_MSAA_SAMPLES(MSAA_ONE) |
A3XX_GRAS_SC_CONTROL_RASTER_MODE(1));
 
fd3_program_emit(ring, &emit, 0, NULL);
fd3_emit_vertex_bufs(ring, &emit);
 
OUT_PKT0(ring, REG_A3XX_HLSQ_CONTROL_0_REG, 4);
OUT_RING(ring, A3XX_HLSQ_CONTROL_0_REG_FSTHREADSIZE(FOUR_QUADS) |
A3XX_HLSQ_CONTROL_0_REG_FSSUPERTHREADENABLE |
A3XX_HLSQ_CONTROL_0_REG_RESERVED2 |
A3XX_HLSQ_CONTROL_0_REG_SPCONSTFULLUPDATE);
OUT_RING(ring, A3XX_HLSQ_CONTROL_1_REG_VSTHREADSIZE(TWO_QUADS) |
A3XX_HLSQ_CONTROL_1_REG_VSSUPERTHREADENABLE);
OUT_RING(ring, A3XX_HLSQ_CONTROL_2_REG_PRIMALLOCTHRESHOLD(31));
OUT_RING(ring, 0); /* HLSQ_CONTROL_3_REG */
 
OUT_PKT0(ring, REG_A3XX_HLSQ_CONST_FSPRESV_RANGE_REG, 1);
OUT_RING(ring, A3XX_HLSQ_CONST_FSPRESV_RANGE_REG_STARTENTRY(0x20) |
A3XX_HLSQ_CONST_FSPRESV_RANGE_REG_ENDENTRY(0x20));
 
OUT_PKT0(ring, REG_A3XX_RB_MSAA_CONTROL, 1);
OUT_RING(ring, A3XX_RB_MSAA_CONTROL_DISABLE |
A3XX_RB_MSAA_CONTROL_SAMPLES(MSAA_ONE) |
A3XX_RB_MSAA_CONTROL_SAMPLE_MASK(0xffff));
 
OUT_PKT0(ring, REG_A3XX_RB_DEPTH_CONTROL, 1);
OUT_RING(ring, A3XX_RB_DEPTH_CONTROL_ZFUNC(FUNC_NEVER));
 
OUT_PKT0(ring, REG_A3XX_RB_STENCIL_CONTROL, 1);
OUT_RING(ring, A3XX_RB_STENCIL_CONTROL_FUNC(FUNC_NEVER) |
A3XX_RB_STENCIL_CONTROL_FAIL(STENCIL_KEEP) |
A3XX_RB_STENCIL_CONTROL_ZPASS(STENCIL_KEEP) |
A3XX_RB_STENCIL_CONTROL_ZFAIL(STENCIL_KEEP) |
A3XX_RB_STENCIL_CONTROL_FUNC_BF(FUNC_NEVER) |
A3XX_RB_STENCIL_CONTROL_FAIL_BF(STENCIL_KEEP) |
A3XX_RB_STENCIL_CONTROL_ZPASS_BF(STENCIL_KEEP) |
A3XX_RB_STENCIL_CONTROL_ZFAIL_BF(STENCIL_KEEP));
 
OUT_PKT0(ring, REG_A3XX_GRAS_SU_MODE_CONTROL, 1);
OUT_RING(ring, A3XX_GRAS_SU_MODE_CONTROL_LINEHALFWIDTH(0.0));
 
OUT_PKT0(ring, REG_A3XX_VFD_INDEX_MIN, 4);
OUT_RING(ring, 0); /* VFD_INDEX_MIN */
OUT_RING(ring, 2); /* VFD_INDEX_MAX */
OUT_RING(ring, 0); /* VFD_INSTANCEID_OFFSET */
OUT_RING(ring, 0); /* VFD_INDEX_OFFSET */
 
OUT_PKT0(ring, REG_A3XX_PC_PRIM_VTX_CNTL, 1);
OUT_RING(ring, A3XX_PC_PRIM_VTX_CNTL_STRIDE_IN_VPC(0) |
A3XX_PC_PRIM_VTX_CNTL_POLYMODE_FRONT_PTYPE(PC_DRAW_TRIANGLES) |
A3XX_PC_PRIM_VTX_CNTL_POLYMODE_BACK_PTYPE(PC_DRAW_TRIANGLES) |
A3XX_PC_PRIM_VTX_CNTL_PROVOKING_VTX_LAST);
 
OUT_PKT0(ring, REG_A3XX_GRAS_SC_WINDOW_SCISSOR_TL, 2);
OUT_RING(ring, A3XX_GRAS_SC_WINDOW_SCISSOR_TL_X(0) |
A3XX_GRAS_SC_WINDOW_SCISSOR_TL_Y(1));
OUT_RING(ring, A3XX_GRAS_SC_WINDOW_SCISSOR_BR_X(0) |
A3XX_GRAS_SC_WINDOW_SCISSOR_BR_Y(1));
 
OUT_PKT0(ring, REG_A3XX_GRAS_SC_SCREEN_SCISSOR_TL, 2);
OUT_RING(ring, A3XX_GRAS_SC_SCREEN_SCISSOR_TL_X(0) |
A3XX_GRAS_SC_SCREEN_SCISSOR_TL_Y(0));
OUT_RING(ring, A3XX_GRAS_SC_SCREEN_SCISSOR_BR_X(31) |
A3XX_GRAS_SC_SCREEN_SCISSOR_BR_Y(0));
 
fd_wfi(ctx, ring);
OUT_PKT0(ring, REG_A3XX_GRAS_CL_VPORT_XOFFSET, 6);
OUT_RING(ring, A3XX_GRAS_CL_VPORT_XOFFSET(0.0));
OUT_RING(ring, A3XX_GRAS_CL_VPORT_XSCALE(1.0));
OUT_RING(ring, A3XX_GRAS_CL_VPORT_YOFFSET(0.0));
OUT_RING(ring, A3XX_GRAS_CL_VPORT_YSCALE(1.0));
OUT_RING(ring, A3XX_GRAS_CL_VPORT_ZOFFSET(0.0));
OUT_RING(ring, A3XX_GRAS_CL_VPORT_ZSCALE(1.0));
 
OUT_PKT0(ring, REG_A3XX_GRAS_CL_CLIP_CNTL, 1);
OUT_RING(ring, A3XX_GRAS_CL_CLIP_CNTL_CLIP_DISABLE |
A3XX_GRAS_CL_CLIP_CNTL_ZFAR_CLIP_DISABLE |
A3XX_GRAS_CL_CLIP_CNTL_VP_CLIP_CODE_IGNORE |
A3XX_GRAS_CL_CLIP_CNTL_VP_XFORM_DISABLE |
A3XX_GRAS_CL_CLIP_CNTL_PERSP_DIVISION_DISABLE);
 
OUT_PKT0(ring, REG_A3XX_GRAS_CL_GB_CLIP_ADJ, 1);
OUT_RING(ring, A3XX_GRAS_CL_GB_CLIP_ADJ_HORZ(0) |
A3XX_GRAS_CL_GB_CLIP_ADJ_VERT(0));
 
OUT_PKT3(ring, CP_DRAW_INDX_2, 5);
OUT_RING(ring, 0x00000000); /* viz query info. */
OUT_RING(ring, DRAW(DI_PT_RECTLIST, DI_SRC_SEL_IMMEDIATE,
INDEX_SIZE_32_BIT, IGNORE_VISIBILITY, 0));
OUT_RING(ring, 2); /* NumIndices */
OUT_RING(ring, 2);
OUT_RING(ring, 1);
fd_reset_wfi(ctx);
 
OUT_PKT0(ring, REG_A3XX_HLSQ_CONTROL_0_REG, 1);
OUT_RING(ring, A3XX_HLSQ_CONTROL_0_REG_FSTHREADSIZE(TWO_QUADS));
 
OUT_PKT0(ring, REG_A3XX_VFD_PERFCOUNTER0_SELECT, 1);
OUT_RING(ring, 0x00000000);
 
fd_wfi(ctx, ring);
OUT_PKT0(ring, REG_A3XX_VSC_BIN_SIZE, 1);
OUT_RING(ring, A3XX_VSC_BIN_SIZE_WIDTH(gmem->bin_w) |
A3XX_VSC_BIN_SIZE_HEIGHT(gmem->bin_h));
 
OUT_PKT0(ring, REG_A3XX_GRAS_SC_CONTROL, 1);
OUT_RING(ring, A3XX_GRAS_SC_CONTROL_RENDER_MODE(RB_RENDERING_PASS) |
A3XX_GRAS_SC_CONTROL_MSAA_SAMPLES(MSAA_ONE) |
A3XX_GRAS_SC_CONTROL_RASTER_MODE(0));
 
OUT_PKT0(ring, REG_A3XX_GRAS_CL_CLIP_CNTL, 1);
OUT_RING(ring, 0x00000000);
}
 
/* transfer from gmem to system memory (i.e. normal RAM) */
 
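/* resolve a single surface from GMEM out to its backing resource in
* system memory, using the RB copy engine and a RECTLIST draw:
*/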
static void
emit_gmem2mem_surf(struct fd_context *ctx,
enum adreno_rb_copy_control_mode mode,
bool stencil,
uint32_t base, struct pipe_surface *psurf)
{
struct fd_ringbuffer *ring = ctx->ring;
struct fd_resource *rsc = fd_resource(psurf->texture);
enum pipe_format format = psurf->format;
if (stencil) {
rsc = rsc->stencil;
format = rsc->base.b.format;
}
struct fd_resource_slice *slice = fd_resource_slice(rsc, psurf->u.tex.level);
uint32_t offset = fd_resource_offset(rsc, psurf->u.tex.level,
psurf->u.tex.first_layer);
 
debug_assert(psurf->u.tex.first_layer == psurf->u.tex.last_layer);
 
OUT_PKT0(ring, REG_A3XX_RB_COPY_CONTROL, 4);
OUT_RING(ring, A3XX_RB_COPY_CONTROL_MSAA_RESOLVE(MSAA_ONE) |
A3XX_RB_COPY_CONTROL_MODE(mode) |
A3XX_RB_COPY_CONTROL_GMEM_BASE(base) |
COND(format == PIPE_FORMAT_Z32_FLOAT ||
format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT,
A3XX_RB_COPY_CONTROL_UNK12));
 
OUT_RELOCW(ring, rsc->bo, offset, 0, -1); /* RB_COPY_DEST_BASE */
OUT_RING(ring, A3XX_RB_COPY_DEST_PITCH_PITCH(slice->pitch * rsc->cpp));
OUT_RING(ring, A3XX_RB_COPY_DEST_INFO_TILE(LINEAR) |
A3XX_RB_COPY_DEST_INFO_FORMAT(fd3_pipe2color(format)) |
A3XX_RB_COPY_DEST_INFO_COMPONENT_ENABLE(0xf) |
A3XX_RB_COPY_DEST_INFO_ENDIAN(ENDIAN_NONE) |
A3XX_RB_COPY_DEST_INFO_SWAP(fd3_pipe2swap(format)));
 
fd_draw(ctx, ring, DI_PT_RECTLIST, IGNORE_VISIBILITY,
DI_SRC_SEL_AUTO_INDEX, 2, 0, INDEX_SIZE_IGN, 0, 0, NULL);
}
 
static void
fd3_emit_tile_gmem2mem(struct fd_context *ctx, struct fd_tile *tile)
{
struct fd3_context *fd3_ctx = fd3_context(ctx);
struct fd_ringbuffer *ring = ctx->ring;
struct pipe_framebuffer_state *pfb = &ctx->framebuffer;
struct fd3_emit emit = {
.vtx = &fd3_ctx->solid_vbuf_state,
.prog = &ctx->solid_prog,
.key = {
.half_precision = true,
},
};
int i;
 
OUT_PKT0(ring, REG_A3XX_RB_DEPTH_CONTROL, 1);
OUT_RING(ring, A3XX_RB_DEPTH_CONTROL_ZFUNC(FUNC_NEVER));
 
OUT_PKT0(ring, REG_A3XX_RB_STENCIL_CONTROL, 1);
OUT_RING(ring, A3XX_RB_STENCIL_CONTROL_FUNC(FUNC_NEVER) |
A3XX_RB_STENCIL_CONTROL_FAIL(STENCIL_KEEP) |
A3XX_RB_STENCIL_CONTROL_ZPASS(STENCIL_KEEP) |
A3XX_RB_STENCIL_CONTROL_ZFAIL(STENCIL_KEEP) |
A3XX_RB_STENCIL_CONTROL_FUNC_BF(FUNC_NEVER) |
A3XX_RB_STENCIL_CONTROL_FAIL_BF(STENCIL_KEEP) |
A3XX_RB_STENCIL_CONTROL_ZPASS_BF(STENCIL_KEEP) |
A3XX_RB_STENCIL_CONTROL_ZFAIL_BF(STENCIL_KEEP));
 
OUT_PKT0(ring, REG_A3XX_RB_STENCILREFMASK, 2);
OUT_RING(ring, 0xff000000 |
A3XX_RB_STENCILREFMASK_STENCILREF(0) |
A3XX_RB_STENCILREFMASK_STENCILMASK(0) |
A3XX_RB_STENCILREFMASK_STENCILWRITEMASK(0xff));
OUT_RING(ring, 0xff000000 |
A3XX_RB_STENCILREFMASK_STENCILREF(0) |
A3XX_RB_STENCILREFMASK_STENCILMASK(0) |
A3XX_RB_STENCILREFMASK_STENCILWRITEMASK(0xff));
 
OUT_PKT0(ring, REG_A3XX_GRAS_SU_MODE_CONTROL, 1);
OUT_RING(ring, A3XX_GRAS_SU_MODE_CONTROL_LINEHALFWIDTH(0));
 
OUT_PKT0(ring, REG_A3XX_GRAS_CL_CLIP_CNTL, 1);
OUT_RING(ring, 0x00000000); /* GRAS_CL_CLIP_CNTL */
 
fd_wfi(ctx, ring);
OUT_PKT0(ring, REG_A3XX_GRAS_CL_VPORT_XOFFSET, 6);
OUT_RING(ring, A3XX_GRAS_CL_VPORT_XOFFSET((float)pfb->width/2.0 - 0.5));
OUT_RING(ring, A3XX_GRAS_CL_VPORT_XSCALE((float)pfb->width/2.0));
OUT_RING(ring, A3XX_GRAS_CL_VPORT_YOFFSET((float)pfb->height/2.0 - 0.5));
OUT_RING(ring, A3XX_GRAS_CL_VPORT_YSCALE(-(float)pfb->height/2.0));
OUT_RING(ring, A3XX_GRAS_CL_VPORT_ZOFFSET(0.0));
OUT_RING(ring, A3XX_GRAS_CL_VPORT_ZSCALE(1.0));
 
OUT_PKT0(ring, REG_A3XX_RB_MODE_CONTROL, 1);
OUT_RING(ring, A3XX_RB_MODE_CONTROL_RENDER_MODE(RB_RESOLVE_PASS) |
A3XX_RB_MODE_CONTROL_MARB_CACHE_SPLIT_MODE |
A3XX_RB_MODE_CONTROL_MRT(0));
 
OUT_PKT0(ring, REG_A3XX_RB_RENDER_CONTROL, 1);
OUT_RING(ring, A3XX_RB_RENDER_CONTROL_DISABLE_COLOR_PIPE |
A3XX_RB_RENDER_CONTROL_ENABLE_GMEM |
A3XX_RB_RENDER_CONTROL_ALPHA_TEST_FUNC(FUNC_NEVER) |
A3XX_RB_RENDER_CONTROL_BIN_WIDTH(ctx->gmem.bin_w));
 
OUT_PKT0(ring, REG_A3XX_GRAS_SC_CONTROL, 1);
OUT_RING(ring, A3XX_GRAS_SC_CONTROL_RENDER_MODE(RB_RESOLVE_PASS) |
A3XX_GRAS_SC_CONTROL_MSAA_SAMPLES(MSAA_ONE) |
A3XX_GRAS_SC_CONTROL_RASTER_MODE(1));
 
OUT_PKT0(ring, REG_A3XX_PC_PRIM_VTX_CNTL, 1);
OUT_RING(ring, A3XX_PC_PRIM_VTX_CNTL_STRIDE_IN_VPC(0) |
A3XX_PC_PRIM_VTX_CNTL_POLYMODE_FRONT_PTYPE(PC_DRAW_TRIANGLES) |
A3XX_PC_PRIM_VTX_CNTL_POLYMODE_BACK_PTYPE(PC_DRAW_TRIANGLES) |
A3XX_PC_PRIM_VTX_CNTL_PROVOKING_VTX_LAST);
 
OUT_PKT0(ring, REG_A3XX_GRAS_SC_WINDOW_SCISSOR_TL, 2);
OUT_RING(ring, A3XX_GRAS_SC_WINDOW_SCISSOR_TL_X(0) |
A3XX_GRAS_SC_WINDOW_SCISSOR_TL_Y(0));
OUT_RING(ring, A3XX_GRAS_SC_WINDOW_SCISSOR_BR_X(pfb->width - 1) |
A3XX_GRAS_SC_WINDOW_SCISSOR_BR_Y(pfb->height - 1));
 
OUT_PKT0(ring, REG_A3XX_VFD_INDEX_MIN, 4);
OUT_RING(ring, 0); /* VFD_INDEX_MIN */
OUT_RING(ring, 2); /* VFD_INDEX_MAX */
OUT_RING(ring, 0); /* VFD_INSTANCEID_OFFSET */
OUT_RING(ring, 0); /* VFD_INDEX_OFFSET */
 
fd3_program_emit(ring, &emit, 0, NULL);
fd3_emit_vertex_bufs(ring, &emit);
 
if (ctx->resolve & (FD_BUFFER_DEPTH | FD_BUFFER_STENCIL)) {
struct fd_resource *rsc = fd_resource(pfb->zsbuf->texture);
if (!rsc->stencil || ctx->resolve & FD_BUFFER_DEPTH)
emit_gmem2mem_surf(ctx, RB_COPY_DEPTH_STENCIL, false,
ctx->gmem.zsbuf_base[0], pfb->zsbuf);
if (rsc->stencil && ctx->resolve & FD_BUFFER_STENCIL)
emit_gmem2mem_surf(ctx, RB_COPY_DEPTH_STENCIL, true,
ctx->gmem.zsbuf_base[1], pfb->zsbuf);
}
 
if (ctx->resolve & FD_BUFFER_COLOR) {
for (i = 0; i < pfb->nr_cbufs; i++) {
if (!pfb->cbufs[i])
continue;
if (!(ctx->resolve & (PIPE_CLEAR_COLOR0 << i)))
continue;
emit_gmem2mem_surf(ctx, RB_COPY_RESOLVE, false,
ctx->gmem.cbuf_base[i], pfb->cbufs[i]);
}
}
 
OUT_PKT0(ring, REG_A3XX_RB_MODE_CONTROL, 1);
OUT_RING(ring, A3XX_RB_MODE_CONTROL_RENDER_MODE(RB_RENDERING_PASS) |
A3XX_RB_MODE_CONTROL_MARB_CACHE_SPLIT_MODE |
A3XX_RB_MODE_CONTROL_MRT(MAX2(1, pfb->nr_cbufs) - 1));
 
OUT_PKT0(ring, REG_A3XX_GRAS_SC_CONTROL, 1);
OUT_RING(ring, A3XX_GRAS_SC_CONTROL_RENDER_MODE(RB_RENDERING_PASS) |
A3XX_GRAS_SC_CONTROL_MSAA_SAMPLES(MSAA_ONE) |
A3XX_GRAS_SC_CONTROL_RASTER_MODE(0));
}
 
/* transfer from system memory to gmem */
 
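/* restore a surface from system memory into GMEM by drawing a textured
* RECTLIST that samples the sysmem copy (see fd3_emit_gmem_restore_tex()):
*/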
static void
emit_mem2gmem_surf(struct fd_context *ctx, uint32_t bases[],
struct pipe_surface **psurf, uint32_t bufs, uint32_t bin_w)
{
struct fd_ringbuffer *ring = ctx->ring;
struct pipe_surface *zsbufs[2];
 
assert(bufs > 0);
 
OUT_PKT0(ring, REG_A3XX_RB_MODE_CONTROL, 1);
OUT_RING(ring, A3XX_RB_MODE_CONTROL_RENDER_MODE(RB_RENDERING_PASS) |
A3XX_RB_MODE_CONTROL_MARB_CACHE_SPLIT_MODE |
A3XX_RB_MODE_CONTROL_MRT(bufs - 1));
 
emit_mrt(ring, bufs, psurf, bases, bin_w, false);
 
if (psurf[0] && (psurf[0]->format == PIPE_FORMAT_Z32_FLOAT ||
psurf[0]->format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT)) {
/* Depth is stored as unorm in gmem, so we have to write it in using a
* special blit shader which writes depth.
*/
OUT_PKT0(ring, REG_A3XX_RB_DEPTH_CONTROL, 1);
OUT_RING(ring, (A3XX_RB_DEPTH_CONTROL_FRAG_WRITES_Z |
A3XX_RB_DEPTH_CONTROL_Z_WRITE_ENABLE |
A3XX_RB_DEPTH_CONTROL_Z_ENABLE |
A3XX_RB_DEPTH_CONTROL_EARLY_Z_DISABLE |
A3XX_RB_DEPTH_CONTROL_ZFUNC(FUNC_ALWAYS)));
 
OUT_PKT0(ring, REG_A3XX_RB_DEPTH_INFO, 2);
OUT_RING(ring, A3XX_RB_DEPTH_INFO_DEPTH_BASE(bases[0]) |
A3XX_RB_DEPTH_INFO_DEPTH_FORMAT(DEPTHX_32));
OUT_RING(ring, A3XX_RB_DEPTH_PITCH(4 * ctx->gmem.bin_w));
 
if (psurf[0]->format == PIPE_FORMAT_Z32_FLOAT) {
OUT_PKT0(ring, REG_A3XX_RB_MRT_CONTROL(0), 1);
OUT_RING(ring, 0);
} else {
/* The gmem_restore_tex logic will put the first buffer's stencil
* as color. Supply it with the proper information to make that
* happen.
*/
zsbufs[0] = zsbufs[1] = psurf[0];
psurf = zsbufs;
bufs = 2;
}
} else {
OUT_PKT0(ring, REG_A3XX_SP_FS_OUTPUT_REG, 1);
OUT_RING(ring, A3XX_SP_FS_OUTPUT_REG_MRT(bufs - 1));
}
 
fd3_emit_gmem_restore_tex(ring, psurf, bufs);
 
fd_draw(ctx, ring, DI_PT_RECTLIST, IGNORE_VISIBILITY,
DI_SRC_SEL_AUTO_INDEX, 2, 0, INDEX_SIZE_IGN, 0, 0, NULL);
}
 
static void
fd3_emit_tile_mem2gmem(struct fd_context *ctx, struct fd_tile *tile)
{
struct fd3_context *fd3_ctx = fd3_context(ctx);
struct fd_gmem_stateobj *gmem = &ctx->gmem;
struct fd_ringbuffer *ring = ctx->ring;
struct pipe_framebuffer_state *pfb = &ctx->framebuffer;
struct fd3_emit emit = {
.vtx = &fd3_ctx->blit_vbuf_state,
.sprite_coord_enable = 1,
/* NOTE: They all use the same VP, this is for vtx bufs. */
.prog = &ctx->blit_prog[0],
.key = {
.half_precision = (fd3_half_precision(pfb->cbufs[0]) &&
fd3_half_precision(pfb->cbufs[1]) &&
fd3_half_precision(pfb->cbufs[2]) &&
fd3_half_precision(pfb->cbufs[3]))
},
};
float x0, y0, x1, y1;
unsigned bin_w = tile->bin_w;
unsigned bin_h = tile->bin_h;
unsigned i;
 
/* write texture coordinates to vertexbuf: */
x0 = ((float)tile->xoff) / ((float)pfb->width);
x1 = ((float)tile->xoff + bin_w) / ((float)pfb->width);
y0 = ((float)tile->yoff) / ((float)pfb->height);
y1 = ((float)tile->yoff + bin_h) / ((float)pfb->height);
 
OUT_PKT3(ring, CP_MEM_WRITE, 5);
OUT_RELOCW(ring, fd_resource(fd3_ctx->blit_texcoord_vbuf)->bo, 0, 0, 0);
OUT_RING(ring, fui(x0));
OUT_RING(ring, fui(y0));
OUT_RING(ring, fui(x1));
OUT_RING(ring, fui(y1));
 
for (i = 0; i < 4; i++) {
OUT_PKT0(ring, REG_A3XX_RB_MRT_CONTROL(i), 1);
OUT_RING(ring, A3XX_RB_MRT_CONTROL_ROP_CODE(ROP_COPY) |
A3XX_RB_MRT_CONTROL_DITHER_MODE(DITHER_DISABLE) |
A3XX_RB_MRT_CONTROL_COMPONENT_ENABLE(0xf));
 
OUT_PKT0(ring, REG_A3XX_RB_MRT_BLEND_CONTROL(i), 1);
OUT_RING(ring, A3XX_RB_MRT_BLEND_CONTROL_RGB_SRC_FACTOR(FACTOR_ONE) |
A3XX_RB_MRT_BLEND_CONTROL_RGB_BLEND_OPCODE(BLEND_DST_PLUS_SRC) |
A3XX_RB_MRT_BLEND_CONTROL_RGB_DEST_FACTOR(FACTOR_ZERO) |
A3XX_RB_MRT_BLEND_CONTROL_ALPHA_SRC_FACTOR(FACTOR_ONE) |
A3XX_RB_MRT_BLEND_CONTROL_ALPHA_BLEND_OPCODE(BLEND_DST_PLUS_SRC) |
A3XX_RB_MRT_BLEND_CONTROL_ALPHA_DEST_FACTOR(FACTOR_ZERO));
}
 
OUT_PKT0(ring, REG_A3XX_RB_RENDER_CONTROL, 1);
OUT_RING(ring, A3XX_RB_RENDER_CONTROL_ALPHA_TEST_FUNC(FUNC_ALWAYS) |
A3XX_RB_RENDER_CONTROL_BIN_WIDTH(gmem->bin_w));
 
fd_wfi(ctx, ring);
OUT_PKT0(ring, REG_A3XX_RB_DEPTH_CONTROL, 1);
OUT_RING(ring, A3XX_RB_DEPTH_CONTROL_ZFUNC(FUNC_LESS));
 
OUT_PKT0(ring, REG_A3XX_RB_DEPTH_INFO, 2);
OUT_RING(ring, 0);
OUT_RING(ring, 0);
 
OUT_PKT0(ring, REG_A3XX_GRAS_CL_CLIP_CNTL, 1);
OUT_RING(ring, A3XX_GRAS_CL_CLIP_CNTL_IJ_PERSP_CENTER); /* GRAS_CL_CLIP_CNTL */
 
fd_wfi(ctx, ring);
OUT_PKT0(ring, REG_A3XX_GRAS_CL_VPORT_XOFFSET, 6);
OUT_RING(ring, A3XX_GRAS_CL_VPORT_XOFFSET((float)bin_w/2.0 - 0.5));
OUT_RING(ring, A3XX_GRAS_CL_VPORT_XSCALE((float)bin_w/2.0));
OUT_RING(ring, A3XX_GRAS_CL_VPORT_YOFFSET((float)bin_h/2.0 - 0.5));
OUT_RING(ring, A3XX_GRAS_CL_VPORT_YSCALE(-(float)bin_h/2.0));
OUT_RING(ring, A3XX_GRAS_CL_VPORT_ZOFFSET(0.0));
OUT_RING(ring, A3XX_GRAS_CL_VPORT_ZSCALE(1.0));
 
OUT_PKT0(ring, REG_A3XX_GRAS_SC_WINDOW_SCISSOR_TL, 2);
OUT_RING(ring, A3XX_GRAS_SC_WINDOW_SCISSOR_TL_X(0) |
A3XX_GRAS_SC_WINDOW_SCISSOR_TL_Y(0));
OUT_RING(ring, A3XX_GRAS_SC_WINDOW_SCISSOR_BR_X(bin_w - 1) |
A3XX_GRAS_SC_WINDOW_SCISSOR_BR_Y(bin_h - 1));
 
OUT_PKT0(ring, REG_A3XX_GRAS_SC_SCREEN_SCISSOR_TL, 2);
OUT_RING(ring, A3XX_GRAS_SC_SCREEN_SCISSOR_TL_X(0) |
A3XX_GRAS_SC_SCREEN_SCISSOR_TL_Y(0));
OUT_RING(ring, A3XX_GRAS_SC_SCREEN_SCISSOR_BR_X(bin_w - 1) |
A3XX_GRAS_SC_SCREEN_SCISSOR_BR_Y(bin_h - 1));
 
OUT_PKT0(ring, REG_A3XX_RB_STENCIL_CONTROL, 1);
OUT_RING(ring, 0x2 |
A3XX_RB_STENCIL_CONTROL_FUNC(FUNC_ALWAYS) |
A3XX_RB_STENCIL_CONTROL_FAIL(STENCIL_KEEP) |
A3XX_RB_STENCIL_CONTROL_ZPASS(STENCIL_KEEP) |
A3XX_RB_STENCIL_CONTROL_ZFAIL(STENCIL_KEEP) |
A3XX_RB_STENCIL_CONTROL_FUNC_BF(FUNC_ALWAYS) |
A3XX_RB_STENCIL_CONTROL_FAIL_BF(STENCIL_KEEP) |
A3XX_RB_STENCIL_CONTROL_ZPASS_BF(STENCIL_KEEP) |
A3XX_RB_STENCIL_CONTROL_ZFAIL_BF(STENCIL_KEEP));
 
OUT_PKT0(ring, REG_A3XX_RB_STENCIL_INFO, 2);
OUT_RING(ring, 0); /* RB_STENCIL_INFO */
OUT_RING(ring, 0); /* RB_STENCIL_PITCH */
 
OUT_PKT0(ring, REG_A3XX_GRAS_SC_CONTROL, 1);
OUT_RING(ring, A3XX_GRAS_SC_CONTROL_RENDER_MODE(RB_RENDERING_PASS) |
A3XX_GRAS_SC_CONTROL_MSAA_SAMPLES(MSAA_ONE) |
A3XX_GRAS_SC_CONTROL_RASTER_MODE(1));
 
OUT_PKT0(ring, REG_A3XX_PC_PRIM_VTX_CNTL, 1);
OUT_RING(ring, A3XX_PC_PRIM_VTX_CNTL_STRIDE_IN_VPC(2) |
A3XX_PC_PRIM_VTX_CNTL_POLYMODE_FRONT_PTYPE(PC_DRAW_TRIANGLES) |
A3XX_PC_PRIM_VTX_CNTL_POLYMODE_BACK_PTYPE(PC_DRAW_TRIANGLES) |
A3XX_PC_PRIM_VTX_CNTL_PROVOKING_VTX_LAST);
 
OUT_PKT0(ring, REG_A3XX_VFD_INDEX_MIN, 4);
OUT_RING(ring, 0); /* VFD_INDEX_MIN */
OUT_RING(ring, 2); /* VFD_INDEX_MAX */
OUT_RING(ring, 0); /* VFD_INSTANCEID_OFFSET */
OUT_RING(ring, 0); /* VFD_INDEX_OFFSET */
 
fd3_emit_vertex_bufs(ring, &emit);
 
/* for gmem pitch/base calculations, we need to use the non-
* truncated tile sizes:
*/
bin_w = gmem->bin_w;
bin_h = gmem->bin_h;
 
if (fd_gmem_needs_restore(ctx, tile, FD_BUFFER_COLOR)) {
emit.prog = &ctx->blit_prog[pfb->nr_cbufs - 1];
fd3_program_emit(ring, &emit, pfb->nr_cbufs, pfb->cbufs);
emit_mem2gmem_surf(ctx, gmem->cbuf_base, pfb->cbufs, pfb->nr_cbufs, bin_w);
}
 
if (fd_gmem_needs_restore(ctx, tile, FD_BUFFER_DEPTH | FD_BUFFER_STENCIL)) {
if (pfb->zsbuf->format != PIPE_FORMAT_Z32_FLOAT_S8X24_UINT &&
pfb->zsbuf->format != PIPE_FORMAT_Z32_FLOAT) {
/* Non-float can use a regular color write. It's split over 8-bit
* components, so half precision is always sufficient.
*/
emit.prog = &ctx->blit_prog[0];
emit.key.half_precision = true;
} else {
/* Float depth needs special blit shader that writes depth */
if (pfb->zsbuf->format == PIPE_FORMAT_Z32_FLOAT)
emit.prog = &ctx->blit_z;
else
emit.prog = &ctx->blit_zs;
emit.key.half_precision = false;
}
fd3_program_emit(ring, &emit, 1, &pfb->zsbuf);
emit_mem2gmem_surf(ctx, gmem->zsbuf_base, &pfb->zsbuf, 1, bin_w);
}
 
OUT_PKT0(ring, REG_A3XX_GRAS_SC_CONTROL, 1);
OUT_RING(ring, A3XX_GRAS_SC_CONTROL_RENDER_MODE(RB_RENDERING_PASS) |
A3XX_GRAS_SC_CONTROL_MSAA_SAMPLES(MSAA_ONE) |
A3XX_GRAS_SC_CONTROL_RASTER_MODE(0));
 
OUT_PKT0(ring, REG_A3XX_RB_MODE_CONTROL, 1);
OUT_RING(ring, A3XX_RB_MODE_CONTROL_RENDER_MODE(RB_RENDERING_PASS) |
A3XX_RB_MODE_CONTROL_MARB_CACHE_SPLIT_MODE |
A3XX_RB_MODE_CONTROL_MRT(MAX2(1, pfb->nr_cbufs) - 1));
}
 
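/* patch previously recorded draw packets with the final visibility mode,
* which is only known once we decide whether the hw binning pass is used:
*/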
static void
patch_draws(struct fd_context *ctx, enum pc_di_vis_cull_mode vismode)
{
unsigned i;
for (i = 0; i < fd_patch_num_elements(&ctx->draw_patches); i++) {
struct fd_cs_patch *patch = fd_patch_element(&ctx->draw_patches, i);
*patch->cs = patch->val | DRAW(0, 0, 0, vismode, 0);
}
util_dynarray_resize(&ctx->draw_patches, 0);
}
 
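/* likewise, patch previously recorded RB_RENDER_CONTROL values with the
* final bin-width/gmem bits:
*/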
static void
patch_rbrc(struct fd_context *ctx, uint32_t val)
{
struct fd3_context *fd3_ctx = fd3_context(ctx);
unsigned i;
for (i = 0; i < fd_patch_num_elements(&fd3_ctx->rbrc_patches); i++) {
struct fd_cs_patch *patch = fd_patch_element(&fd3_ctx->rbrc_patches, i);
*patch->cs = patch->val | val;
}
util_dynarray_resize(&fd3_ctx->rbrc_patches, 0);
}
 
/* for rendering directly to system memory: */
static void
fd3_emit_sysmem_prep(struct fd_context *ctx)
{
struct pipe_framebuffer_state *pfb = &ctx->framebuffer;
struct fd_ringbuffer *ring = ctx->ring;
uint32_t i, pitch = 0;
 
for (i = 0; i < pfb->nr_cbufs; i++) {
struct pipe_surface *psurf = pfb->cbufs[i];
if (!psurf)
continue;
pitch = fd_resource(psurf->texture)->slices[psurf->u.tex.level].pitch;
}
 
fd3_emit_restore(ctx);
 
OUT_PKT0(ring, REG_A3XX_RB_FRAME_BUFFER_DIMENSION, 1);
OUT_RING(ring, A3XX_RB_FRAME_BUFFER_DIMENSION_WIDTH(pfb->width) |
A3XX_RB_FRAME_BUFFER_DIMENSION_HEIGHT(pfb->height));
 
emit_mrt(ring, pfb->nr_cbufs, pfb->cbufs, NULL, 0, true);
 
/* setup scissor/offset for current tile: */
OUT_PKT0(ring, REG_A3XX_RB_WINDOW_OFFSET, 1);
OUT_RING(ring, A3XX_RB_WINDOW_OFFSET_X(0) |
A3XX_RB_WINDOW_OFFSET_Y(0));
 
OUT_PKT0(ring, REG_A3XX_GRAS_SC_SCREEN_SCISSOR_TL, 2);
OUT_RING(ring, A3XX_GRAS_SC_SCREEN_SCISSOR_TL_X(0) |
A3XX_GRAS_SC_SCREEN_SCISSOR_TL_Y(0));
OUT_RING(ring, A3XX_GRAS_SC_SCREEN_SCISSOR_BR_X(pfb->width - 1) |
A3XX_GRAS_SC_SCREEN_SCISSOR_BR_Y(pfb->height - 1));
 
OUT_PKT0(ring, REG_A3XX_RB_MODE_CONTROL, 1);
OUT_RING(ring, A3XX_RB_MODE_CONTROL_RENDER_MODE(RB_RENDERING_PASS) |
A3XX_RB_MODE_CONTROL_GMEM_BYPASS |
A3XX_RB_MODE_CONTROL_MARB_CACHE_SPLIT_MODE |
A3XX_RB_MODE_CONTROL_MRT(MAX2(1, pfb->nr_cbufs) - 1));
 
patch_draws(ctx, IGNORE_VISIBILITY);
patch_rbrc(ctx, A3XX_RB_RENDER_CONTROL_BIN_WIDTH(pitch));
}
 
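/* configure the visibility stream compressor (VSC) pipes: each pipe gets
* a buffer (allocated on first use) to receive the per-bin visibility
* stream produced by the binning pass:
*/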
static void
update_vsc_pipe(struct fd_context *ctx)
{
struct fd3_context *fd3_ctx = fd3_context(ctx);
struct fd_ringbuffer *ring = ctx->ring;
int i;
 
OUT_PKT0(ring, REG_A3XX_VSC_SIZE_ADDRESS, 1);
OUT_RELOCW(ring, fd3_ctx->vsc_size_mem, 0, 0, 0); /* VSC_SIZE_ADDRESS */
 
for (i = 0; i < 8; i++) {
struct fd_vsc_pipe *pipe = &ctx->pipe[i];
 
if (!pipe->bo) {
pipe->bo = fd_bo_new(ctx->dev, 0x40000,
DRM_FREEDRENO_GEM_TYPE_KMEM);
}
 
OUT_PKT0(ring, REG_A3XX_VSC_PIPE(i), 3);
OUT_RING(ring, A3XX_VSC_PIPE_CONFIG_X(pipe->x) |
A3XX_VSC_PIPE_CONFIG_Y(pipe->y) |
A3XX_VSC_PIPE_CONFIG_W(pipe->w) |
A3XX_VSC_PIPE_CONFIG_H(pipe->h));
OUT_RELOCW(ring, pipe->bo, 0, 0, 0); /* VSC_PIPE[i].DATA_ADDRESS */
OUT_RING(ring, fd_bo_size(pipe->bo) - 32); /* VSC_PIPE[i].DATA_LENGTH */
}
}
 
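/* hw binning pass: replay the recorded draw commands in RB_TILING_PASS
* mode so the hw fills the VSC pipe buffers with per-bin visibility
* data that the rendering pass can use to skip non-visible geometry:
*/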
static void
emit_binning_pass(struct fd_context *ctx)
{
struct fd_gmem_stateobj *gmem = &ctx->gmem;
struct pipe_framebuffer_state *pfb = &ctx->framebuffer;
struct fd_ringbuffer *ring = ctx->ring;
int i;
 
uint32_t x1 = gmem->minx;
uint32_t y1 = gmem->miny;
uint32_t x2 = gmem->minx + gmem->width - 1;
uint32_t y2 = gmem->miny + gmem->height - 1;
 
if (ctx->screen->gpu_id == 320) {
emit_binning_workaround(ctx);
fd_wfi(ctx, ring);
OUT_PKT3(ring, CP_INVALIDATE_STATE, 1);
OUT_RING(ring, 0x00007fff);
}
 
OUT_PKT0(ring, REG_A3XX_VSC_BIN_CONTROL, 1);
OUT_RING(ring, A3XX_VSC_BIN_CONTROL_BINNING_ENABLE);
 
OUT_PKT0(ring, REG_A3XX_GRAS_SC_CONTROL, 1);
OUT_RING(ring, A3XX_GRAS_SC_CONTROL_RENDER_MODE(RB_TILING_PASS) |
A3XX_GRAS_SC_CONTROL_MSAA_SAMPLES(MSAA_ONE) |
A3XX_GRAS_SC_CONTROL_RASTER_MODE(0));
 
OUT_PKT0(ring, REG_A3XX_RB_FRAME_BUFFER_DIMENSION, 1);
OUT_RING(ring, A3XX_RB_FRAME_BUFFER_DIMENSION_WIDTH(pfb->width) |
A3XX_RB_FRAME_BUFFER_DIMENSION_HEIGHT(pfb->height));
 
OUT_PKT0(ring, REG_A3XX_RB_RENDER_CONTROL, 1);
OUT_RING(ring, A3XX_RB_RENDER_CONTROL_ALPHA_TEST_FUNC(FUNC_NEVER) |
A3XX_RB_RENDER_CONTROL_DISABLE_COLOR_PIPE |
A3XX_RB_RENDER_CONTROL_BIN_WIDTH(gmem->bin_w));
 
/* setup scissor/offset for whole screen: */
OUT_PKT0(ring, REG_A3XX_RB_WINDOW_OFFSET, 1);
OUT_RING(ring, A3XX_RB_WINDOW_OFFSET_X(x1) |
A3XX_RB_WINDOW_OFFSET_Y(y1));
 
OUT_PKT0(ring, REG_A3XX_RB_LRZ_VSC_CONTROL, 1);
OUT_RING(ring, A3XX_RB_LRZ_VSC_CONTROL_BINNING_ENABLE);
 
OUT_PKT0(ring, REG_A3XX_GRAS_SC_SCREEN_SCISSOR_TL, 2);
OUT_RING(ring, A3XX_GRAS_SC_SCREEN_SCISSOR_TL_X(x1) |
A3XX_GRAS_SC_SCREEN_SCISSOR_TL_Y(y1));
OUT_RING(ring, A3XX_GRAS_SC_SCREEN_SCISSOR_BR_X(x2) |
A3XX_GRAS_SC_SCREEN_SCISSOR_BR_Y(y2));
 
OUT_PKT0(ring, REG_A3XX_RB_MODE_CONTROL, 1);
OUT_RING(ring, A3XX_RB_MODE_CONTROL_RENDER_MODE(RB_TILING_PASS) |
A3XX_RB_MODE_CONTROL_MARB_CACHE_SPLIT_MODE |
A3XX_RB_MODE_CONTROL_MRT(0));
 
for (i = 0; i < 4; i++) {
OUT_PKT0(ring, REG_A3XX_RB_MRT_CONTROL(i), 1);
OUT_RING(ring, A3XX_RB_MRT_CONTROL_ROP_CODE(ROP_CLEAR) |
A3XX_RB_MRT_CONTROL_DITHER_MODE(DITHER_DISABLE) |
A3XX_RB_MRT_CONTROL_COMPONENT_ENABLE(0));
}
 
OUT_PKT0(ring, REG_A3XX_PC_VSTREAM_CONTROL, 1);
OUT_RING(ring, A3XX_PC_VSTREAM_CONTROL_SIZE(1) |
A3XX_PC_VSTREAM_CONTROL_N(0));
 
/* emit IB to binning drawcmds: */
OUT_IB(ring, ctx->binning_start, ctx->binning_end);
fd_reset_wfi(ctx);
 
fd_wfi(ctx, ring);
 
/* and then put stuff back the way it was: */
 
OUT_PKT0(ring, REG_A3XX_VSC_BIN_CONTROL, 1);
OUT_RING(ring, 0x00000000);
 
OUT_PKT0(ring, REG_A3XX_SP_SP_CTRL_REG, 1);
OUT_RING(ring, A3XX_SP_SP_CTRL_REG_RESOLVE |
A3XX_SP_SP_CTRL_REG_CONSTMODE(1) |
A3XX_SP_SP_CTRL_REG_SLEEPMODE(1) |
A3XX_SP_SP_CTRL_REG_L0MODE(0));
 
OUT_PKT0(ring, REG_A3XX_RB_LRZ_VSC_CONTROL, 1);
OUT_RING(ring, 0x00000000);
 
OUT_PKT0(ring, REG_A3XX_GRAS_SC_CONTROL, 1);
OUT_RING(ring, A3XX_GRAS_SC_CONTROL_RENDER_MODE(RB_RENDERING_PASS) |
A3XX_GRAS_SC_CONTROL_MSAA_SAMPLES(MSAA_ONE) |
A3XX_GRAS_SC_CONTROL_RASTER_MODE(0));
 
OUT_PKT0(ring, REG_A3XX_RB_MODE_CONTROL, 2);
OUT_RING(ring, A3XX_RB_MODE_CONTROL_RENDER_MODE(RB_RENDERING_PASS) |
A3XX_RB_MODE_CONTROL_MARB_CACHE_SPLIT_MODE |
A3XX_RB_MODE_CONTROL_MRT(pfb->nr_cbufs - 1));
OUT_RING(ring, A3XX_RB_RENDER_CONTROL_ENABLE_GMEM |
A3XX_RB_RENDER_CONTROL_ALPHA_TEST_FUNC(FUNC_NEVER) |
A3XX_RB_RENDER_CONTROL_BIN_WIDTH(gmem->bin_w));
 
fd_event_write(ctx, ring, CACHE_FLUSH);
fd_wfi(ctx, ring);
 
if (ctx->screen->gpu_id == 320) {
/* dummy-draw workaround: */
OUT_PKT3(ring, CP_DRAW_INDX, 3);
OUT_RING(ring, 0x00000000);
OUT_RING(ring, DRAW(1, DI_SRC_SEL_AUTO_INDEX,
INDEX_SIZE_IGN, IGNORE_VISIBILITY, 0));
OUT_RING(ring, 0); /* NumIndices */
fd_reset_wfi(ctx);
}
 
OUT_PKT3(ring, CP_NOP, 4);
OUT_RING(ring, 0x00000000);
OUT_RING(ring, 0x00000000);
OUT_RING(ring, 0x00000000);
OUT_RING(ring, 0x00000000);
 
fd_wfi(ctx, ring);
 
if (ctx->screen->gpu_id == 320) {
emit_binning_workaround(ctx);
}
}
 
/* before first tile */
static void
fd3_emit_tile_init(struct fd_context *ctx)
{
struct fd_ringbuffer *ring = ctx->ring;
struct fd_gmem_stateobj *gmem = &ctx->gmem;
uint32_t rb_render_control;
 
fd3_emit_restore(ctx);
 
/* note: use gmem->bin_w/h here; the per-tile bin_w/h values may be
* truncated at the right and bottom edge tiles
*/
OUT_PKT0(ring, REG_A3XX_VSC_BIN_SIZE, 1);
OUT_RING(ring, A3XX_VSC_BIN_SIZE_WIDTH(gmem->bin_w) |
A3XX_VSC_BIN_SIZE_HEIGHT(gmem->bin_h));
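
/* Editor's note (illustrative, not part of the upstream file): e.g. with
* pfb->width = 1000 and gmem->bin_w = 256 there are four tile columns;
* columns 0..2 are 256px wide but the rightmost tile only covers
* 1000 - 768 = 232px, so its tile->bin_w is 232 while VSC_BIN_SIZE above
* is still programmed with the nominal 256.
*/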
 
update_vsc_pipe(ctx);
 
if (use_hw_binning(ctx)) {
/* mark the end of the binning cmds: */
fd_ringmarker_mark(ctx->binning_end);
 
/* emit hw binning pass: */
emit_binning_pass(ctx);
 
patch_draws(ctx, USE_VISIBILITY);
} else {
patch_draws(ctx, IGNORE_VISIBILITY);
}
 
rb_render_control = A3XX_RB_RENDER_CONTROL_ENABLE_GMEM |
A3XX_RB_RENDER_CONTROL_BIN_WIDTH(gmem->bin_w);
 
patch_rbrc(ctx, rb_render_control);
}
 
/* before mem2gmem */
static void
fd3_emit_tile_prep(struct fd_context *ctx, struct fd_tile *tile)
{
struct fd_ringbuffer *ring = ctx->ring;
struct pipe_framebuffer_state *pfb = &ctx->framebuffer;
 
if (ctx->needs_rb_fbd) {
fd_wfi(ctx, ring);
OUT_PKT0(ring, REG_A3XX_RB_FRAME_BUFFER_DIMENSION, 1);
OUT_RING(ring, A3XX_RB_FRAME_BUFFER_DIMENSION_WIDTH(pfb->width) |
A3XX_RB_FRAME_BUFFER_DIMENSION_HEIGHT(pfb->height));
ctx->needs_rb_fbd = false;
}
 
OUT_PKT0(ring, REG_A3XX_RB_MODE_CONTROL, 1);
OUT_RING(ring, A3XX_RB_MODE_CONTROL_RENDER_MODE(RB_RENDERING_PASS) |
A3XX_RB_MODE_CONTROL_MARB_CACHE_SPLIT_MODE |
A3XX_RB_MODE_CONTROL_MRT(MAX2(1, pfb->nr_cbufs) - 1));
}
 
/* before IB to rendering cmds: */
static void
fd3_emit_tile_renderprep(struct fd_context *ctx, struct fd_tile *tile)
{
struct fd3_context *fd3_ctx = fd3_context(ctx);
struct fd_ringbuffer *ring = ctx->ring;
struct fd_gmem_stateobj *gmem = &ctx->gmem;
struct pipe_framebuffer_state *pfb = &ctx->framebuffer;
 
uint32_t x1 = tile->xoff;
uint32_t y1 = tile->yoff;
uint32_t x2 = tile->xoff + tile->bin_w - 1;
uint32_t y2 = tile->yoff + tile->bin_h - 1;
 
uint32_t reg;
 
OUT_PKT0(ring, REG_A3XX_RB_DEPTH_INFO, 2);
reg = A3XX_RB_DEPTH_INFO_DEPTH_BASE(gmem->zsbuf_base[0]);
if (pfb->zsbuf) {
reg |= A3XX_RB_DEPTH_INFO_DEPTH_FORMAT(fd_pipe2depth(pfb->zsbuf->format));
}
OUT_RING(ring, reg);
if (pfb->zsbuf) {
struct fd_resource *rsc = fd_resource(pfb->zsbuf->texture);
OUT_RING(ring, A3XX_RB_DEPTH_PITCH(rsc->cpp * gmem->bin_w));
if (rsc->stencil) {
OUT_PKT0(ring, REG_A3XX_RB_STENCIL_INFO, 2);
OUT_RING(ring, A3XX_RB_STENCIL_INFO_STENCIL_BASE(gmem->zsbuf_base[1]));
OUT_RING(ring, A3XX_RB_STENCIL_PITCH(rsc->stencil->cpp * gmem->bin_w));
}
} else {
OUT_RING(ring, 0x00000000);
}
 
if (use_hw_binning(ctx)) {
struct fd_vsc_pipe *pipe = &ctx->pipe[tile->p];
 
assert(pipe->w * pipe->h);
 
fd_event_write(ctx, ring, HLSQ_FLUSH);
fd_wfi(ctx, ring);
 
OUT_PKT0(ring, REG_A3XX_PC_VSTREAM_CONTROL, 1);
OUT_RING(ring, A3XX_PC_VSTREAM_CONTROL_SIZE(pipe->w * pipe->h) |
A3XX_PC_VSTREAM_CONTROL_N(tile->n));
 
 
OUT_PKT3(ring, CP_SET_BIN_DATA, 2);
OUT_RELOC(ring, pipe->bo, 0, 0, 0); /* BIN_DATA_ADDR <- VSC_PIPE[p].DATA_ADDRESS */
OUT_RELOC(ring, fd3_ctx->vsc_size_mem, /* BIN_SIZE_ADDR <- VSC_SIZE_ADDRESS + (p * 4) */
(tile->p * 4), 0, 0);
} else {
OUT_PKT0(ring, REG_A3XX_PC_VSTREAM_CONTROL, 1);
OUT_RING(ring, 0x00000000);
}
 
OUT_PKT3(ring, CP_SET_BIN, 3);
OUT_RING(ring, 0x00000000);
OUT_RING(ring, CP_SET_BIN_1_X1(x1) | CP_SET_BIN_1_Y1(y1));
OUT_RING(ring, CP_SET_BIN_2_X2(x2) | CP_SET_BIN_2_Y2(y2));
 
emit_mrt(ring, pfb->nr_cbufs, pfb->cbufs, gmem->cbuf_base, gmem->bin_w, true);
 
/* setup scissor/offset for current tile: */
OUT_PKT0(ring, REG_A3XX_RB_WINDOW_OFFSET, 1);
OUT_RING(ring, A3XX_RB_WINDOW_OFFSET_X(tile->xoff) |
A3XX_RB_WINDOW_OFFSET_Y(tile->yoff));
 
OUT_PKT0(ring, REG_A3XX_GRAS_SC_SCREEN_SCISSOR_TL, 2);
OUT_RING(ring, A3XX_GRAS_SC_SCREEN_SCISSOR_TL_X(x1) |
A3XX_GRAS_SC_SCREEN_SCISSOR_TL_Y(y1));
OUT_RING(ring, A3XX_GRAS_SC_SCREEN_SCISSOR_BR_X(x2) |
A3XX_GRAS_SC_SCREEN_SCISSOR_BR_Y(y2));
}
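
/* Editor's note: an illustrative, self-contained sketch (not part of the
* upstream file) of two conventions used in the function above.  The
* helper names are hypothetical and exist only for this example:
*   - screen-scissor BR coordinates are inclusive, hence the "- 1";
*   - each VSC pipe 'p' gets a 4-byte slot in vsc_size_mem, which is why
*     CP_SET_BIN_DATA points its size address at offset p * 4.
*/
static inline uint32_t
example_scissor_br(uint32_t off, uint32_t extent)
{
	return off + extent - 1;        /* inclusive bottom-right coordinate */
}

static inline uint32_t
example_vsc_size_offset(uint32_t p)
{
	return p * 4;                   /* byte offset of pipe p's size slot */
}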
 
void
fd3_gmem_init(struct pipe_context *pctx)
{
struct fd_context *ctx = fd_context(pctx);
 
ctx->emit_sysmem_prep = fd3_emit_sysmem_prep;
ctx->emit_tile_init = fd3_emit_tile_init;
ctx->emit_tile_prep = fd3_emit_tile_prep;
ctx->emit_tile_mem2gmem = fd3_emit_tile_mem2gmem;
ctx->emit_tile_renderprep = fd3_emit_tile_renderprep;
ctx->emit_tile_gmem2mem = fd3_emit_tile_gmem2mem;
}
/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/freedreno/a3xx/fd3_gmem.h
0,0 → 1,36
/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */
 
/*
* Copyright (C) 2013 Rob Clark <robclark@freedesktop.org>
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Authors:
* Rob Clark <robclark@freedesktop.org>
*/
 
#ifndef FD3_GMEM_H_
#define FD3_GMEM_H_
 
#include "pipe/p_context.h"
 
void fd3_gmem_init(struct pipe_context *pctx);
 
#endif /* FD3_GMEM_H_ */
/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/freedreno/a3xx/fd3_program.c
0,0 → 1,481
/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */
 
/*
* Copyright (C) 2013 Rob Clark <robclark@freedesktop.org>
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Authors:
* Rob Clark <robclark@freedesktop.org>
*/
 
#include "pipe/p_state.h"
#include "util/u_string.h"
#include "util/u_memory.h"
#include "util/u_inlines.h"
#include "util/u_format.h"
 
#include "freedreno_program.h"
 
#include "fd3_program.h"
#include "fd3_emit.h"
#include "fd3_texture.h"
#include "fd3_format.h"
 
static void
delete_shader_stateobj(struct fd3_shader_stateobj *so)
{
ir3_shader_destroy(so->shader);
free(so);
}
 
static struct fd3_shader_stateobj *
create_shader_stateobj(struct pipe_context *pctx, const struct pipe_shader_state *cso,
enum shader_t type)
{
struct fd3_shader_stateobj *so = CALLOC_STRUCT(fd3_shader_stateobj);
so->shader = ir3_shader_create(pctx, cso->tokens, type);
return so;
}
 
static void *
fd3_fp_state_create(struct pipe_context *pctx,
const struct pipe_shader_state *cso)
{
return create_shader_stateobj(pctx, cso, SHADER_FRAGMENT);
}
 
static void
fd3_fp_state_delete(struct pipe_context *pctx, void *hwcso)
{
struct fd3_shader_stateobj *so = hwcso;
delete_shader_stateobj(so);
}
 
static void *
fd3_vp_state_create(struct pipe_context *pctx,
const struct pipe_shader_state *cso)
{
return create_shader_stateobj(pctx, cso, SHADER_VERTEX);
}
 
static void
fd3_vp_state_delete(struct pipe_context *pctx, void *hwcso)
{
struct fd3_shader_stateobj *so = hwcso;
delete_shader_stateobj(so);
}
 
static void
emit_shader(struct fd_ringbuffer *ring, const struct ir3_shader_variant *so)
{
const struct ir3_info *si = &so->info;
enum adreno_state_block sb;
enum adreno_state_src src;
uint32_t i, sz, *bin;
 
if (so->type == SHADER_VERTEX) {
sb = SB_VERT_SHADER;
} else {
sb = SB_FRAG_SHADER;
}
 
if (fd_mesa_debug & FD_DBG_DIRECT) {
sz = si->sizedwords;
src = SS_DIRECT;
bin = fd_bo_map(so->bo);
} else {
sz = 0;
src = SS_INDIRECT;
bin = NULL;
}
 
OUT_PKT3(ring, CP_LOAD_STATE, 2 + sz);
OUT_RING(ring, CP_LOAD_STATE_0_DST_OFF(0) |
CP_LOAD_STATE_0_STATE_SRC(src) |
CP_LOAD_STATE_0_STATE_BLOCK(sb) |
CP_LOAD_STATE_0_NUM_UNIT(so->instrlen));
if (bin) {
OUT_RING(ring, CP_LOAD_STATE_1_EXT_SRC_ADDR(0) |
CP_LOAD_STATE_1_STATE_TYPE(ST_SHADER));
} else {
OUT_RELOC(ring, so->bo, 0,
CP_LOAD_STATE_1_STATE_TYPE(ST_SHADER), 0);
}
for (i = 0; i < sz; i++) {
OUT_RING(ring, bin[i]);
}
}
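
/* Editor's note: an illustrative, self-contained sketch (not part of the
* upstream file) of the CP_LOAD_STATE sizing in emit_shader() above: with
* FD_DBG_DIRECT the shader dwords are copied inline after the two header
* dwords, otherwise only a reloc to so->bo is emitted and the CP fetches
* the shader from memory.  The helper name is hypothetical:
*/
static inline uint32_t
example_load_state_dwords(bool direct, uint32_t sizedwords)
{
	/* payload count passed to OUT_PKT3(): two header dwords plus the
	 * inlined shader in the direct case:
	 */
	return 2 + (direct ? sizedwords : 0);
}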
 
void
fd3_program_emit(struct fd_ringbuffer *ring, struct fd3_emit *emit,
int nr, struct pipe_surface **bufs)
{
const struct ir3_shader_variant *vp, *fp;
const struct ir3_info *vsi, *fsi;
enum a3xx_instrbuffermode fpbuffer, vpbuffer;
uint32_t fpbuffersz, vpbuffersz, fsoff;
uint32_t pos_regid, posz_regid, psize_regid, color_regid[4] = {0};
int constmode;
int i, j, k;
 
vp = fd3_emit_get_vp(emit);
 
if (emit->key.binning_pass) {
/* use dummy stateobj to simplify binning vs non-binning: */
static const struct ir3_shader_variant binning_fp = {};
fp = &binning_fp;
} else {
fp = fd3_emit_get_fp(emit);
}
 
vsi = &vp->info;
fsi = &fp->info;
 
fpbuffer = BUFFER;
vpbuffer = BUFFER;
fpbuffersz = fp->instrlen;
vpbuffersz = vp->instrlen;
 
/*
* Decide whether to use BUFFER or CACHE mode for VS and FS.  256
* appears to be the hard limit, but when the combined size exceeds
* 128 the blob will try to keep FS in BUFFER mode and switch VS to
* CACHE mode until VS itself becomes too large.  The blob also seems
* to switch FS out of BUFFER mode at slightly under 128.  The exact
* decision tree is a bit fuzzy, so use slightly conservative limits.
*
* TODO check whether these thresholds for BUFFER vs CACHE mode are
* the same for all a3xx, or whether we need to consider the gpu_id
*/
 
if ((fpbuffersz + vpbuffersz) > 128) {
if (fpbuffersz < 112) {
/* FP:BUFFER VP:CACHE */
vpbuffer = CACHE;
vpbuffersz = 256 - fpbuffersz;
} else if (vpbuffersz < 112) {
/* FP:CACHE VP:BUFFER */
fpbuffer = CACHE;
fpbuffersz = 256 - vpbuffersz;
} else {
/* FP:CACHE VP:CACHE */
vpbuffer = fpbuffer = CACHE;
vpbuffersz = fpbuffersz = 192;
}
}
 
if (fpbuffer == BUFFER) {
fsoff = 128 - fpbuffersz;
} else {
fsoff = 256 - fpbuffersz;
}
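
/* Editor's note (illustrative, not part of the upstream file): worked
* example of the split above -- if vp->instrlen = 100 and fp->instrlen
* = 40, the combined size 140 exceeds 128; since fpbuffersz (40) < 112
* the VS is moved to CACHE mode with vpbuffersz = 256 - 40 = 216, the
* FS stays in BUFFER mode, and fsoff = 128 - 40 = 88.
*/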
 
/* it seems that if vs->constlen + fs->constlen > 256, then CONSTMODE=1: */
constmode = ((vp->constlen + fp->constlen) > 256) ? 1 : 0;
 
pos_regid = ir3_find_output_regid(vp,
ir3_semantic_name(TGSI_SEMANTIC_POSITION, 0));
posz_regid = ir3_find_output_regid(fp,
ir3_semantic_name(TGSI_SEMANTIC_POSITION, 0));
psize_regid = ir3_find_output_regid(vp,
ir3_semantic_name(TGSI_SEMANTIC_PSIZE, 0));
if (fp->color0_mrt) {
color_regid[0] = color_regid[1] = color_regid[2] = color_regid[3] =
ir3_find_output_regid(fp, ir3_semantic_name(TGSI_SEMANTIC_COLOR, 0));
} else {
for (int i = 0; i < fp->outputs_count; i++) {
ir3_semantic sem = fp->outputs[i].semantic;
unsigned idx = sem2idx(sem);
if (sem2name(sem) != TGSI_SEMANTIC_COLOR)
continue;
assert(idx < 4);
color_regid[idx] = fp->outputs[i].regid;
}
}
 
/* adjust regids for alpha output formats. there is no alpha render
* format, so it's just treated like red
*/
for (i = 0; i < nr; i++)
if (util_format_is_alpha(pipe_surface_format(bufs[i])))
color_regid[i] += 3;
 
/* we could probably divide this up into things that need to be
* emitted if frag-prog is dirty vs if vert-prog is dirty..
*/
 
OUT_PKT0(ring, REG_A3XX_HLSQ_CONTROL_0_REG, 6);
OUT_RING(ring, A3XX_HLSQ_CONTROL_0_REG_FSTHREADSIZE(FOUR_QUADS) |
A3XX_HLSQ_CONTROL_0_REG_CONSTMODE(constmode) |
/* NOTE: SHADERRESTART and CONSTFULLUPDATE presumably flush
* some caches?  We probably only need to set these bits when
* the const or shader state has been updated..
*/
A3XX_HLSQ_CONTROL_0_REG_SPSHADERRESTART |
A3XX_HLSQ_CONTROL_0_REG_SPCONSTFULLUPDATE);
OUT_RING(ring, A3XX_HLSQ_CONTROL_1_REG_VSTHREADSIZE(TWO_QUADS) |
A3XX_HLSQ_CONTROL_1_REG_VSSUPERTHREADENABLE |
COND(fp->frag_coord, A3XX_HLSQ_CONTROL_1_REG_ZWCOORD));
OUT_RING(ring, A3XX_HLSQ_CONTROL_2_REG_PRIMALLOCTHRESHOLD(31));
OUT_RING(ring, A3XX_HLSQ_CONTROL_3_REG_REGID(fp->pos_regid));
OUT_RING(ring, A3XX_HLSQ_VS_CONTROL_REG_CONSTLENGTH(vp->constlen) |
A3XX_HLSQ_VS_CONTROL_REG_CONSTSTARTOFFSET(0) |
A3XX_HLSQ_VS_CONTROL_REG_INSTRLENGTH(vpbuffersz));
OUT_RING(ring, A3XX_HLSQ_FS_CONTROL_REG_CONSTLENGTH(fp->constlen) |
A3XX_HLSQ_FS_CONTROL_REG_CONSTSTARTOFFSET(128) |
A3XX_HLSQ_FS_CONTROL_REG_INSTRLENGTH(fpbuffersz));
 
OUT_PKT0(ring, REG_A3XX_SP_SP_CTRL_REG, 1);
OUT_RING(ring, A3XX_SP_SP_CTRL_REG_CONSTMODE(constmode) |
COND(emit->key.binning_pass, A3XX_SP_SP_CTRL_REG_BINNING) |
A3XX_SP_SP_CTRL_REG_SLEEPMODE(1) |
A3XX_SP_SP_CTRL_REG_L0MODE(0));
 
OUT_PKT0(ring, REG_A3XX_SP_VS_LENGTH_REG, 1);
OUT_RING(ring, A3XX_SP_VS_LENGTH_REG_SHADERLENGTH(vp->instrlen));
 
OUT_PKT0(ring, REG_A3XX_SP_VS_CTRL_REG0, 3);
OUT_RING(ring, A3XX_SP_VS_CTRL_REG0_THREADMODE(MULTI) |
A3XX_SP_VS_CTRL_REG0_INSTRBUFFERMODE(vpbuffer) |
COND(vpbuffer == CACHE, A3XX_SP_VS_CTRL_REG0_CACHEINVALID) |
A3XX_SP_VS_CTRL_REG0_HALFREGFOOTPRINT(vsi->max_half_reg + 1) |
A3XX_SP_VS_CTRL_REG0_FULLREGFOOTPRINT(vsi->max_reg + 1) |
A3XX_SP_VS_CTRL_REG0_INOUTREGOVERLAP(0) |
A3XX_SP_VS_CTRL_REG0_THREADSIZE(TWO_QUADS) |
A3XX_SP_VS_CTRL_REG0_SUPERTHREADMODE |
COND(vp->has_samp, A3XX_SP_VS_CTRL_REG0_PIXLODENABLE) |
A3XX_SP_VS_CTRL_REG0_LENGTH(vpbuffersz));
OUT_RING(ring, A3XX_SP_VS_CTRL_REG1_CONSTLENGTH(vp->constlen) |
A3XX_SP_VS_CTRL_REG1_INITIALOUTSTANDING(vp->total_in) |
A3XX_SP_VS_CTRL_REG1_CONSTFOOTPRINT(MAX2(vp->constlen + 1, 0)));
OUT_RING(ring, A3XX_SP_VS_PARAM_REG_POSREGID(pos_regid) |
A3XX_SP_VS_PARAM_REG_PSIZEREGID(psize_regid) |
A3XX_SP_VS_PARAM_REG_TOTALVSOUTVAR(align(fp->total_in, 4) / 4));
 
for (i = 0, j = -1; (i < 8) && (j < (int)fp->inputs_count); i++) {
uint32_t reg = 0;
 
OUT_PKT0(ring, REG_A3XX_SP_VS_OUT_REG(i), 1);
 
j = ir3_next_varying(fp, j);
if (j < fp->inputs_count) {
k = ir3_find_output(vp, fp->inputs[j].semantic);
reg |= A3XX_SP_VS_OUT_REG_A_REGID(vp->outputs[k].regid);
reg |= A3XX_SP_VS_OUT_REG_A_COMPMASK(fp->inputs[j].compmask);
}
 
j = ir3_next_varying(fp, j);
if (j < fp->inputs_count) {
k = ir3_find_output(vp, fp->inputs[j].semantic);
reg |= A3XX_SP_VS_OUT_REG_B_REGID(vp->outputs[k].regid);
reg |= A3XX_SP_VS_OUT_REG_B_COMPMASK(fp->inputs[j].compmask);
}
 
OUT_RING(ring, reg);
}
 
for (i = 0, j = -1; (i < 4) && (j < (int)fp->inputs_count); i++) {
uint32_t reg = 0;
 
OUT_PKT0(ring, REG_A3XX_SP_VS_VPC_DST_REG(i), 1);
 
j = ir3_next_varying(fp, j);
if (j < fp->inputs_count)
reg |= A3XX_SP_VS_VPC_DST_REG_OUTLOC0(fp->inputs[j].inloc);
j = ir3_next_varying(fp, j);
if (j < fp->inputs_count)
reg |= A3XX_SP_VS_VPC_DST_REG_OUTLOC1(fp->inputs[j].inloc);
j = ir3_next_varying(fp, j);
if (j < fp->inputs_count)
reg |= A3XX_SP_VS_VPC_DST_REG_OUTLOC2(fp->inputs[j].inloc);
j = ir3_next_varying(fp, j);
if (j < fp->inputs_count)
reg |= A3XX_SP_VS_VPC_DST_REG_OUTLOC3(fp->inputs[j].inloc);
 
OUT_RING(ring, reg);
}
 
OUT_PKT0(ring, REG_A3XX_SP_VS_OBJ_OFFSET_REG, 2);
OUT_RING(ring, A3XX_SP_VS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET(0) |
A3XX_SP_VS_OBJ_OFFSET_REG_SHADEROBJOFFSET(0));
OUT_RELOC(ring, vp->bo, 0, 0, 0); /* SP_VS_OBJ_START_REG */
 
if (emit->key.binning_pass) {
OUT_PKT0(ring, REG_A3XX_SP_FS_LENGTH_REG, 1);
OUT_RING(ring, 0x00000000);
 
OUT_PKT0(ring, REG_A3XX_SP_FS_CTRL_REG0, 2);
OUT_RING(ring, A3XX_SP_FS_CTRL_REG0_THREADMODE(MULTI) |
A3XX_SP_FS_CTRL_REG0_INSTRBUFFERMODE(BUFFER));
OUT_RING(ring, 0x00000000);
 
OUT_PKT0(ring, REG_A3XX_SP_FS_OBJ_OFFSET_REG, 1);
OUT_RING(ring, A3XX_SP_FS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET(128) |
A3XX_SP_FS_OBJ_OFFSET_REG_SHADEROBJOFFSET(0));
} else {
OUT_PKT0(ring, REG_A3XX_SP_FS_LENGTH_REG, 1);
OUT_RING(ring, A3XX_SP_FS_LENGTH_REG_SHADERLENGTH(fp->instrlen));
 
OUT_PKT0(ring, REG_A3XX_SP_FS_CTRL_REG0, 2);
OUT_RING(ring, A3XX_SP_FS_CTRL_REG0_THREADMODE(MULTI) |
A3XX_SP_FS_CTRL_REG0_INSTRBUFFERMODE(fpbuffer) |
COND(fpbuffer == CACHE, A3XX_SP_FS_CTRL_REG0_CACHEINVALID) |
A3XX_SP_FS_CTRL_REG0_HALFREGFOOTPRINT(fsi->max_half_reg + 1) |
A3XX_SP_FS_CTRL_REG0_FULLREGFOOTPRINT(fsi->max_reg + 1) |
A3XX_SP_FS_CTRL_REG0_INOUTREGOVERLAP(1) |
A3XX_SP_FS_CTRL_REG0_THREADSIZE(FOUR_QUADS) |
A3XX_SP_FS_CTRL_REG0_SUPERTHREADMODE |
COND(fp->has_samp > 0, A3XX_SP_FS_CTRL_REG0_PIXLODENABLE) |
A3XX_SP_FS_CTRL_REG0_LENGTH(fpbuffersz));
OUT_RING(ring, A3XX_SP_FS_CTRL_REG1_CONSTLENGTH(fp->constlen) |
A3XX_SP_FS_CTRL_REG1_INITIALOUTSTANDING(fp->total_in) |
A3XX_SP_FS_CTRL_REG1_CONSTFOOTPRINT(MAX2(fp->constlen + 1, 0)) |
A3XX_SP_FS_CTRL_REG1_HALFPRECVAROFFSET(63));
 
OUT_PKT0(ring, REG_A3XX_SP_FS_OBJ_OFFSET_REG, 2);
OUT_RING(ring, A3XX_SP_FS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET(
MAX2(128, vp->constlen)) |
A3XX_SP_FS_OBJ_OFFSET_REG_SHADEROBJOFFSET(fsoff));
OUT_RELOC(ring, fp->bo, 0, 0, 0); /* SP_FS_OBJ_START_REG */
}
 
OUT_PKT0(ring, REG_A3XX_SP_FS_OUTPUT_REG, 1);
OUT_RING(ring,
COND(fp->writes_pos, A3XX_SP_FS_OUTPUT_REG_DEPTH_ENABLE) |
A3XX_SP_FS_OUTPUT_REG_DEPTH_REGID(posz_regid) |
A3XX_SP_FS_OUTPUT_REG_MRT(MAX2(1, nr) - 1));
 
OUT_PKT0(ring, REG_A3XX_SP_FS_MRT_REG(0), 4);
for (i = 0; i < 4; i++) {
uint32_t mrt_reg = A3XX_SP_FS_MRT_REG_REGID(color_regid[i]) |
COND(fp->key.half_precision, A3XX_SP_FS_MRT_REG_HALF_PRECISION);
 
if (i < nr) {
enum pipe_format fmt = pipe_surface_format(bufs[i]);
mrt_reg |= COND(util_format_is_pure_uint(fmt), A3XX_SP_FS_MRT_REG_UINT) |
COND(util_format_is_pure_sint(fmt), A3XX_SP_FS_MRT_REG_SINT);
}
OUT_RING(ring, mrt_reg);
}
 
if (emit->key.binning_pass) {
OUT_PKT0(ring, REG_A3XX_VPC_ATTR, 2);
OUT_RING(ring, A3XX_VPC_ATTR_THRDASSIGN(1) |
A3XX_VPC_ATTR_LMSIZE(1) |
COND(vp->writes_psize, A3XX_VPC_ATTR_PSIZE));
OUT_RING(ring, 0x00000000);
} else {
uint32_t vinterp[4], flatshade[2], vpsrepl[4];
 
memset(vinterp, 0, sizeof(vinterp));
memset(flatshade, 0, sizeof(flatshade));
memset(vpsrepl, 0, sizeof(vpsrepl));
 
/* figure out VARYING_INTERP / FLAT_SHAD register values: */
for (j = -1; (j = ir3_next_varying(fp, j)) < (int)fp->inputs_count; ) {
uint32_t interp = fp->inputs[j].interpolate;
 
/* TODO it might be cleaner to just add 8 in SP_VS_VPC_DST_REG
* instead, rather than subtracting 8 everywhere else..
*/
uint32_t inloc = fp->inputs[j].inloc - 8;
 
/* currently assuming varyings aligned to 4 (not
* packed):
*/
debug_assert((inloc % 4) == 0);
 
if ((interp == TGSI_INTERPOLATE_CONSTANT) ||
((interp == TGSI_INTERPOLATE_COLOR) && emit->rasterflat)) {
uint32_t loc = inloc;
for (i = 0; i < 4; i++, loc++) {
vinterp[loc / 16] |= FLAT << ((loc % 16) * 2);
flatshade[loc / 32] |= 1 << (loc % 32);
}
}
 
/* Replace the .xy coordinates with S/T from the point sprite. Set
* interpolation bits for .zw such that they become .01
*/
if (emit->sprite_coord_enable & (1 << sem2idx(fp->inputs[j].semantic))) {
vpsrepl[inloc / 16] |= (emit->sprite_coord_mode ? 0x0d : 0x09)
<< ((inloc % 16) * 2);
vinterp[(inloc + 2) / 16] |= 2 << (((inloc + 2) % 16) * 2);
vinterp[(inloc + 3) / 16] |= 3 << (((inloc + 3) % 16) * 2);
}
}
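
/* Editor's note (illustrative, not part of the upstream file): each
* VPC_VARYING_PS_REPL / VPC_VARYING_INTERP register packs sixteen 2-bit
* fields, one per varying component.  E.g. a point-sprite texcoord at
* inloc = 4 (after the -8 adjustment) sets bits 8..11 of vpsrepl[0] for
* the .xy replacement, and writes 2 and 3 into bits 12..13 and 14..15
* of vinterp[0] so that .z and .w come out as the constants 0 and 1.
*/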
 
OUT_PKT0(ring, REG_A3XX_VPC_ATTR, 2);
OUT_RING(ring, A3XX_VPC_ATTR_TOTALATTR(fp->total_in) |
A3XX_VPC_ATTR_THRDASSIGN(1) |
A3XX_VPC_ATTR_LMSIZE(1) |
COND(vp->writes_psize, A3XX_VPC_ATTR_PSIZE));
OUT_RING(ring, A3XX_VPC_PACK_NUMFPNONPOSVAR(fp->total_in) |
A3XX_VPC_PACK_NUMNONPOSVSVAR(fp->total_in));
 
OUT_PKT0(ring, REG_A3XX_VPC_VARYING_INTERP_MODE(0), 4);
OUT_RING(ring, vinterp[0]); /* VPC_VARYING_INTERP[0].MODE */
OUT_RING(ring, vinterp[1]); /* VPC_VARYING_INTERP[1].MODE */
OUT_RING(ring, vinterp[2]); /* VPC_VARYING_INTERP[2].MODE */
OUT_RING(ring, vinterp[3]); /* VPC_VARYING_INTERP[3].MODE */
 
OUT_PKT0(ring, REG_A3XX_VPC_VARYING_PS_REPL_MODE(0), 4);
OUT_RING(ring, vpsrepl[0]); /* VPC_VARYING_PS_REPL[0].MODE */
OUT_RING(ring, vpsrepl[1]); /* VPC_VARYING_PS_REPL[1].MODE */
OUT_RING(ring, vpsrepl[2]); /* VPC_VARYING_PS_REPL[2].MODE */
OUT_RING(ring, vpsrepl[3]); /* VPC_VARYING_PS_REPL[3].MODE */
 
OUT_PKT0(ring, REG_A3XX_SP_FS_FLAT_SHAD_MODE_REG_0, 2);
OUT_RING(ring, flatshade[0]); /* SP_FS_FLAT_SHAD_MODE_REG_0 */
OUT_RING(ring, flatshade[1]); /* SP_FS_FLAT_SHAD_MODE_REG_1 */
}
 
OUT_PKT0(ring, REG_A3XX_VFD_VS_THREADING_THRESHOLD, 1);
OUT_RING(ring, A3XX_VFD_VS_THREADING_THRESHOLD_REGID_THRESHOLD(15) |
A3XX_VFD_VS_THREADING_THRESHOLD_REGID_VTXCNT(252));
 
if (vpbuffer == BUFFER)
emit_shader(ring, vp);
 
OUT_PKT0(ring, REG_A3XX_VFD_PERFCOUNTER0_SELECT, 1);
OUT_RING(ring, 0x00000000); /* VFD_PERFCOUNTER0_SELECT */
 
if (!emit->key.binning_pass) {
if (fpbuffer == BUFFER)
emit_shader(ring, fp);
 
OUT_PKT0(ring, REG_A3XX_VFD_PERFCOUNTER0_SELECT, 1);
OUT_RING(ring, 0x00000000); /* VFD_PERFCOUNTER0_SELECT */
}
}
 
void
fd3_prog_init(struct pipe_context *pctx)
{
pctx->create_fs_state = fd3_fp_state_create;
pctx->delete_fs_state = fd3_fp_state_delete;
 
pctx->create_vs_state = fd3_vp_state_create;
pctx->delete_vs_state = fd3_vp_state_delete;
 
fd_prog_init(pctx);
}
/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/freedreno/a3xx/fd3_program.h
0,0 → 1,53
/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */
 
/*
* Copyright (C) 2013 Rob Clark <robclark@freedesktop.org>
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Authors:
* Rob Clark <robclark@freedesktop.org>
*/
 
#ifndef FD3_PROGRAM_H_
#define FD3_PROGRAM_H_
 
#include "pipe/p_context.h"
#include "freedreno_context.h"
#include "ir3_shader.h"
 
struct fd3_shader_stateobj {
struct ir3_shader *shader;
};
 
struct fd3_emit;
 
void fd3_program_emit(struct fd_ringbuffer *ring, struct fd3_emit *emit,
int nr, struct pipe_surface **bufs);
 
void fd3_prog_init(struct pipe_context *pctx);
 
static inline struct ir3_shader_variant *
fd3_shader_variant(struct fd3_shader_stateobj *so, struct ir3_shader_key key)
{
return ir3_shader_variant(so->shader, key);
}
 
#endif /* FD3_PROGRAM_H_ */
/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/freedreno/a3xx/fd3_query.c
0,0 → 1,138
/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */
 
/*
* Copyright (C) 2014 Rob Clark <robclark@freedesktop.org>
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Authors:
* Rob Clark <robclark@freedesktop.org>
*/
 
#include "freedreno_query_hw.h"
#include "freedreno_context.h"
#include "freedreno_util.h"
 
#include "fd3_query.h"
#include "fd3_format.h"
 
 
struct fd_rb_samp_ctrs {
uint64_t ctr[16];
};
 
/*
* Occlusion Query:
*
* OCCLUSION_COUNTER and OCCLUSION_PREDICATE differ only in how they
* interpret results
*/
 
static struct fd_hw_sample *
occlusion_get_sample(struct fd_context *ctx, struct fd_ringbuffer *ring)
{
struct fd_hw_sample *samp =
fd_hw_sample_init(ctx, sizeof(struct fd_rb_samp_ctrs));
 
/* Set RB_SAMPLE_COUNT_ADDR to samp->offset plus value of
* HW_QUERY_BASE_REG register:
*/
OUT_PKT3(ring, CP_SET_CONSTANT, 3);
OUT_RING(ring, CP_REG(REG_A3XX_RB_SAMPLE_COUNT_ADDR) | 0x80000000);
OUT_RING(ring, HW_QUERY_BASE_REG);
OUT_RING(ring, samp->offset);
 
OUT_PKT0(ring, REG_A3XX_RB_SAMPLE_COUNT_CONTROL, 1);
OUT_RING(ring, A3XX_RB_SAMPLE_COUNT_CONTROL_COPY);
 
OUT_PKT3(ring, CP_DRAW_INDX, 3);
OUT_RING(ring, 0x00000000);
OUT_RING(ring, DRAW(DI_PT_POINTLIST_A2XX, DI_SRC_SEL_AUTO_INDEX,
INDEX_SIZE_IGN, USE_VISIBILITY, 0));
OUT_RING(ring, 0); /* NumIndices */
 
fd_event_write(ctx, ring, ZPASS_DONE);
 
OUT_PKT0(ring, REG_A3XX_RBBM_PERFCTR_CTL, 1);
OUT_RING(ring, A3XX_RBBM_PERFCTR_CTL_ENABLE);
 
OUT_PKT0(ring, REG_A3XX_VBIF_PERF_CNT_EN, 1);
OUT_RING(ring, A3XX_VBIF_PERF_CNT_EN_CNT0 |
A3XX_VBIF_PERF_CNT_EN_CNT1 |
A3XX_VBIF_PERF_CNT_EN_PWRCNT0 |
A3XX_VBIF_PERF_CNT_EN_PWRCNT1 |
A3XX_VBIF_PERF_CNT_EN_PWRCNT2);
 
return samp;
}
 
static uint64_t
count_samples(const struct fd_rb_samp_ctrs *start,
const struct fd_rb_samp_ctrs *end)
{
uint64_t n = 0;
unsigned i;
 
/* not quite sure what all of these are; possibly a different
* counter for each MRT (render target):
*/
for (i = 0; i < 16; i += 4)
n += end->ctr[i] - start->ctr[i];
 
return n;
}
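
/* Editor's note (illustrative, not part of the upstream file): only
* every 4th slot is summed above, i.e. ctr[0], ctr[4], ctr[8] and
* ctr[12].  E.g. if end->ctr[0] - start->ctr[0] = 15 and the other
* sampled slots are unchanged, the query accumulates 15 samples.
*/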
 
static void
occlusion_counter_accumulate_result(struct fd_context *ctx,
const void *start, const void *end,
union pipe_query_result *result)
{
uint64_t n = count_samples(start, end);
result->u64 += n;
}
 
static void
occlusion_predicate_accumulate_result(struct fd_context *ctx,
const void *start, const void *end,
union pipe_query_result *result)
{
uint64_t n = count_samples(start, end);
result->b |= (n > 0);
}
 
static const struct fd_hw_sample_provider occlusion_counter = {
.query_type = PIPE_QUERY_OCCLUSION_COUNTER,
.active = FD_STAGE_DRAW,
.get_sample = occlusion_get_sample,
.accumulate_result = occlusion_counter_accumulate_result,
};
 
static const struct fd_hw_sample_provider occlusion_predicate = {
.query_type = PIPE_QUERY_OCCLUSION_PREDICATE,
.active = FD_STAGE_DRAW,
.get_sample = occlusion_get_sample,
.accumulate_result = occlusion_predicate_accumulate_result,
};
 
void fd3_query_context_init(struct pipe_context *pctx)
{
fd_hw_query_register_provider(pctx, &occlusion_counter);
fd_hw_query_register_provider(pctx, &occlusion_predicate);
}
/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/freedreno/a3xx/fd3_query.h
0,0 → 1,36
/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */
 
/*
* Copyright (C) 2014 Rob Clark <robclark@freedesktop.org>
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Authors:
* Rob Clark <robclark@freedesktop.org>
*/
 
#ifndef FD3_QUERY_H_
#define FD3_QUERY_H_
 
#include "pipe/p_context.h"
 
void fd3_query_context_init(struct pipe_context *pctx);
 
#endif /* FD3_QUERY_H_ */
/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/freedreno/a3xx/fd3_rasterizer.c
0,0 → 1,104
/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */
 
/*
* Copyright (C) 2013 Rob Clark <robclark@freedesktop.org>
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Authors:
* Rob Clark <robclark@freedesktop.org>
*/
 
 
#include "pipe/p_state.h"
#include "util/u_string.h"
#include "util/u_memory.h"
 
#include "fd3_rasterizer.h"
#include "fd3_context.h"
#include "fd3_format.h"
 
void *
fd3_rasterizer_state_create(struct pipe_context *pctx,
const struct pipe_rasterizer_state *cso)
{
struct fd3_rasterizer_stateobj *so;
float psize_min, psize_max;
 
so = CALLOC_STRUCT(fd3_rasterizer_stateobj);
if (!so)
return NULL;
 
so->base = *cso;
 
if (cso->point_size_per_vertex) {
psize_min = util_get_min_point_size(cso);
psize_max = 4092;
} else {
/* Force the point size to be as if the vertex output was disabled. */
psize_min = cso->point_size;
psize_max = cso->point_size;
}
 
/*
if (cso->line_stipple_enable) {
??? TODO line stipple
}
TODO cso->half_pixel_center
if (cso->multisample)
TODO
*/
so->gras_cl_clip_cntl = A3XX_GRAS_CL_CLIP_CNTL_IJ_PERSP_CENTER; /* ??? */
so->gras_su_point_minmax =
A3XX_GRAS_SU_POINT_MINMAX_MIN(psize_min) |
A3XX_GRAS_SU_POINT_MINMAX_MAX(psize_max);
so->gras_su_point_size = A3XX_GRAS_SU_POINT_SIZE(cso->point_size);
so->gras_su_poly_offset_scale =
A3XX_GRAS_SU_POLY_OFFSET_SCALE_VAL(cso->offset_scale);
so->gras_su_poly_offset_offset =
A3XX_GRAS_SU_POLY_OFFSET_OFFSET(cso->offset_units);
 
so->gras_su_mode_control =
A3XX_GRAS_SU_MODE_CONTROL_LINEHALFWIDTH(cso->line_width/2.0);
 
so->pc_prim_vtx_cntl =
A3XX_PC_PRIM_VTX_CNTL_POLYMODE_FRONT_PTYPE(fd_polygon_mode(cso->fill_front)) |
A3XX_PC_PRIM_VTX_CNTL_POLYMODE_BACK_PTYPE(fd_polygon_mode(cso->fill_back));
 
if (cso->fill_front != PIPE_POLYGON_MODE_FILL ||
cso->fill_back != PIPE_POLYGON_MODE_FILL)
so->pc_prim_vtx_cntl |= A3XX_PC_PRIM_VTX_CNTL_POLYMODE_ENABLE;
 
if (cso->cull_face & PIPE_FACE_FRONT)
so->gras_su_mode_control |= A3XX_GRAS_SU_MODE_CONTROL_CULL_FRONT;
if (cso->cull_face & PIPE_FACE_BACK)
so->gras_su_mode_control |= A3XX_GRAS_SU_MODE_CONTROL_CULL_BACK;
if (!cso->front_ccw)
so->gras_su_mode_control |= A3XX_GRAS_SU_MODE_CONTROL_FRONT_CW;
if (!cso->flatshade_first)
so->pc_prim_vtx_cntl |= A3XX_PC_PRIM_VTX_CNTL_PROVOKING_VTX_LAST;
 
if (cso->offset_tri)
so->gras_su_mode_control |= A3XX_GRAS_SU_MODE_CONTROL_POLY_OFFSET;
if (!cso->depth_clip)
so->gras_cl_clip_cntl |= A3XX_GRAS_CL_CLIP_CNTL_CLIP_DISABLE;
 
return so;
}
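
/* Editor's note (illustrative, not part of the upstream file): the
* hardware register takes a line *half*-width, hence line_width / 2.0
* above; and when point size is not per-vertex, programming MIN == MAX
* == cso->point_size effectively clamps every point to that constant
* size.
*/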
/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/freedreno/a3xx/fd3_rasterizer.h
0,0 → 1,56
/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */
 
/*
* Copyright (C) 2013 Rob Clark <robclark@freedesktop.org>
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Authors:
* Rob Clark <robclark@freedesktop.org>
*/
 
#ifndef FD3_RASTERIZER_H_
#define FD3_RASTERIZER_H_
 
#include "pipe/p_state.h"
#include "pipe/p_context.h"
 
struct fd3_rasterizer_stateobj {
struct pipe_rasterizer_state base;
uint32_t gras_su_point_minmax;
uint32_t gras_su_point_size;
uint32_t gras_su_poly_offset_scale;
uint32_t gras_su_poly_offset_offset;
 
uint32_t gras_su_mode_control;
uint32_t gras_cl_clip_cntl;
uint32_t pc_prim_vtx_cntl;
};
 
static INLINE struct fd3_rasterizer_stateobj *
fd3_rasterizer_stateobj(struct pipe_rasterizer_state *rast)
{
return (struct fd3_rasterizer_stateobj *)rast;
}
 
void * fd3_rasterizer_state_create(struct pipe_context *pctx,
const struct pipe_rasterizer_state *cso);
 
#endif /* FD3_RASTERIZER_H_ */
/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/freedreno/a3xx/fd3_screen.c
0,0 → 1,109
/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */
 
/*
* Copyright (C) 2013 Rob Clark <robclark@freedesktop.org>
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Authors:
* Rob Clark <robclark@freedesktop.org>
*/
 
#include "pipe/p_screen.h"
#include "util/u_format.h"
 
#include "fd3_screen.h"
#include "fd3_context.h"
#include "fd3_format.h"
 
static boolean
fd3_screen_is_format_supported(struct pipe_screen *pscreen,
enum pipe_format format,
enum pipe_texture_target target,
unsigned sample_count,
unsigned usage)
{
unsigned retval = 0;
 
if ((target >= PIPE_MAX_TEXTURE_TYPES) ||
(sample_count > 1) || /* TODO add MSAA */
!util_format_is_supported(format, usage)) {
DBG("not supported: format=%s, target=%d, sample_count=%d, usage=%x",
util_format_name(format), target, sample_count, usage);
return FALSE;
}
 
if ((usage & PIPE_BIND_VERTEX_BUFFER) &&
(fd3_pipe2vtx(format) != ~0)) {
retval |= PIPE_BIND_VERTEX_BUFFER;
}
 
if ((usage & PIPE_BIND_SAMPLER_VIEW) &&
(fd3_pipe2tex(format) != ~0)) {
retval |= PIPE_BIND_SAMPLER_VIEW;
}
 
if ((usage & (PIPE_BIND_RENDER_TARGET |
PIPE_BIND_DISPLAY_TARGET |
PIPE_BIND_SCANOUT |
PIPE_BIND_SHARED |
PIPE_BIND_BLENDABLE)) &&
(fd3_pipe2color(format) != ~0) &&
(fd3_pipe2tex(format) != ~0)) {
retval |= usage & (PIPE_BIND_RENDER_TARGET |
PIPE_BIND_DISPLAY_TARGET |
PIPE_BIND_SCANOUT |
PIPE_BIND_SHARED);
if (!util_format_is_pure_integer(format))
retval |= usage & PIPE_BIND_BLENDABLE;
}
 
if ((usage & PIPE_BIND_DEPTH_STENCIL) &&
(fd_pipe2depth(format) != ~0) &&
(fd3_pipe2tex(format) != ~0)) {
retval |= PIPE_BIND_DEPTH_STENCIL;
}
 
if ((usage & PIPE_BIND_INDEX_BUFFER) &&
(fd_pipe2index(format) != ~0)) {
retval |= PIPE_BIND_INDEX_BUFFER;
}
 
if (usage & PIPE_BIND_TRANSFER_READ)
retval |= PIPE_BIND_TRANSFER_READ;
if (usage & PIPE_BIND_TRANSFER_WRITE)
retval |= PIPE_BIND_TRANSFER_WRITE;
 
if (retval != usage) {
DBG("not supported: format=%s, target=%d, sample_count=%d, "
"usage=%x, retval=%x", util_format_name(format),
target, sample_count, usage, retval);
}
 
return retval == usage;
}
 
void
fd3_screen_init(struct pipe_screen *pscreen)
{
fd_screen(pscreen)->max_rts = 4;
pscreen->context_create = fd3_context_create;
pscreen->is_format_supported = fd3_screen_is_format_supported;
}
/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/freedreno/a3xx/fd3_screen.h
0,0 → 1,36
/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */
 
/*
* Copyright (C) 2013 Rob Clark <robclark@freedesktop.org>
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Authors:
* Rob Clark <robclark@freedesktop.org>
*/
 
#ifndef FD3_SCREEN_H_
#define FD3_SCREEN_H_
 
#include "pipe/p_screen.h"
 
void fd3_screen_init(struct pipe_screen *pscreen);
 
#endif /* FD3_SCREEN_H_ */
/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/freedreno/a3xx/fd3_texture.c
0,0 → 1,306
/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */
 
/*
* Copyright (C) 2013 Rob Clark <robclark@freedesktop.org>
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Authors:
* Rob Clark <robclark@freedesktop.org>
*/
 
#include "pipe/p_state.h"
#include "util/u_string.h"
#include "util/u_memory.h"
#include "util/u_inlines.h"
#include "util/u_format.h"
 
#include "fd3_texture.h"
#include "fd3_format.h"
 
static enum a3xx_tex_clamp
tex_clamp(unsigned wrap, bool clamp_to_edge)
{
/* Hardware does not support _CLAMP, but we emulate it: */
if (wrap == PIPE_TEX_WRAP_CLAMP) {
wrap = (clamp_to_edge) ?
PIPE_TEX_WRAP_CLAMP_TO_EDGE : PIPE_TEX_WRAP_CLAMP_TO_BORDER;
}
 
switch (wrap) {
case PIPE_TEX_WRAP_REPEAT:
return A3XX_TEX_REPEAT;
case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
return A3XX_TEX_CLAMP_TO_EDGE;
case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
return A3XX_TEX_CLAMP_TO_BORDER;
case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
/* only works for PoT.. need to emulate otherwise! */
return A3XX_TEX_MIRROR_CLAMP;
case PIPE_TEX_WRAP_MIRROR_REPEAT:
return A3XX_TEX_MIRROR_REPEAT;
case PIPE_TEX_WRAP_MIRROR_CLAMP:
case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
/* these two we could perhaps emulate, but we currently
* just don't advertise PIPE_CAP_TEXTURE_MIRROR_CLAMP
*/
default:
DBG("invalid wrap: %u", wrap);
return 0;
}
}
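
/* Editor's note (illustrative, not part of the upstream file): summary
* of the _CLAMP lowering above as used by fd3_sampler_state_create():
*   nearest min filter -> A3XX_TEX_CLAMP_TO_EDGE (no shader fixup)
*   linear min filter  -> A3XX_TEX_CLAMP_TO_BORDER, plus the
*                         saturate_s/t/r flags so the shader clamps the
*                         coordinates to [0.0, 1.0]
*/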
 
static enum a3xx_tex_filter
tex_filter(unsigned filter, bool aniso)
{
switch (filter) {
case PIPE_TEX_FILTER_NEAREST:
return A3XX_TEX_NEAREST;
case PIPE_TEX_FILTER_LINEAR:
return aniso ? A3XX_TEX_ANISO : A3XX_TEX_LINEAR;
default:
DBG("invalid filter: %u", filter);
return 0;
}
}
 
static void *
fd3_sampler_state_create(struct pipe_context *pctx,
const struct pipe_sampler_state *cso)
{
struct fd3_sampler_stateobj *so = CALLOC_STRUCT(fd3_sampler_stateobj);
unsigned aniso = util_last_bit(MIN2(cso->max_anisotropy >> 1, 8));
bool miplinear = false;
bool clamp_to_edge;
 
if (!so)
return NULL;
 
if (cso->min_mip_filter == PIPE_TEX_MIPFILTER_LINEAR)
miplinear = true;
 
so->base = *cso;
 
/*
* For nearest filtering, _CLAMP means _CLAMP_TO_EDGE; for linear
* filtering, _CLAMP means _CLAMP_TO_BORDER while additionally
* clamping the texture coordinates to [0.0, 1.0].
*
* The clamping will be taken care of in the shaders. There are two
* filters here, but let the minification one have the say.
*/
clamp_to_edge = (cso->min_img_filter == PIPE_TEX_FILTER_NEAREST);
if (!clamp_to_edge) {
so->saturate_s = (cso->wrap_s == PIPE_TEX_WRAP_CLAMP);
so->saturate_t = (cso->wrap_t == PIPE_TEX_WRAP_CLAMP);
so->saturate_r = (cso->wrap_r == PIPE_TEX_WRAP_CLAMP);
}
 
so->texsamp0 =
COND(!cso->normalized_coords, A3XX_TEX_SAMP_0_UNNORM_COORDS) |
COND(miplinear, A3XX_TEX_SAMP_0_MIPFILTER_LINEAR) |
A3XX_TEX_SAMP_0_XY_MAG(tex_filter(cso->mag_img_filter, aniso)) |
A3XX_TEX_SAMP_0_XY_MIN(tex_filter(cso->min_img_filter, aniso)) |
A3XX_TEX_SAMP_0_ANISO(aniso) |
A3XX_TEX_SAMP_0_WRAP_S(tex_clamp(cso->wrap_s, clamp_to_edge)) |
A3XX_TEX_SAMP_0_WRAP_T(tex_clamp(cso->wrap_t, clamp_to_edge)) |
A3XX_TEX_SAMP_0_WRAP_R(tex_clamp(cso->wrap_r, clamp_to_edge));
 
if (cso->compare_mode)
so->texsamp0 |= A3XX_TEX_SAMP_0_COMPARE_FUNC(cso->compare_func); /* maps 1:1 */
 
if (cso->min_mip_filter != PIPE_TEX_MIPFILTER_NONE) {
so->texsamp1 =
A3XX_TEX_SAMP_1_LOD_BIAS(cso->lod_bias) |
A3XX_TEX_SAMP_1_MIN_LOD(cso->min_lod) |
A3XX_TEX_SAMP_1_MAX_LOD(cso->max_lod);
} else {
so->texsamp1 = 0x00000000;
}
 
return so;
}
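
/* Editor's note (illustrative, not part of the upstream file): the ANISO
* field appears to be a log2-style code -- e.g. max_anisotropy = 16 gives
* util_last_bit(MIN2(16 >> 1, 8)) = util_last_bit(8) = 4 (i.e. 16x),
* while max_anisotropy <= 1 gives 0, leaving anisotropic filtering off.
*/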
 
static void
fd3_sampler_states_bind(struct pipe_context *pctx,
unsigned shader, unsigned start,
unsigned nr, void **hwcso)
{
struct fd_context *ctx = fd_context(pctx);
struct fd3_context *fd3_ctx = fd3_context(ctx);
uint16_t saturate_s = 0, saturate_t = 0, saturate_r = 0;
unsigned i;
 
for (i = 0; i < nr; i++) {
if (hwcso[i]) {
struct fd3_sampler_stateobj *sampler =
fd3_sampler_stateobj(hwcso[i]);
if (sampler->saturate_s)
saturate_s |= (1 << i);
if (sampler->saturate_t)
saturate_t |= (1 << i);
if (sampler->saturate_r)
saturate_r |= (1 << i);
}
}
 
fd_sampler_states_bind(pctx, shader, start, nr, hwcso);
 
if (shader == PIPE_SHADER_FRAGMENT) {
fd3_ctx->fsaturate =
(saturate_s != 0) ||
(saturate_t != 0) ||
(saturate_r != 0);
fd3_ctx->fsaturate_s = saturate_s;
fd3_ctx->fsaturate_t = saturate_t;
fd3_ctx->fsaturate_r = saturate_r;
} else if (shader == PIPE_SHADER_VERTEX) {
fd3_ctx->vsaturate =
(saturate_s != 0) ||
(saturate_t != 0) ||
(saturate_r != 0);
fd3_ctx->vsaturate_s = saturate_s;
fd3_ctx->vsaturate_t = saturate_t;
fd3_ctx->vsaturate_r = saturate_r;
}
}
 
static enum a3xx_tex_type
tex_type(unsigned target)
{
switch (target) {
default:
assert(0);
case PIPE_BUFFER:
case PIPE_TEXTURE_1D:
case PIPE_TEXTURE_1D_ARRAY:
return A3XX_TEX_1D;
case PIPE_TEXTURE_RECT:
case PIPE_TEXTURE_2D:
case PIPE_TEXTURE_2D_ARRAY:
return A3XX_TEX_2D;
case PIPE_TEXTURE_3D:
return A3XX_TEX_3D;
case PIPE_TEXTURE_CUBE:
case PIPE_TEXTURE_CUBE_ARRAY:
return A3XX_TEX_CUBE;
}
}
 
static struct pipe_sampler_view *
fd3_sampler_view_create(struct pipe_context *pctx, struct pipe_resource *prsc,
const struct pipe_sampler_view *cso)
{
struct fd3_pipe_sampler_view *so = CALLOC_STRUCT(fd3_pipe_sampler_view);
struct fd_resource *rsc = fd_resource(prsc);
unsigned lvl = cso->u.tex.first_level;
unsigned miplevels = cso->u.tex.last_level - lvl;
uint32_t sz2 = 0;
 
if (!so)
return NULL;
 
so->base = *cso;
pipe_reference(NULL, &prsc->reference);
so->base.texture = prsc;
so->base.reference.count = 1;
so->base.context = pctx;
 
so->texconst0 =
A3XX_TEX_CONST_0_TYPE(tex_type(prsc->target)) |
A3XX_TEX_CONST_0_FMT(fd3_pipe2tex(cso->format)) |
A3XX_TEX_CONST_0_MIPLVLS(miplevels) |
fd3_tex_swiz(cso->format, cso->swizzle_r, cso->swizzle_g,
cso->swizzle_b, cso->swizzle_a);
 
if (util_format_is_srgb(cso->format))
so->texconst0 |= A3XX_TEX_CONST_0_SRGB;
 
so->texconst1 =
A3XX_TEX_CONST_1_FETCHSIZE(fd3_pipe2fetchsize(cso->format)) |
A3XX_TEX_CONST_1_WIDTH(u_minify(prsc->width0, lvl)) |
A3XX_TEX_CONST_1_HEIGHT(u_minify(prsc->height0, lvl));
/* when emitted, A3XX_TEX_CONST_2_INDX() must be OR'd in: */
so->texconst2 =
A3XX_TEX_CONST_2_PITCH(rsc->slices[lvl].pitch * rsc->cpp);
switch (prsc->target) {
case PIPE_TEXTURE_1D_ARRAY:
case PIPE_TEXTURE_2D_ARRAY:
so->texconst3 =
A3XX_TEX_CONST_3_DEPTH(prsc->array_size - 1) |
A3XX_TEX_CONST_3_LAYERSZ1(rsc->slices[0].size0);
break;
case PIPE_TEXTURE_3D:
so->texconst3 =
A3XX_TEX_CONST_3_DEPTH(u_minify(prsc->depth0, lvl)) |
A3XX_TEX_CONST_3_LAYERSZ1(rsc->slices[lvl].size0);
while (lvl < cso->u.tex.last_level && sz2 != rsc->slices[lvl+1].size0)
sz2 = rsc->slices[++lvl].size0;
so->texconst3 |= A3XX_TEX_CONST_3_LAYERSZ2(sz2);
break;
default:
so->texconst3 = 0x00000000;
break;
}
 
return &so->base;
}
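
/* Editor's note (illustrative, not part of the upstream file): for 3D
* textures LAYERSZ1 is the slice size of the base mip level, and the
* while loop above walks down the mip chain until the per-level slice
* size stops changing, programming that terminal size as LAYERSZ2 --
* presumably how the hardware locates layers in the smaller mips.
*/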
 
static void
fd3_set_sampler_views(struct pipe_context *pctx, unsigned shader,
unsigned start, unsigned nr,
struct pipe_sampler_view **views)
{
struct fd_context *ctx = fd_context(pctx);
struct fd3_context *fd3_ctx = fd3_context(ctx);
struct fd_texture_stateobj *tex;
uint16_t integer_s = 0, *ptr;
int i;
 
fd_set_sampler_views(pctx, shader, start, nr, views);
 
switch (shader) {
case PIPE_SHADER_FRAGMENT:
tex = &ctx->fragtex;
ptr = &fd3_ctx->finteger_s;
break;
case PIPE_SHADER_VERTEX:
tex = &ctx->verttex;
ptr = &fd3_ctx->vinteger_s;
break;
default:
return;
}
 
for (i = 0; i < tex->num_textures; i++)
if (util_format_is_pure_integer(tex->textures[i]->format))
integer_s |= 1 << i;
*ptr = integer_s;
}
 
 
void
fd3_texture_init(struct pipe_context *pctx)
{
pctx->create_sampler_state = fd3_sampler_state_create;
pctx->bind_sampler_states = fd3_sampler_states_bind;
pctx->create_sampler_view = fd3_sampler_view_create;
pctx->set_sampler_views = fd3_set_sampler_views;
}
/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/freedreno/a3xx/fd3_texture.h
0,0 → 1,68
/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */
 
/*
* Copyright (C) 2013 Rob Clark <robclark@freedesktop.org>
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Authors:
* Rob Clark <robclark@freedesktop.org>
*/
 
#ifndef FD3_TEXTURE_H_
#define FD3_TEXTURE_H_
 
#include "pipe/p_context.h"
 
#include "freedreno_texture.h"
#include "freedreno_resource.h"
 
#include "fd3_context.h"
#include "fd3_format.h"
 
struct fd3_sampler_stateobj {
struct pipe_sampler_state base;
uint32_t texsamp0, texsamp1;
bool saturate_s, saturate_t, saturate_r;
};
 
static INLINE struct fd3_sampler_stateobj *
fd3_sampler_stateobj(struct pipe_sampler_state *samp)
{
return (struct fd3_sampler_stateobj *)samp;
}
 
struct fd3_pipe_sampler_view {
struct pipe_sampler_view base;
uint32_t texconst0, texconst1, texconst2, texconst3;
};
 
static INLINE struct fd3_pipe_sampler_view *
fd3_pipe_sampler_view(struct pipe_sampler_view *pview)
{
return (struct fd3_pipe_sampler_view *)pview;
}
 
unsigned fd3_get_const_idx(struct fd_context *ctx,
struct fd_texture_stateobj *tex, unsigned samp_id);
 
void fd3_texture_init(struct pipe_context *pctx);
 
#endif /* FD3_TEXTURE_H_ */
/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/freedreno/a3xx/fd3_zsa.c
0,0 → 1,104
/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */
 
/*
* Copyright (C) 2013 Rob Clark <robclark@freedesktop.org>
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Authors:
* Rob Clark <robclark@freedesktop.org>
*/
 
 
#include "pipe/p_state.h"
#include "util/u_string.h"
#include "util/u_memory.h"
 
#include "fd3_zsa.h"
#include "fd3_context.h"
#include "fd3_format.h"
 
void *
fd3_zsa_state_create(struct pipe_context *pctx,
const struct pipe_depth_stencil_alpha_state *cso)
{
struct fd3_zsa_stateobj *so;
 
so = CALLOC_STRUCT(fd3_zsa_stateobj);
if (!so)
return NULL;
 
so->base = *cso;
 
so->rb_depth_control |=
A3XX_RB_DEPTH_CONTROL_ZFUNC(cso->depth.func); /* maps 1:1 */
 
if (cso->depth.enabled)
so->rb_depth_control |=
A3XX_RB_DEPTH_CONTROL_Z_ENABLE |
A3XX_RB_DEPTH_CONTROL_Z_TEST_ENABLE;
 
if (cso->depth.writemask)
so->rb_depth_control |= A3XX_RB_DEPTH_CONTROL_Z_WRITE_ENABLE;
 
if (cso->stencil[0].enabled) {
const struct pipe_stencil_state *s = &cso->stencil[0];
 
so->rb_stencil_control |=
A3XX_RB_STENCIL_CONTROL_STENCIL_READ |
A3XX_RB_STENCIL_CONTROL_STENCIL_ENABLE |
A3XX_RB_STENCIL_CONTROL_FUNC(s->func) | /* maps 1:1 */
A3XX_RB_STENCIL_CONTROL_FAIL(fd_stencil_op(s->fail_op)) |
A3XX_RB_STENCIL_CONTROL_ZPASS(fd_stencil_op(s->zpass_op)) |
A3XX_RB_STENCIL_CONTROL_ZFAIL(fd_stencil_op(s->zfail_op));
so->rb_stencilrefmask |=
0xff000000 | /* ??? */
A3XX_RB_STENCILREFMASK_STENCILWRITEMASK(s->writemask) |
A3XX_RB_STENCILREFMASK_STENCILMASK(s->valuemask);
 
if (cso->stencil[1].enabled) {
const struct pipe_stencil_state *bs = &cso->stencil[1];
 
so->rb_stencil_control |=
A3XX_RB_STENCIL_CONTROL_STENCIL_ENABLE_BF |
A3XX_RB_STENCIL_CONTROL_FUNC_BF(bs->func) | /* maps 1:1 */
A3XX_RB_STENCIL_CONTROL_FAIL_BF(fd_stencil_op(bs->fail_op)) |
A3XX_RB_STENCIL_CONTROL_ZPASS_BF(fd_stencil_op(bs->zpass_op)) |
A3XX_RB_STENCIL_CONTROL_ZFAIL_BF(fd_stencil_op(bs->zfail_op));
so->rb_stencilrefmask_bf |=
0xff000000 | /* ??? */
A3XX_RB_STENCILREFMASK_STENCILWRITEMASK(bs->writemask) |
A3XX_RB_STENCILREFMASK_STENCILMASK(bs->valuemask);
}
}
 
if (cso->alpha.enabled) {
so->rb_render_control =
A3XX_RB_RENDER_CONTROL_ALPHA_TEST |
A3XX_RB_RENDER_CONTROL_ALPHA_TEST_FUNC(cso->alpha.func);
so->rb_alpha_ref =
A3XX_RB_ALPHA_REF_UINT(cso->alpha.ref_value * 255.0) |
A3XX_RB_ALPHA_REF_FLOAT(cso->alpha.ref_value);
so->rb_depth_control |=
A3XX_RB_DEPTH_CONTROL_EARLY_Z_DISABLE;
}
 
return so;
}
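
/* Editor's note (illustrative, not part of the upstream file): the alpha
* test discards fragments after shading, so EARLY_Z_DISABLE is set above
* to keep depth from being written for fragments the alpha test later
* kills.  The reference value is programmed in two encodings, e.g.
* ref_value = 0.5 yields a UINT field of 127 (0.5 * 255 truncated) and a
* floating-point field holding 0.5.
*/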
/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/freedreno/a3xx/fd3_zsa.h
0,0 → 1,57
/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */
 
/*
* Copyright (C) 2013 Rob Clark <robclark@freedesktop.org>
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Authors:
* Rob Clark <robclark@freedesktop.org>
*/
 
#ifndef FD3_ZSA_H_
#define FD3_ZSA_H_
 
 
#include "pipe/p_state.h"
#include "pipe/p_context.h"
 
#include "freedreno_util.h"
 
struct fd3_zsa_stateobj {
struct pipe_depth_stencil_alpha_state base;
uint32_t rb_render_control;
uint32_t rb_alpha_ref;
uint32_t rb_depth_control;
uint32_t rb_stencil_control;
uint32_t rb_stencilrefmask;
uint32_t rb_stencilrefmask_bf;
};
 
static INLINE struct fd3_zsa_stateobj *
fd3_zsa_stateobj(struct pipe_depth_stencil_alpha_state *zsa)
{
return (struct fd3_zsa_stateobj *)zsa;
}
 
void * fd3_zsa_state_create(struct pipe_context *pctx,
const struct pipe_depth_stencil_alpha_state *cso);
 
#endif /* FD3_ZSA_H_ */
/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/freedreno/a4xx/a4xx.xml.h
0,0 → 1,2618
#ifndef A4XX_XML
#define A4XX_XML
 
/* Autogenerated file, DO NOT EDIT manually!
 
This file was generated by the rules-ng-ng headergen tool in this git repository:
http://github.com/freedreno/envytools/
git clone https://github.com/freedreno/envytools.git
 
The rules-ng-ng source files this header was generated from are:
- /home/robclark/src/freedreno/envytools/rnndb/adreno.xml ( 364 bytes, from 2013-11-30 14:47:15)
- /home/robclark/src/freedreno/envytools/rnndb/freedreno_copyright.xml ( 1453 bytes, from 2013-03-31 16:51:27)
- /home/robclark/src/freedreno/envytools/rnndb/adreno/a2xx.xml ( 32901 bytes, from 2014-06-02 15:21:30)
- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_common.xml ( 10551 bytes, from 2014-11-13 22:44:30)
- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_pm4.xml ( 14895 bytes, from 2015-04-19 15:23:28)
- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml ( 66709 bytes, from 2015-04-12 18:16:35)
- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml ( 59314 bytes, from 2015-04-19 16:21:40)
 
Copyright (C) 2013-2015 by the following authors:
- Rob Clark <robdclark@gmail.com> (robclark)
 
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
 
The above copyright notice and this permission notice (including the
next paragraph) shall be included in all copies or substantial
portions of the Software.
 
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
 
 
enum a4xx_color_fmt {
RB4_A8_UNORM = 1,
RB4_R8_UNORM = 2,
RB4_R4G4B4A4_UNORM = 8,
RB4_R5G5B5A1_UNORM = 10,
RB4_R5G6B5_UNORM = 14,
RB4_R8G8_UNORM = 15,
RB4_R8G8_SNORM = 16,
RB4_R8G8_UINT = 17,
RB4_R8G8_SINT = 18,
RB4_R16_FLOAT = 21,
RB4_R16_UINT = 22,
RB4_R16_SINT = 23,
RB4_R8G8B8_UNORM = 25,
RB4_R8G8B8A8_UNORM = 26,
RB4_R8G8B8A8_SNORM = 28,
RB4_R8G8B8A8_UINT = 29,
RB4_R8G8B8A8_SINT = 30,
RB4_R10G10B10A2_UNORM = 31,
RB4_R10G10B10A2_UINT = 34,
RB4_R11G11B10_FLOAT = 39,
RB4_R16G16_FLOAT = 42,
RB4_R16G16_UINT = 43,
RB4_R16G16_SINT = 44,
RB4_R32_FLOAT = 45,
RB4_R32_UINT = 46,
RB4_R32_SINT = 47,
RB4_R16G16B16A16_FLOAT = 54,
RB4_R16G16B16A16_UINT = 55,
RB4_R16G16B16A16_SINT = 56,
RB4_R32G32_FLOAT = 57,
RB4_R32G32_UINT = 58,
RB4_R32G32_SINT = 59,
RB4_R32G32B32A32_FLOAT = 60,
RB4_R32G32B32A32_UINT = 61,
RB4_R32G32B32A32_SINT = 62,
};
 
enum a4xx_tile_mode {
TILE4_LINEAR = 0,
TILE4_3 = 3,
};
 
enum a4xx_rb_blend_opcode {
BLEND_DST_PLUS_SRC = 0,
BLEND_SRC_MINUS_DST = 1,
BLEND_DST_MINUS_SRC = 2,
BLEND_MIN_DST_SRC = 3,
BLEND_MAX_DST_SRC = 4,
};
 
enum a4xx_vtx_fmt {
VFMT4_32_FLOAT = 1,
VFMT4_32_32_FLOAT = 2,
VFMT4_32_32_32_FLOAT = 3,
VFMT4_32_32_32_32_FLOAT = 4,
VFMT4_16_FLOAT = 5,
VFMT4_16_16_FLOAT = 6,
VFMT4_16_16_16_FLOAT = 7,
VFMT4_16_16_16_16_FLOAT = 8,
VFMT4_32_FIXED = 9,
VFMT4_32_32_FIXED = 10,
VFMT4_32_32_32_FIXED = 11,
VFMT4_32_32_32_32_FIXED = 12,
VFMT4_16_SINT = 16,
VFMT4_16_16_SINT = 17,
VFMT4_16_16_16_SINT = 18,
VFMT4_16_16_16_16_SINT = 19,
VFMT4_16_UINT = 20,
VFMT4_16_16_UINT = 21,
VFMT4_16_16_16_UINT = 22,
VFMT4_16_16_16_16_UINT = 23,
VFMT4_16_SNORM = 24,
VFMT4_16_16_SNORM = 25,
VFMT4_16_16_16_SNORM = 26,
VFMT4_16_16_16_16_SNORM = 27,
VFMT4_16_UNORM = 28,
VFMT4_16_16_UNORM = 29,
VFMT4_16_16_16_UNORM = 30,
VFMT4_16_16_16_16_UNORM = 31,
VFMT4_32_UINT = 32,
VFMT4_32_32_UINT = 33,
VFMT4_32_32_32_UINT = 34,
VFMT4_32_32_32_32_UINT = 35,
VFMT4_32_SINT = 36,
VFMT4_32_32_SINT = 37,
VFMT4_32_32_32_SINT = 38,
VFMT4_32_32_32_32_SINT = 39,
VFMT4_8_UINT = 40,
VFMT4_8_8_UINT = 41,
VFMT4_8_8_8_UINT = 42,
VFMT4_8_8_8_8_UINT = 43,
VFMT4_8_UNORM = 44,
VFMT4_8_8_UNORM = 45,
VFMT4_8_8_8_UNORM = 46,
VFMT4_8_8_8_8_UNORM = 47,
VFMT4_8_SINT = 48,
VFMT4_8_8_SINT = 49,
VFMT4_8_8_8_SINT = 50,
VFMT4_8_8_8_8_SINT = 51,
VFMT4_8_SNORM = 52,
VFMT4_8_8_SNORM = 53,
VFMT4_8_8_8_SNORM = 54,
VFMT4_8_8_8_8_SNORM = 55,
VFMT4_10_10_10_2_UINT = 60,
VFMT4_10_10_10_2_UNORM = 61,
VFMT4_10_10_10_2_SINT = 62,
VFMT4_10_10_10_2_SNORM = 63,
};
 
enum a4xx_tex_fmt {
TFMT4_5_6_5_UNORM = 11,
TFMT4_5_5_5_1_UNORM = 10,
TFMT4_4_4_4_4_UNORM = 8,
TFMT4_X8Z24_UNORM = 71,
TFMT4_10_10_10_2_UNORM = 33,
TFMT4_A8_UNORM = 3,
TFMT4_L8_A8_UNORM = 13,
TFMT4_8_UNORM = 4,
TFMT4_8_8_UNORM = 14,
TFMT4_8_8_8_8_UNORM = 28,
TFMT4_8_8_SNORM = 15,
TFMT4_8_8_8_8_SNORM = 29,
TFMT4_8_8_UINT = 16,
TFMT4_8_8_8_8_UINT = 30,
TFMT4_8_8_SINT = 17,
TFMT4_8_8_8_8_SINT = 31,
TFMT4_16_UINT = 21,
TFMT4_16_16_UINT = 41,
TFMT4_16_16_16_16_UINT = 54,
TFMT4_16_SINT = 22,
TFMT4_16_16_SINT = 42,
TFMT4_16_16_16_16_SINT = 55,
TFMT4_32_UINT = 44,
TFMT4_32_32_UINT = 57,
TFMT4_32_32_32_32_UINT = 64,
TFMT4_32_SINT = 45,
TFMT4_32_32_SINT = 58,
TFMT4_32_32_32_32_SINT = 65,
TFMT4_16_FLOAT = 20,
TFMT4_16_16_FLOAT = 40,
TFMT4_16_16_16_16_FLOAT = 53,
TFMT4_32_FLOAT = 43,
TFMT4_32_32_FLOAT = 56,
TFMT4_32_32_32_32_FLOAT = 63,
TFMT4_9_9_9_E5_FLOAT = 32,
TFMT4_11_11_10_FLOAT = 37,
TFMT4_ATC_RGB = 100,
TFMT4_ATC_RGBA_EXPLICIT = 101,
TFMT4_ATC_RGBA_INTERPOLATED = 102,
TFMT4_ETC2_RG11_UNORM = 103,
TFMT4_ETC2_RG11_SNORM = 104,
TFMT4_ETC2_R11_UNORM = 105,
TFMT4_ETC2_R11_SNORM = 106,
TFMT4_ETC1 = 107,
TFMT4_ETC2_RGB8 = 108,
TFMT4_ETC2_RGBA8 = 109,
TFMT4_ETC2_RGB8A1 = 110,
TFMT4_ASTC_4x4 = 111,
TFMT4_ASTC_5x4 = 112,
TFMT4_ASTC_5x5 = 113,
TFMT4_ASTC_6x5 = 114,
TFMT4_ASTC_6x6 = 115,
TFMT4_ASTC_8x5 = 116,
TFMT4_ASTC_8x6 = 117,
TFMT4_ASTC_8x8 = 118,
TFMT4_ASTC_10x5 = 119,
TFMT4_ASTC_10x6 = 120,
TFMT4_ASTC_10x8 = 121,
TFMT4_ASTC_10x10 = 122,
TFMT4_ASTC_12x10 = 123,
TFMT4_ASTC_12x12 = 124,
};
 
enum a4xx_tex_fetchsize {
TFETCH4_1_BYTE = 0,
TFETCH4_2_BYTE = 1,
TFETCH4_4_BYTE = 2,
TFETCH4_8_BYTE = 3,
TFETCH4_16_BYTE = 4,
};
 
enum a4xx_depth_format {
DEPTH4_NONE = 0,
DEPTH4_16 = 1,
DEPTH4_24_8 = 2,
};
 
enum a4xx_tess_spacing {
EQUAL_SPACING = 0,
ODD_SPACING = 2,
EVEN_SPACING = 3,
};
 
enum a4xx_tex_filter {
A4XX_TEX_NEAREST = 0,
A4XX_TEX_LINEAR = 1,
A4XX_TEX_ANISO = 2,
};
 
enum a4xx_tex_clamp {
A4XX_TEX_REPEAT = 0,
A4XX_TEX_CLAMP_TO_EDGE = 1,
A4XX_TEX_MIRROR_REPEAT = 2,
A4XX_TEX_CLAMP_NONE = 3,
};
 
enum a4xx_tex_aniso {
A4XX_TEX_ANISO_1 = 0,
A4XX_TEX_ANISO_2 = 1,
A4XX_TEX_ANISO_4 = 2,
A4XX_TEX_ANISO_8 = 3,
A4XX_TEX_ANISO_16 = 4,
};
 
enum a4xx_tex_swiz {
A4XX_TEX_X = 0,
A4XX_TEX_Y = 1,
A4XX_TEX_Z = 2,
A4XX_TEX_W = 3,
A4XX_TEX_ZERO = 4,
A4XX_TEX_ONE = 5,
};
 
enum a4xx_tex_type {
A4XX_TEX_1D = 0,
A4XX_TEX_2D = 1,
A4XX_TEX_CUBE = 2,
A4XX_TEX_3D = 3,
};
 
#define A4XX_CGC_HLSQ_EARLY_CYC__MASK 0x00700000
#define A4XX_CGC_HLSQ_EARLY_CYC__SHIFT 20
static inline uint32_t A4XX_CGC_HLSQ_EARLY_CYC(uint32_t val)
{
return ((val) << A4XX_CGC_HLSQ_EARLY_CYC__SHIFT) & A4XX_CGC_HLSQ_EARLY_CYC__MASK;
}
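 
/* Illustrative sketch, not part of the generated header: every field below
 * follows this same pattern -- a FOO__MASK/FOO__SHIFT pair plus an inline
 * packer that shifts the value into place and masks off any overflow. A
 * hypothetical unpack helper for the field above just inverts the packer:
 */
static inline uint32_t example_unpack_hlsq_early_cyc(uint32_t reg)
{
	return (reg & A4XX_CGC_HLSQ_EARLY_CYC__MASK) >> A4XX_CGC_HLSQ_EARLY_CYC__SHIFT;
}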
#define A4XX_INT0_RBBM_GPU_IDLE 0x00000001
#define A4XX_INT0_RBBM_AHB_ERROR 0x00000002
#define A4XX_INT0_RBBM_REG_TIMEOUT 0x00000004
#define A4XX_INT0_RBBM_ME_MS_TIMEOUT 0x00000008
#define A4XX_INT0_RBBM_PFP_MS_TIMEOUT 0x00000010
#define A4XX_INT0_RBBM_ATB_BUS_OVERFLOW 0x00000020
#define A4XX_INT0_VFD_ERROR 0x00000040
#define A4XX_INT0_CP_SW_INT 0x00000080
#define A4XX_INT0_CP_T0_PACKET_IN_IB 0x00000100
#define A4XX_INT0_CP_OPCODE_ERROR 0x00000200
#define A4XX_INT0_CP_RESERVED_BIT_ERROR 0x00000400
#define A4XX_INT0_CP_HW_FAULT 0x00000800
#define A4XX_INT0_CP_DMA 0x00001000
#define A4XX_INT0_CP_IB2_INT 0x00002000
#define A4XX_INT0_CP_IB1_INT 0x00004000
#define A4XX_INT0_CP_RB_INT 0x00008000
#define A4XX_INT0_CP_REG_PROTECT_FAULT 0x00010000
#define A4XX_INT0_CP_RB_DONE_TS 0x00020000
#define A4XX_INT0_CP_VS_DONE_TS 0x00040000
#define A4XX_INT0_CP_PS_DONE_TS 0x00080000
#define A4XX_INT0_CACHE_FLUSH_TS 0x00100000
#define A4XX_INT0_CP_AHB_ERROR_HALT 0x00200000
#define A4XX_INT0_MISC_HANG_DETECT 0x01000000
#define A4XX_INT0_UCHE_OOB_ACCESS 0x02000000
#define REG_A4XX_RB_GMEM_BASE_ADDR 0x00000cc0
 
#define REG_A4XX_RB_PERFCTR_RB_SEL_0 0x00000cc7
 
#define REG_A4XX_RB_PERFCTR_RB_SEL_1 0x00000cc8
 
#define REG_A4XX_RB_PERFCTR_RB_SEL_2 0x00000cc9
 
#define REG_A4XX_RB_PERFCTR_RB_SEL_3 0x00000cca
 
#define REG_A4XX_RB_PERFCTR_RB_SEL_4 0x00000ccb
 
#define REG_A4XX_RB_PERFCTR_RB_SEL_5 0x00000ccc
 
#define REG_A4XX_RB_PERFCTR_RB_SEL_6 0x00000ccd
 
#define REG_A4XX_RB_PERFCTR_RB_SEL_7 0x00000cce
 
#define REG_A4XX_RB_PERFCTR_CCU_SEL_3 0x00000cd2
 
#define REG_A4XX_RB_FRAME_BUFFER_DIMENSION 0x00000ce0
#define A4XX_RB_FRAME_BUFFER_DIMENSION_WIDTH__MASK 0x00003fff
#define A4XX_RB_FRAME_BUFFER_DIMENSION_WIDTH__SHIFT 0
static inline uint32_t A4XX_RB_FRAME_BUFFER_DIMENSION_WIDTH(uint32_t val)
{
return ((val) << A4XX_RB_FRAME_BUFFER_DIMENSION_WIDTH__SHIFT) & A4XX_RB_FRAME_BUFFER_DIMENSION_WIDTH__MASK;
}
#define A4XX_RB_FRAME_BUFFER_DIMENSION_HEIGHT__MASK 0x3fff0000
#define A4XX_RB_FRAME_BUFFER_DIMENSION_HEIGHT__SHIFT 16
static inline uint32_t A4XX_RB_FRAME_BUFFER_DIMENSION_HEIGHT(uint32_t val)
{
return ((val) << A4XX_RB_FRAME_BUFFER_DIMENSION_HEIGHT__SHIFT) & A4XX_RB_FRAME_BUFFER_DIMENSION_HEIGHT__MASK;
}
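 
/* Illustrative sketch, not part of the generated header: a complete register
 * value is built by OR-ing the field packers together, e.g. a hypothetical
 * 1920x1080 framebuffer dimension word for the register above:
 */
static inline uint32_t example_rb_frame_buffer_dimension(void)
{
	return A4XX_RB_FRAME_BUFFER_DIMENSION_WIDTH(1920) |
	       A4XX_RB_FRAME_BUFFER_DIMENSION_HEIGHT(1080);
}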
 
#define REG_A4XX_RB_CLEAR_COLOR_DW0 0x000020cc
 
#define REG_A4XX_RB_CLEAR_COLOR_DW1 0x000020cd
 
#define REG_A4XX_RB_CLEAR_COLOR_DW2 0x000020ce
 
#define REG_A4XX_RB_CLEAR_COLOR_DW3 0x000020cf
 
#define REG_A4XX_RB_MODE_CONTROL 0x000020a0
#define A4XX_RB_MODE_CONTROL_WIDTH__MASK 0x0000003f
#define A4XX_RB_MODE_CONTROL_WIDTH__SHIFT 0
static inline uint32_t A4XX_RB_MODE_CONTROL_WIDTH(uint32_t val)
{
return ((val >> 5) << A4XX_RB_MODE_CONTROL_WIDTH__SHIFT) & A4XX_RB_MODE_CONTROL_WIDTH__MASK;
}
#define A4XX_RB_MODE_CONTROL_HEIGHT__MASK 0x00003f00
#define A4XX_RB_MODE_CONTROL_HEIGHT__SHIFT 8
static inline uint32_t A4XX_RB_MODE_CONTROL_HEIGHT(uint32_t val)
{
return ((val >> 5) << A4XX_RB_MODE_CONTROL_HEIGHT__SHIFT) & A4XX_RB_MODE_CONTROL_HEIGHT__MASK;
}
 
#define REG_A4XX_RB_RENDER_CONTROL 0x000020a1
#define A4XX_RB_RENDER_CONTROL_BINNING_PASS 0x00000001
#define A4XX_RB_RENDER_CONTROL_DISABLE_COLOR_PIPE 0x00000020
 
#define REG_A4XX_RB_MSAA_CONTROL 0x000020a2
#define A4XX_RB_MSAA_CONTROL_DISABLE 0x00001000
#define A4XX_RB_MSAA_CONTROL_SAMPLES__MASK 0x0000e000
#define A4XX_RB_MSAA_CONTROL_SAMPLES__SHIFT 13
static inline uint32_t A4XX_RB_MSAA_CONTROL_SAMPLES(uint32_t val)
{
return ((val) << A4XX_RB_MSAA_CONTROL_SAMPLES__SHIFT) & A4XX_RB_MSAA_CONTROL_SAMPLES__MASK;
}
 
#define REG_A4XX_RB_RENDER_CONTROL2 0x000020a3
#define A4XX_RB_RENDER_CONTROL2_XCOORD 0x00000001
#define A4XX_RB_RENDER_CONTROL2_YCOORD 0x00000002
#define A4XX_RB_RENDER_CONTROL2_ZCOORD 0x00000004
#define A4XX_RB_RENDER_CONTROL2_WCOORD 0x00000008
#define A4XX_RB_RENDER_CONTROL2_SAMPLEMASK 0x00000010
#define A4XX_RB_RENDER_CONTROL2_FACENESS 0x00000020
#define A4XX_RB_RENDER_CONTROL2_SAMPLEID 0x00000040
#define A4XX_RB_RENDER_CONTROL2_MSAA_SAMPLES__MASK 0x00000380
#define A4XX_RB_RENDER_CONTROL2_MSAA_SAMPLES__SHIFT 7
static inline uint32_t A4XX_RB_RENDER_CONTROL2_MSAA_SAMPLES(uint32_t val)
{
return ((val) << A4XX_RB_RENDER_CONTROL2_MSAA_SAMPLES__SHIFT) & A4XX_RB_RENDER_CONTROL2_MSAA_SAMPLES__MASK;
}
#define A4XX_RB_RENDER_CONTROL2_SAMPLEID_HR 0x00000800
#define A4XX_RB_RENDER_CONTROL2_VARYING 0x00001000
 
static inline uint32_t REG_A4XX_RB_MRT(uint32_t i0) { return 0x000020a4 + 0x5*i0; }
 
static inline uint32_t REG_A4XX_RB_MRT_CONTROL(uint32_t i0) { return 0x000020a4 + 0x5*i0; }
#define A4XX_RB_MRT_CONTROL_READ_DEST_ENABLE 0x00000008
#define A4XX_RB_MRT_CONTROL_BLEND 0x00000010
#define A4XX_RB_MRT_CONTROL_BLEND2 0x00000020
#define A4XX_RB_MRT_CONTROL_FASTCLEAR 0x00000400
#define A4XX_RB_MRT_CONTROL_B11 0x00000800
#define A4XX_RB_MRT_CONTROL_COMPONENT_ENABLE__MASK 0x0f000000
#define A4XX_RB_MRT_CONTROL_COMPONENT_ENABLE__SHIFT 24
static inline uint32_t A4XX_RB_MRT_CONTROL_COMPONENT_ENABLE(uint32_t val)
{
return ((val) << A4XX_RB_MRT_CONTROL_COMPONENT_ENABLE__SHIFT) & A4XX_RB_MRT_CONTROL_COMPONENT_ENABLE__MASK;
}
 
static inline uint32_t REG_A4XX_RB_MRT_BUF_INFO(uint32_t i0) { return 0x000020a5 + 0x5*i0; }
#define A4XX_RB_MRT_BUF_INFO_COLOR_FORMAT__MASK 0x0000003f
#define A4XX_RB_MRT_BUF_INFO_COLOR_FORMAT__SHIFT 0
static inline uint32_t A4XX_RB_MRT_BUF_INFO_COLOR_FORMAT(enum a4xx_color_fmt val)
{
return ((val) << A4XX_RB_MRT_BUF_INFO_COLOR_FORMAT__SHIFT) & A4XX_RB_MRT_BUF_INFO_COLOR_FORMAT__MASK;
}
#define A4XX_RB_MRT_BUF_INFO_COLOR_TILE_MODE__MASK 0x000000c0
#define A4XX_RB_MRT_BUF_INFO_COLOR_TILE_MODE__SHIFT 6
static inline uint32_t A4XX_RB_MRT_BUF_INFO_COLOR_TILE_MODE(enum a4xx_tile_mode val)
{
return ((val) << A4XX_RB_MRT_BUF_INFO_COLOR_TILE_MODE__SHIFT) & A4XX_RB_MRT_BUF_INFO_COLOR_TILE_MODE__MASK;
}
#define A4XX_RB_MRT_BUF_INFO_DITHER_MODE__MASK 0x00000600
#define A4XX_RB_MRT_BUF_INFO_DITHER_MODE__SHIFT 9
static inline uint32_t A4XX_RB_MRT_BUF_INFO_DITHER_MODE(enum adreno_rb_dither_mode val)
{
return ((val) << A4XX_RB_MRT_BUF_INFO_DITHER_MODE__SHIFT) & A4XX_RB_MRT_BUF_INFO_DITHER_MODE__MASK;
}
#define A4XX_RB_MRT_BUF_INFO_COLOR_SWAP__MASK 0x00001800
#define A4XX_RB_MRT_BUF_INFO_COLOR_SWAP__SHIFT 11
static inline uint32_t A4XX_RB_MRT_BUF_INFO_COLOR_SWAP(enum a3xx_color_swap val)
{
return ((val) << A4XX_RB_MRT_BUF_INFO_COLOR_SWAP__SHIFT) & A4XX_RB_MRT_BUF_INFO_COLOR_SWAP__MASK;
}
#define A4XX_RB_MRT_BUF_INFO_COLOR_SRGB 0x00002000
#define A4XX_RB_MRT_BUF_INFO_COLOR_BUF_PITCH__MASK 0x007fc000
#define A4XX_RB_MRT_BUF_INFO_COLOR_BUF_PITCH__SHIFT 14
static inline uint32_t A4XX_RB_MRT_BUF_INFO_COLOR_BUF_PITCH(uint32_t val)
{
return ((val >> 4) << A4XX_RB_MRT_BUF_INFO_COLOR_BUF_PITCH__SHIFT) & A4XX_RB_MRT_BUF_INFO_COLOR_BUF_PITCH__MASK;
}
 
static inline uint32_t REG_A4XX_RB_MRT_BASE(uint32_t i0) { return 0x000020a6 + 0x5*i0; }
 
static inline uint32_t REG_A4XX_RB_MRT_CONTROL3(uint32_t i0) { return 0x000020a7 + 0x5*i0; }
#define A4XX_RB_MRT_CONTROL3_STRIDE__MASK 0x0001fff8
#define A4XX_RB_MRT_CONTROL3_STRIDE__SHIFT 3
static inline uint32_t A4XX_RB_MRT_CONTROL3_STRIDE(uint32_t val)
{
return ((val) << A4XX_RB_MRT_CONTROL3_STRIDE__SHIFT) & A4XX_RB_MRT_CONTROL3_STRIDE__MASK;
}
 
static inline uint32_t REG_A4XX_RB_MRT_BLEND_CONTROL(uint32_t i0) { return 0x000020a8 + 0x5*i0; }
#define A4XX_RB_MRT_BLEND_CONTROL_RGB_SRC_FACTOR__MASK 0x0000001f
#define A4XX_RB_MRT_BLEND_CONTROL_RGB_SRC_FACTOR__SHIFT 0
static inline uint32_t A4XX_RB_MRT_BLEND_CONTROL_RGB_SRC_FACTOR(enum adreno_rb_blend_factor val)
{
return ((val) << A4XX_RB_MRT_BLEND_CONTROL_RGB_SRC_FACTOR__SHIFT) & A4XX_RB_MRT_BLEND_CONTROL_RGB_SRC_FACTOR__MASK;
}
#define A4XX_RB_MRT_BLEND_CONTROL_RGB_BLEND_OPCODE__MASK 0x000000e0
#define A4XX_RB_MRT_BLEND_CONTROL_RGB_BLEND_OPCODE__SHIFT 5
static inline uint32_t A4XX_RB_MRT_BLEND_CONTROL_RGB_BLEND_OPCODE(enum a4xx_rb_blend_opcode val)
{
return ((val) << A4XX_RB_MRT_BLEND_CONTROL_RGB_BLEND_OPCODE__SHIFT) & A4XX_RB_MRT_BLEND_CONTROL_RGB_BLEND_OPCODE__MASK;
}
#define A4XX_RB_MRT_BLEND_CONTROL_RGB_DEST_FACTOR__MASK 0x00001f00
#define A4XX_RB_MRT_BLEND_CONTROL_RGB_DEST_FACTOR__SHIFT 8
static inline uint32_t A4XX_RB_MRT_BLEND_CONTROL_RGB_DEST_FACTOR(enum adreno_rb_blend_factor val)
{
return ((val) << A4XX_RB_MRT_BLEND_CONTROL_RGB_DEST_FACTOR__SHIFT) & A4XX_RB_MRT_BLEND_CONTROL_RGB_DEST_FACTOR__MASK;
}
#define A4XX_RB_MRT_BLEND_CONTROL_ALPHA_SRC_FACTOR__MASK 0x001f0000
#define A4XX_RB_MRT_BLEND_CONTROL_ALPHA_SRC_FACTOR__SHIFT 16
static inline uint32_t A4XX_RB_MRT_BLEND_CONTROL_ALPHA_SRC_FACTOR(enum adreno_rb_blend_factor val)
{
return ((val) << A4XX_RB_MRT_BLEND_CONTROL_ALPHA_SRC_FACTOR__SHIFT) & A4XX_RB_MRT_BLEND_CONTROL_ALPHA_SRC_FACTOR__MASK;
}
#define A4XX_RB_MRT_BLEND_CONTROL_ALPHA_BLEND_OPCODE__MASK 0x00e00000
#define A4XX_RB_MRT_BLEND_CONTROL_ALPHA_BLEND_OPCODE__SHIFT 21
static inline uint32_t A4XX_RB_MRT_BLEND_CONTROL_ALPHA_BLEND_OPCODE(enum a4xx_rb_blend_opcode val)
{
return ((val) << A4XX_RB_MRT_BLEND_CONTROL_ALPHA_BLEND_OPCODE__SHIFT) & A4XX_RB_MRT_BLEND_CONTROL_ALPHA_BLEND_OPCODE__MASK;
}
#define A4XX_RB_MRT_BLEND_CONTROL_ALPHA_DEST_FACTOR__MASK 0x1f000000
#define A4XX_RB_MRT_BLEND_CONTROL_ALPHA_DEST_FACTOR__SHIFT 24
static inline uint32_t A4XX_RB_MRT_BLEND_CONTROL_ALPHA_DEST_FACTOR(enum adreno_rb_blend_factor val)
{
return ((val) << A4XX_RB_MRT_BLEND_CONTROL_ALPHA_DEST_FACTOR__SHIFT) & A4XX_RB_MRT_BLEND_CONTROL_ALPHA_DEST_FACTOR__MASK;
}
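 
/* Illustrative sketch, not part of the generated header: the REG_A4XX_RB_MRT*
 * helpers compute the register offset for render target i0 (0x5 dwords per
 * MRT), and the field packers build the value written there. A hypothetical
 * additive-blend opcode setup for one render target:
 */
static inline void example_mrt_blend_opcodes(uint32_t i0,
		uint32_t *offset, uint32_t *value)
{
	*offset = REG_A4XX_RB_MRT_BLEND_CONTROL(i0);
	*value = A4XX_RB_MRT_BLEND_CONTROL_RGB_BLEND_OPCODE(BLEND_DST_PLUS_SRC) |
	         A4XX_RB_MRT_BLEND_CONTROL_ALPHA_BLEND_OPCODE(BLEND_DST_PLUS_SRC);
	/* the src/dst factor fields (enum adreno_rb_blend_factor) are left at
	 * zero here and would be packed the same way in real code */
}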
 
#define REG_A4XX_RB_BLEND_RED 0x000020f3
#define A4XX_RB_BLEND_RED_UINT__MASK 0x00007fff
#define A4XX_RB_BLEND_RED_UINT__SHIFT 0
static inline uint32_t A4XX_RB_BLEND_RED_UINT(uint32_t val)
{
return ((val) << A4XX_RB_BLEND_RED_UINT__SHIFT) & A4XX_RB_BLEND_RED_UINT__MASK;
}
#define A4XX_RB_BLEND_RED_FLOAT__MASK 0xffff0000
#define A4XX_RB_BLEND_RED_FLOAT__SHIFT 16
static inline uint32_t A4XX_RB_BLEND_RED_FLOAT(float val)
{
return ((util_float_to_half(val)) << A4XX_RB_BLEND_RED_FLOAT__SHIFT) & A4XX_RB_BLEND_RED_FLOAT__MASK;
}
 
#define REG_A4XX_RB_BLEND_GREEN 0x000020f4
#define A4XX_RB_BLEND_GREEN_UINT__MASK 0x00007fff
#define A4XX_RB_BLEND_GREEN_UINT__SHIFT 0
static inline uint32_t A4XX_RB_BLEND_GREEN_UINT(uint32_t val)
{
return ((val) << A4XX_RB_BLEND_GREEN_UINT__SHIFT) & A4XX_RB_BLEND_GREEN_UINT__MASK;
}
#define A4XX_RB_BLEND_GREEN_FLOAT__MASK 0xffff0000
#define A4XX_RB_BLEND_GREEN_FLOAT__SHIFT 16
static inline uint32_t A4XX_RB_BLEND_GREEN_FLOAT(float val)
{
return ((util_float_to_half(val)) << A4XX_RB_BLEND_GREEN_FLOAT__SHIFT) & A4XX_RB_BLEND_GREEN_FLOAT__MASK;
}
 
#define REG_A4XX_RB_BLEND_BLUE 0x000020f5
#define A4XX_RB_BLEND_BLUE_UINT__MASK 0x00007fff
#define A4XX_RB_BLEND_BLUE_UINT__SHIFT 0
static inline uint32_t A4XX_RB_BLEND_BLUE_UINT(uint32_t val)
{
return ((val) << A4XX_RB_BLEND_BLUE_UINT__SHIFT) & A4XX_RB_BLEND_BLUE_UINT__MASK;
}
#define A4XX_RB_BLEND_BLUE_FLOAT__MASK 0xffff0000
#define A4XX_RB_BLEND_BLUE_FLOAT__SHIFT 16
static inline uint32_t A4XX_RB_BLEND_BLUE_FLOAT(float val)
{
return ((util_float_to_half(val)) << A4XX_RB_BLEND_BLUE_FLOAT__SHIFT) & A4XX_RB_BLEND_BLUE_FLOAT__MASK;
}
 
#define REG_A4XX_RB_BLEND_ALPHA 0x000020f6
#define A4XX_RB_BLEND_ALPHA_UINT__MASK 0x00007fff
#define A4XX_RB_BLEND_ALPHA_UINT__SHIFT 0
static inline uint32_t A4XX_RB_BLEND_ALPHA_UINT(uint32_t val)
{
return ((val) << A4XX_RB_BLEND_ALPHA_UINT__SHIFT) & A4XX_RB_BLEND_ALPHA_UINT__MASK;
}
#define A4XX_RB_BLEND_ALPHA_FLOAT__MASK 0xffff0000
#define A4XX_RB_BLEND_ALPHA_FLOAT__SHIFT 16
static inline uint32_t A4XX_RB_BLEND_ALPHA_FLOAT(float val)
{
return ((util_float_to_half(val)) << A4XX_RB_BLEND_ALPHA_FLOAT__SHIFT) & A4XX_RB_BLEND_ALPHA_FLOAT__MASK;
}
 
#define REG_A4XX_RB_ALPHA_CONTROL 0x000020f8
#define A4XX_RB_ALPHA_CONTROL_ALPHA_REF__MASK 0x000000ff
#define A4XX_RB_ALPHA_CONTROL_ALPHA_REF__SHIFT 0
static inline uint32_t A4XX_RB_ALPHA_CONTROL_ALPHA_REF(uint32_t val)
{
return ((val) << A4XX_RB_ALPHA_CONTROL_ALPHA_REF__SHIFT) & A4XX_RB_ALPHA_CONTROL_ALPHA_REF__MASK;
}
#define A4XX_RB_ALPHA_CONTROL_ALPHA_TEST 0x00000100
#define A4XX_RB_ALPHA_CONTROL_ALPHA_TEST_FUNC__MASK 0x00000e00
#define A4XX_RB_ALPHA_CONTROL_ALPHA_TEST_FUNC__SHIFT 9
static inline uint32_t A4XX_RB_ALPHA_CONTROL_ALPHA_TEST_FUNC(enum adreno_compare_func val)
{
return ((val) << A4XX_RB_ALPHA_CONTROL_ALPHA_TEST_FUNC__SHIFT) & A4XX_RB_ALPHA_CONTROL_ALPHA_TEST_FUNC__MASK;
}
 
#define REG_A4XX_RB_FS_OUTPUT 0x000020f9
#define A4XX_RB_FS_OUTPUT_ENABLE_BLEND__MASK 0x000000ff
#define A4XX_RB_FS_OUTPUT_ENABLE_BLEND__SHIFT 0
static inline uint32_t A4XX_RB_FS_OUTPUT_ENABLE_BLEND(uint32_t val)
{
return ((val) << A4XX_RB_FS_OUTPUT_ENABLE_BLEND__SHIFT) & A4XX_RB_FS_OUTPUT_ENABLE_BLEND__MASK;
}
#define A4XX_RB_FS_OUTPUT_FAST_CLEAR 0x00000100
#define A4XX_RB_FS_OUTPUT_SAMPLE_MASK__MASK 0xffff0000
#define A4XX_RB_FS_OUTPUT_SAMPLE_MASK__SHIFT 16
static inline uint32_t A4XX_RB_FS_OUTPUT_SAMPLE_MASK(uint32_t val)
{
return ((val) << A4XX_RB_FS_OUTPUT_SAMPLE_MASK__SHIFT) & A4XX_RB_FS_OUTPUT_SAMPLE_MASK__MASK;
}
 
#define REG_A4XX_RB_RENDER_COMPONENTS 0x000020fb
#define A4XX_RB_RENDER_COMPONENTS_RT0__MASK 0x0000000f
#define A4XX_RB_RENDER_COMPONENTS_RT0__SHIFT 0
static inline uint32_t A4XX_RB_RENDER_COMPONENTS_RT0(uint32_t val)
{
return ((val) << A4XX_RB_RENDER_COMPONENTS_RT0__SHIFT) & A4XX_RB_RENDER_COMPONENTS_RT0__MASK;
}
#define A4XX_RB_RENDER_COMPONENTS_RT1__MASK 0x000000f0
#define A4XX_RB_RENDER_COMPONENTS_RT1__SHIFT 4
static inline uint32_t A4XX_RB_RENDER_COMPONENTS_RT1(uint32_t val)
{
return ((val) << A4XX_RB_RENDER_COMPONENTS_RT1__SHIFT) & A4XX_RB_RENDER_COMPONENTS_RT1__MASK;
}
#define A4XX_RB_RENDER_COMPONENTS_RT2__MASK 0x00000f00
#define A4XX_RB_RENDER_COMPONENTS_RT2__SHIFT 8
static inline uint32_t A4XX_RB_RENDER_COMPONENTS_RT2(uint32_t val)
{
return ((val) << A4XX_RB_RENDER_COMPONENTS_RT2__SHIFT) & A4XX_RB_RENDER_COMPONENTS_RT2__MASK;
}
#define A4XX_RB_RENDER_COMPONENTS_RT3__MASK 0x0000f000
#define A4XX_RB_RENDER_COMPONENTS_RT3__SHIFT 12
static inline uint32_t A4XX_RB_RENDER_COMPONENTS_RT3(uint32_t val)
{
return ((val) << A4XX_RB_RENDER_COMPONENTS_RT3__SHIFT) & A4XX_RB_RENDER_COMPONENTS_RT3__MASK;
}
#define A4XX_RB_RENDER_COMPONENTS_RT4__MASK 0x000f0000
#define A4XX_RB_RENDER_COMPONENTS_RT4__SHIFT 16
static inline uint32_t A4XX_RB_RENDER_COMPONENTS_RT4(uint32_t val)
{
return ((val) << A4XX_RB_RENDER_COMPONENTS_RT4__SHIFT) & A4XX_RB_RENDER_COMPONENTS_RT4__MASK;
}
#define A4XX_RB_RENDER_COMPONENTS_RT5__MASK 0x00f00000
#define A4XX_RB_RENDER_COMPONENTS_RT5__SHIFT 20
static inline uint32_t A4XX_RB_RENDER_COMPONENTS_RT5(uint32_t val)
{
return ((val) << A4XX_RB_RENDER_COMPONENTS_RT5__SHIFT) & A4XX_RB_RENDER_COMPONENTS_RT5__MASK;
}
#define A4XX_RB_RENDER_COMPONENTS_RT6__MASK 0x0f000000
#define A4XX_RB_RENDER_COMPONENTS_RT6__SHIFT 24
static inline uint32_t A4XX_RB_RENDER_COMPONENTS_RT6(uint32_t val)
{
return ((val) << A4XX_RB_RENDER_COMPONENTS_RT6__SHIFT) & A4XX_RB_RENDER_COMPONENTS_RT6__MASK;
}
#define A4XX_RB_RENDER_COMPONENTS_RT7__MASK 0xf0000000
#define A4XX_RB_RENDER_COMPONENTS_RT7__SHIFT 28
static inline uint32_t A4XX_RB_RENDER_COMPONENTS_RT7(uint32_t val)
{
return ((val) << A4XX_RB_RENDER_COMPONENTS_RT7__SHIFT) & A4XX_RB_RENDER_COMPONENTS_RT7__MASK;
}
 
#define REG_A4XX_RB_COPY_CONTROL 0x000020fc
#define A4XX_RB_COPY_CONTROL_MSAA_RESOLVE__MASK 0x00000003
#define A4XX_RB_COPY_CONTROL_MSAA_RESOLVE__SHIFT 0
static inline uint32_t A4XX_RB_COPY_CONTROL_MSAA_RESOLVE(enum a3xx_msaa_samples val)
{
return ((val) << A4XX_RB_COPY_CONTROL_MSAA_RESOLVE__SHIFT) & A4XX_RB_COPY_CONTROL_MSAA_RESOLVE__MASK;
}
#define A4XX_RB_COPY_CONTROL_MODE__MASK 0x00000070
#define A4XX_RB_COPY_CONTROL_MODE__SHIFT 4
static inline uint32_t A4XX_RB_COPY_CONTROL_MODE(enum adreno_rb_copy_control_mode val)
{
return ((val) << A4XX_RB_COPY_CONTROL_MODE__SHIFT) & A4XX_RB_COPY_CONTROL_MODE__MASK;
}
#define A4XX_RB_COPY_CONTROL_FASTCLEAR__MASK 0x00000f00
#define A4XX_RB_COPY_CONTROL_FASTCLEAR__SHIFT 8
static inline uint32_t A4XX_RB_COPY_CONTROL_FASTCLEAR(uint32_t val)
{
return ((val) << A4XX_RB_COPY_CONTROL_FASTCLEAR__SHIFT) & A4XX_RB_COPY_CONTROL_FASTCLEAR__MASK;
}
#define A4XX_RB_COPY_CONTROL_GMEM_BASE__MASK 0xffffc000
#define A4XX_RB_COPY_CONTROL_GMEM_BASE__SHIFT 14
static inline uint32_t A4XX_RB_COPY_CONTROL_GMEM_BASE(uint32_t val)
{
return ((val >> 14) << A4XX_RB_COPY_CONTROL_GMEM_BASE__SHIFT) & A4XX_RB_COPY_CONTROL_GMEM_BASE__MASK;
}
 
#define REG_A4XX_RB_COPY_DEST_BASE 0x000020fd
#define A4XX_RB_COPY_DEST_BASE_BASE__MASK 0xffffffe0
#define A4XX_RB_COPY_DEST_BASE_BASE__SHIFT 5
static inline uint32_t A4XX_RB_COPY_DEST_BASE_BASE(uint32_t val)
{
return ((val >> 5) << A4XX_RB_COPY_DEST_BASE_BASE__SHIFT) & A4XX_RB_COPY_DEST_BASE_BASE__MASK;
}
 
#define REG_A4XX_RB_COPY_DEST_PITCH 0x000020fe
#define A4XX_RB_COPY_DEST_PITCH_PITCH__MASK 0xffffffff
#define A4XX_RB_COPY_DEST_PITCH_PITCH__SHIFT 0
static inline uint32_t A4XX_RB_COPY_DEST_PITCH_PITCH(uint32_t val)
{
return ((val >> 5) << A4XX_RB_COPY_DEST_PITCH_PITCH__SHIFT) & A4XX_RB_COPY_DEST_PITCH_PITCH__MASK;
}
 
#define REG_A4XX_RB_COPY_DEST_INFO 0x000020ff
#define A4XX_RB_COPY_DEST_INFO_FORMAT__MASK 0x000000fc
#define A4XX_RB_COPY_DEST_INFO_FORMAT__SHIFT 2
static inline uint32_t A4XX_RB_COPY_DEST_INFO_FORMAT(enum a4xx_color_fmt val)
{
return ((val) << A4XX_RB_COPY_DEST_INFO_FORMAT__SHIFT) & A4XX_RB_COPY_DEST_INFO_FORMAT__MASK;
}
#define A4XX_RB_COPY_DEST_INFO_SWAP__MASK 0x00000300
#define A4XX_RB_COPY_DEST_INFO_SWAP__SHIFT 8
static inline uint32_t A4XX_RB_COPY_DEST_INFO_SWAP(enum a3xx_color_swap val)
{
return ((val) << A4XX_RB_COPY_DEST_INFO_SWAP__SHIFT) & A4XX_RB_COPY_DEST_INFO_SWAP__MASK;
}
#define A4XX_RB_COPY_DEST_INFO_DITHER_MODE__MASK 0x00000c00
#define A4XX_RB_COPY_DEST_INFO_DITHER_MODE__SHIFT 10
static inline uint32_t A4XX_RB_COPY_DEST_INFO_DITHER_MODE(enum adreno_rb_dither_mode val)
{
return ((val) << A4XX_RB_COPY_DEST_INFO_DITHER_MODE__SHIFT) & A4XX_RB_COPY_DEST_INFO_DITHER_MODE__MASK;
}
#define A4XX_RB_COPY_DEST_INFO_COMPONENT_ENABLE__MASK 0x0003c000
#define A4XX_RB_COPY_DEST_INFO_COMPONENT_ENABLE__SHIFT 14
static inline uint32_t A4XX_RB_COPY_DEST_INFO_COMPONENT_ENABLE(uint32_t val)
{
return ((val) << A4XX_RB_COPY_DEST_INFO_COMPONENT_ENABLE__SHIFT) & A4XX_RB_COPY_DEST_INFO_COMPONENT_ENABLE__MASK;
}
#define A4XX_RB_COPY_DEST_INFO_ENDIAN__MASK 0x001c0000
#define A4XX_RB_COPY_DEST_INFO_ENDIAN__SHIFT 18
static inline uint32_t A4XX_RB_COPY_DEST_INFO_ENDIAN(enum adreno_rb_surface_endian val)
{
return ((val) << A4XX_RB_COPY_DEST_INFO_ENDIAN__SHIFT) & A4XX_RB_COPY_DEST_INFO_ENDIAN__MASK;
}
#define A4XX_RB_COPY_DEST_INFO_TILE__MASK 0x03000000
#define A4XX_RB_COPY_DEST_INFO_TILE__SHIFT 24
static inline uint32_t A4XX_RB_COPY_DEST_INFO_TILE(enum a4xx_tile_mode val)
{
return ((val) << A4XX_RB_COPY_DEST_INFO_TILE__SHIFT) & A4XX_RB_COPY_DEST_INFO_TILE__MASK;
}
 
#define REG_A4XX_RB_FS_OUTPUT_REG 0x00002100
#define A4XX_RB_FS_OUTPUT_REG_MRT__MASK 0x0000000f
#define A4XX_RB_FS_OUTPUT_REG_MRT__SHIFT 0
static inline uint32_t A4XX_RB_FS_OUTPUT_REG_MRT(uint32_t val)
{
return ((val) << A4XX_RB_FS_OUTPUT_REG_MRT__SHIFT) & A4XX_RB_FS_OUTPUT_REG_MRT__MASK;
}
#define A4XX_RB_FS_OUTPUT_REG_FRAG_WRITES_Z 0x00000020
 
#define REG_A4XX_RB_DEPTH_CONTROL 0x00002101
#define A4XX_RB_DEPTH_CONTROL_FRAG_WRITES_Z 0x00000001
#define A4XX_RB_DEPTH_CONTROL_Z_ENABLE 0x00000002
#define A4XX_RB_DEPTH_CONTROL_Z_WRITE_ENABLE 0x00000004
#define A4XX_RB_DEPTH_CONTROL_ZFUNC__MASK 0x00000070
#define A4XX_RB_DEPTH_CONTROL_ZFUNC__SHIFT 4
static inline uint32_t A4XX_RB_DEPTH_CONTROL_ZFUNC(enum adreno_compare_func val)
{
return ((val) << A4XX_RB_DEPTH_CONTROL_ZFUNC__SHIFT) & A4XX_RB_DEPTH_CONTROL_ZFUNC__MASK;
}
#define A4XX_RB_DEPTH_CONTROL_BF_ENABLE 0x00000080
#define A4XX_RB_DEPTH_CONTROL_EARLY_Z_DISABLE 0x00010000
#define A4XX_RB_DEPTH_CONTROL_Z_TEST_ENABLE 0x80000000
 
#define REG_A4XX_RB_DEPTH_CLEAR 0x00002102
 
#define REG_A4XX_RB_DEPTH_INFO 0x00002103
#define A4XX_RB_DEPTH_INFO_DEPTH_FORMAT__MASK 0x00000003
#define A4XX_RB_DEPTH_INFO_DEPTH_FORMAT__SHIFT 0
static inline uint32_t A4XX_RB_DEPTH_INFO_DEPTH_FORMAT(enum a4xx_depth_format val)
{
return ((val) << A4XX_RB_DEPTH_INFO_DEPTH_FORMAT__SHIFT) & A4XX_RB_DEPTH_INFO_DEPTH_FORMAT__MASK;
}
#define A4XX_RB_DEPTH_INFO_DEPTH_BASE__MASK 0xfffff000
#define A4XX_RB_DEPTH_INFO_DEPTH_BASE__SHIFT 12
static inline uint32_t A4XX_RB_DEPTH_INFO_DEPTH_BASE(uint32_t val)
{
return ((val >> 12) << A4XX_RB_DEPTH_INFO_DEPTH_BASE__SHIFT) & A4XX_RB_DEPTH_INFO_DEPTH_BASE__MASK;
}
 
#define REG_A4XX_RB_DEPTH_PITCH 0x00002104
#define A4XX_RB_DEPTH_PITCH__MASK 0xffffffff
#define A4XX_RB_DEPTH_PITCH__SHIFT 0
static inline uint32_t A4XX_RB_DEPTH_PITCH(uint32_t val)
{
return ((val >> 5) << A4XX_RB_DEPTH_PITCH__SHIFT) & A4XX_RB_DEPTH_PITCH__MASK;
}
 
#define REG_A4XX_RB_DEPTH_PITCH2 0x00002105
#define A4XX_RB_DEPTH_PITCH2__MASK 0xffffffff
#define A4XX_RB_DEPTH_PITCH2__SHIFT 0
static inline uint32_t A4XX_RB_DEPTH_PITCH2(uint32_t val)
{
return ((val >> 5) << A4XX_RB_DEPTH_PITCH2__SHIFT) & A4XX_RB_DEPTH_PITCH2__MASK;
}
 
#define REG_A4XX_RB_STENCIL_CONTROL 0x00002106
#define A4XX_RB_STENCIL_CONTROL_STENCIL_ENABLE 0x00000001
#define A4XX_RB_STENCIL_CONTROL_STENCIL_ENABLE_BF 0x00000002
#define A4XX_RB_STENCIL_CONTROL_STENCIL_READ 0x00000004
#define A4XX_RB_STENCIL_CONTROL_FUNC__MASK 0x00000700
#define A4XX_RB_STENCIL_CONTROL_FUNC__SHIFT 8
static inline uint32_t A4XX_RB_STENCIL_CONTROL_FUNC(enum adreno_compare_func val)
{
return ((val) << A4XX_RB_STENCIL_CONTROL_FUNC__SHIFT) & A4XX_RB_STENCIL_CONTROL_FUNC__MASK;
}
#define A4XX_RB_STENCIL_CONTROL_FAIL__MASK 0x00003800
#define A4XX_RB_STENCIL_CONTROL_FAIL__SHIFT 11
static inline uint32_t A4XX_RB_STENCIL_CONTROL_FAIL(enum adreno_stencil_op val)
{
return ((val) << A4XX_RB_STENCIL_CONTROL_FAIL__SHIFT) & A4XX_RB_STENCIL_CONTROL_FAIL__MASK;
}
#define A4XX_RB_STENCIL_CONTROL_ZPASS__MASK 0x0001c000
#define A4XX_RB_STENCIL_CONTROL_ZPASS__SHIFT 14
static inline uint32_t A4XX_RB_STENCIL_CONTROL_ZPASS(enum adreno_stencil_op val)
{
return ((val) << A4XX_RB_STENCIL_CONTROL_ZPASS__SHIFT) & A4XX_RB_STENCIL_CONTROL_ZPASS__MASK;
}
#define A4XX_RB_STENCIL_CONTROL_ZFAIL__MASK 0x000e0000
#define A4XX_RB_STENCIL_CONTROL_ZFAIL__SHIFT 17
static inline uint32_t A4XX_RB_STENCIL_CONTROL_ZFAIL(enum adreno_stencil_op val)
{
return ((val) << A4XX_RB_STENCIL_CONTROL_ZFAIL__SHIFT) & A4XX_RB_STENCIL_CONTROL_ZFAIL__MASK;
}
#define A4XX_RB_STENCIL_CONTROL_FUNC_BF__MASK 0x00700000
#define A4XX_RB_STENCIL_CONTROL_FUNC_BF__SHIFT 20
static inline uint32_t A4XX_RB_STENCIL_CONTROL_FUNC_BF(enum adreno_compare_func val)
{
return ((val) << A4XX_RB_STENCIL_CONTROL_FUNC_BF__SHIFT) & A4XX_RB_STENCIL_CONTROL_FUNC_BF__MASK;
}
#define A4XX_RB_STENCIL_CONTROL_FAIL_BF__MASK 0x03800000
#define A4XX_RB_STENCIL_CONTROL_FAIL_BF__SHIFT 23
static inline uint32_t A4XX_RB_STENCIL_CONTROL_FAIL_BF(enum adreno_stencil_op val)
{
return ((val) << A4XX_RB_STENCIL_CONTROL_FAIL_BF__SHIFT) & A4XX_RB_STENCIL_CONTROL_FAIL_BF__MASK;
}
#define A4XX_RB_STENCIL_CONTROL_ZPASS_BF__MASK 0x1c000000
#define A4XX_RB_STENCIL_CONTROL_ZPASS_BF__SHIFT 26
static inline uint32_t A4XX_RB_STENCIL_CONTROL_ZPASS_BF(enum adreno_stencil_op val)
{
return ((val) << A4XX_RB_STENCIL_CONTROL_ZPASS_BF__SHIFT) & A4XX_RB_STENCIL_CONTROL_ZPASS_BF__MASK;
}
#define A4XX_RB_STENCIL_CONTROL_ZFAIL_BF__MASK 0xe0000000
#define A4XX_RB_STENCIL_CONTROL_ZFAIL_BF__SHIFT 29
static inline uint32_t A4XX_RB_STENCIL_CONTROL_ZFAIL_BF(enum adreno_stencil_op val)
{
return ((val) << A4XX_RB_STENCIL_CONTROL_ZFAIL_BF__SHIFT) & A4XX_RB_STENCIL_CONTROL_ZFAIL_BF__MASK;
}
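 
/* Illustrative sketch, not part of the generated header: a front-face stencil
 * control word is composed the same way the a3xx code earlier in this
 * revision builds rb_stencil_control -- the enable bit OR-ed with the packed
 * func/fail/zpass/zfail fields (hypothetical helper):
 */
static inline uint32_t example_rb_stencil_control(enum adreno_compare_func func,
		enum adreno_stencil_op fail, enum adreno_stencil_op zpass,
		enum adreno_stencil_op zfail)
{
	return A4XX_RB_STENCIL_CONTROL_STENCIL_ENABLE |
	       A4XX_RB_STENCIL_CONTROL_FUNC(func) |
	       A4XX_RB_STENCIL_CONTROL_FAIL(fail) |
	       A4XX_RB_STENCIL_CONTROL_ZPASS(zpass) |
	       A4XX_RB_STENCIL_CONTROL_ZFAIL(zfail);
}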
 
#define REG_A4XX_RB_STENCIL_CONTROL2 0x00002107
#define A4XX_RB_STENCIL_CONTROL2_STENCIL_BUFFER 0x00000001
 
#define REG_A4XX_RB_STENCILREFMASK 0x0000210b
#define A4XX_RB_STENCILREFMASK_STENCILREF__MASK 0x000000ff
#define A4XX_RB_STENCILREFMASK_STENCILREF__SHIFT 0
static inline uint32_t A4XX_RB_STENCILREFMASK_STENCILREF(uint32_t val)
{
return ((val) << A4XX_RB_STENCILREFMASK_STENCILREF__SHIFT) & A4XX_RB_STENCILREFMASK_STENCILREF__MASK;
}
#define A4XX_RB_STENCILREFMASK_STENCILMASK__MASK 0x0000ff00
#define A4XX_RB_STENCILREFMASK_STENCILMASK__SHIFT 8
static inline uint32_t A4XX_RB_STENCILREFMASK_STENCILMASK(uint32_t val)
{
return ((val) << A4XX_RB_STENCILREFMASK_STENCILMASK__SHIFT) & A4XX_RB_STENCILREFMASK_STENCILMASK__MASK;
}
#define A4XX_RB_STENCILREFMASK_STENCILWRITEMASK__MASK 0x00ff0000
#define A4XX_RB_STENCILREFMASK_STENCILWRITEMASK__SHIFT 16
static inline uint32_t A4XX_RB_STENCILREFMASK_STENCILWRITEMASK(uint32_t val)
{
return ((val) << A4XX_RB_STENCILREFMASK_STENCILWRITEMASK__SHIFT) & A4XX_RB_STENCILREFMASK_STENCILWRITEMASK__MASK;
}
 
#define REG_A4XX_RB_STENCILREFMASK_BF 0x0000210c
#define A4XX_RB_STENCILREFMASK_BF_STENCILREF__MASK 0x000000ff
#define A4XX_RB_STENCILREFMASK_BF_STENCILREF__SHIFT 0
static inline uint32_t A4XX_RB_STENCILREFMASK_BF_STENCILREF(uint32_t val)
{
return ((val) << A4XX_RB_STENCILREFMASK_BF_STENCILREF__SHIFT) & A4XX_RB_STENCILREFMASK_BF_STENCILREF__MASK;
}
#define A4XX_RB_STENCILREFMASK_BF_STENCILMASK__MASK 0x0000ff00
#define A4XX_RB_STENCILREFMASK_BF_STENCILMASK__SHIFT 8
static inline uint32_t A4XX_RB_STENCILREFMASK_BF_STENCILMASK(uint32_t val)
{
return ((val) << A4XX_RB_STENCILREFMASK_BF_STENCILMASK__SHIFT) & A4XX_RB_STENCILREFMASK_BF_STENCILMASK__MASK;
}
#define A4XX_RB_STENCILREFMASK_BF_STENCILWRITEMASK__MASK 0x00ff0000
#define A4XX_RB_STENCILREFMASK_BF_STENCILWRITEMASK__SHIFT 16
static inline uint32_t A4XX_RB_STENCILREFMASK_BF_STENCILWRITEMASK(uint32_t val)
{
return ((val) << A4XX_RB_STENCILREFMASK_BF_STENCILWRITEMASK__SHIFT) & A4XX_RB_STENCILREFMASK_BF_STENCILWRITEMASK__MASK;
}
 
#define REG_A4XX_RB_BIN_OFFSET 0x0000210d
#define A4XX_RB_BIN_OFFSET_WINDOW_OFFSET_DISABLE 0x80000000
#define A4XX_RB_BIN_OFFSET_X__MASK 0x00007fff
#define A4XX_RB_BIN_OFFSET_X__SHIFT 0
static inline uint32_t A4XX_RB_BIN_OFFSET_X(uint32_t val)
{
return ((val) << A4XX_RB_BIN_OFFSET_X__SHIFT) & A4XX_RB_BIN_OFFSET_X__MASK;
}
#define A4XX_RB_BIN_OFFSET_Y__MASK 0x7fff0000
#define A4XX_RB_BIN_OFFSET_Y__SHIFT 16
static inline uint32_t A4XX_RB_BIN_OFFSET_Y(uint32_t val)
{
return ((val) << A4XX_RB_BIN_OFFSET_Y__SHIFT) & A4XX_RB_BIN_OFFSET_Y__MASK;
}
 
static inline uint32_t REG_A4XX_RB_VPORT_Z_CLAMP(uint32_t i0) { return 0x00002120 + 0x2*i0; }
 
static inline uint32_t REG_A4XX_RB_VPORT_Z_CLAMP_MIN(uint32_t i0) { return 0x00002120 + 0x2*i0; }
 
static inline uint32_t REG_A4XX_RB_VPORT_Z_CLAMP_MAX(uint32_t i0) { return 0x00002121 + 0x2*i0; }
 
#define REG_A4XX_RBBM_HW_VERSION 0x00000000
 
#define REG_A4XX_RBBM_HW_CONFIGURATION 0x00000002
 
static inline uint32_t REG_A4XX_RBBM_CLOCK_CTL_TP(uint32_t i0) { return 0x00000004 + 0x1*i0; }
 
static inline uint32_t REG_A4XX_RBBM_CLOCK_CTL_TP_REG(uint32_t i0) { return 0x00000004 + 0x1*i0; }
 
static inline uint32_t REG_A4XX_RBBM_CLOCK_CTL2_TP(uint32_t i0) { return 0x00000008 + 0x1*i0; }
 
static inline uint32_t REG_A4XX_RBBM_CLOCK_CTL2_TP_REG(uint32_t i0) { return 0x00000008 + 0x1*i0; }
 
static inline uint32_t REG_A4XX_RBBM_CLOCK_HYST_TP(uint32_t i0) { return 0x0000000c + 0x1*i0; }
 
static inline uint32_t REG_A4XX_RBBM_CLOCK_HYST_TP_REG(uint32_t i0) { return 0x0000000c + 0x1*i0; }
 
static inline uint32_t REG_A4XX_RBBM_CLOCK_DELAY_TP(uint32_t i0) { return 0x00000010 + 0x1*i0; }
 
static inline uint32_t REG_A4XX_RBBM_CLOCK_DELAY_TP_REG(uint32_t i0) { return 0x00000010 + 0x1*i0; }
 
#define REG_A4XX_RBBM_CLOCK_CTL_UCHE 0x00000014
 
#define REG_A4XX_RBBM_CLOCK_CTL2_UCHE 0x00000015
 
#define REG_A4XX_RBBM_CLOCK_CTL3_UCHE 0x00000016
 
#define REG_A4XX_RBBM_CLOCK_CTL4_UCHE 0x00000017
 
#define REG_A4XX_RBBM_CLOCK_HYST_UCHE 0x00000018
 
#define REG_A4XX_RBBM_CLOCK_DELAY_UCHE 0x00000019
 
#define REG_A4XX_RBBM_CLOCK_MODE_GPC 0x0000001a
 
#define REG_A4XX_RBBM_CLOCK_DELAY_GPC 0x0000001b
 
#define REG_A4XX_RBBM_CLOCK_HYST_GPC 0x0000001c
 
#define REG_A4XX_RBBM_CLOCK_CTL_TSE_RAS_RBBM 0x0000001d
 
#define REG_A4XX_RBBM_CLOCK_HYST_TSE_RAS_RBBM 0x0000001e
 
#define REG_A4XX_RBBM_CLOCK_DELAY_TSE_RAS_RBBM 0x0000001f
 
#define REG_A4XX_RBBM_CLOCK_CTL 0x00000020
 
#define REG_A4XX_RBBM_SP_HYST_CNT 0x00000021
 
#define REG_A4XX_RBBM_SW_RESET_CMD 0x00000022
 
#define REG_A4XX_RBBM_AHB_CTL0 0x00000023
 
#define REG_A4XX_RBBM_AHB_CTL1 0x00000024
 
#define REG_A4XX_RBBM_AHB_CMD 0x00000025
 
#define REG_A4XX_RBBM_RB_SUB_BLOCK_SEL_CTL 0x00000026
 
#define REG_A4XX_RBBM_RAM_ACC_63_32 0x00000028
 
#define REG_A4XX_RBBM_WAIT_IDLE_CLOCKS_CTL 0x0000002b
 
#define REG_A4XX_RBBM_INTERFACE_HANG_INT_CTL 0x0000002f
 
#define REG_A4XX_RBBM_INTERFACE_HANG_MASK_CTL4 0x00000034
 
#define REG_A4XX_RBBM_INT_CLEAR_CMD 0x00000036
 
#define REG_A4XX_RBBM_INT_0_MASK 0x00000037
 
#define REG_A4XX_RBBM_RBBM_CTL 0x0000003e
 
#define REG_A4XX_RBBM_AHB_DEBUG_CTL 0x0000003f
 
#define REG_A4XX_RBBM_VBIF_DEBUG_CTL 0x00000041
 
#define REG_A4XX_RBBM_CLOCK_CTL2 0x00000042
 
#define REG_A4XX_RBBM_BLOCK_SW_RESET_CMD 0x00000045
 
#define REG_A4XX_RBBM_RESET_CYCLES 0x00000047
 
#define REG_A4XX_RBBM_EXT_TRACE_BUS_CTL 0x00000049
 
#define REG_A4XX_RBBM_CFG_DEBBUS_SEL_A 0x0000004a
 
#define REG_A4XX_RBBM_CFG_DEBBUS_SEL_B 0x0000004b
 
#define REG_A4XX_RBBM_CFG_DEBBUS_SEL_C 0x0000004c
 
#define REG_A4XX_RBBM_CFG_DEBBUS_SEL_D 0x0000004d
 
#define REG_A4XX_RBBM_PERFCTR_CP_0_LO 0x0000009c
 
static inline uint32_t REG_A4XX_RBBM_CLOCK_CTL_SP(uint32_t i0) { return 0x00000068 + 0x1*i0; }
 
static inline uint32_t REG_A4XX_RBBM_CLOCK_CTL_SP_REG(uint32_t i0) { return 0x00000068 + 0x1*i0; }
 
static inline uint32_t REG_A4XX_RBBM_CLOCK_CTL2_SP(uint32_t i0) { return 0x0000006c + 0x1*i0; }
 
static inline uint32_t REG_A4XX_RBBM_CLOCK_CTL2_SP_REG(uint32_t i0) { return 0x0000006c + 0x1*i0; }
 
static inline uint32_t REG_A4XX_RBBM_CLOCK_HYST_SP(uint32_t i0) { return 0x00000070 + 0x1*i0; }
 
static inline uint32_t REG_A4XX_RBBM_CLOCK_HYST_SP_REG(uint32_t i0) { return 0x00000070 + 0x1*i0; }
 
static inline uint32_t REG_A4XX_RBBM_CLOCK_DELAY_SP(uint32_t i0) { return 0x00000074 + 0x1*i0; }
 
static inline uint32_t REG_A4XX_RBBM_CLOCK_DELAY_SP_REG(uint32_t i0) { return 0x00000074 + 0x1*i0; }
 
static inline uint32_t REG_A4XX_RBBM_CLOCK_CTL_RB(uint32_t i0) { return 0x00000078 + 0x1*i0; }
 
static inline uint32_t REG_A4XX_RBBM_CLOCK_CTL_RB_REG(uint32_t i0) { return 0x00000078 + 0x1*i0; }
 
static inline uint32_t REG_A4XX_RBBM_CLOCK_CTL2_RB(uint32_t i0) { return 0x0000007c + 0x1*i0; }
 
static inline uint32_t REG_A4XX_RBBM_CLOCK_CTL2_RB_REG(uint32_t i0) { return 0x0000007c + 0x1*i0; }
 
static inline uint32_t REG_A4XX_RBBM_CLOCK_CTL_MARB_CCU(uint32_t i0) { return 0x00000082 + 0x1*i0; }
 
static inline uint32_t REG_A4XX_RBBM_CLOCK_CTL_MARB_CCU_REG(uint32_t i0) { return 0x00000082 + 0x1*i0; }
 
static inline uint32_t REG_A4XX_RBBM_CLOCK_HYST_RB_MARB_CCU(uint32_t i0) { return 0x00000086 + 0x1*i0; }
 
static inline uint32_t REG_A4XX_RBBM_CLOCK_HYST_RB_MARB_CCU_REG(uint32_t i0) { return 0x00000086 + 0x1*i0; }
 
#define REG_A4XX_RBBM_CLOCK_HYST_COM_DCOM 0x00000080
 
#define REG_A4XX_RBBM_CLOCK_CTL_COM_DCOM 0x00000081
 
#define REG_A4XX_RBBM_CLOCK_CTL_HLSQ 0x0000008a
 
#define REG_A4XX_RBBM_CLOCK_HYST_HLSQ 0x0000008b
 
#define REG_A4XX_RBBM_CLOCK_DELAY_HLSQ 0x0000008c
 
#define REG_A4XX_RBBM_CLOCK_DELAY_COM_DCOM 0x0000008d
 
static inline uint32_t REG_A4XX_RBBM_CLOCK_DELAY_RB_MARB_CCU_L1(uint32_t i0) { return 0x0000008e + 0x1*i0; }
 
static inline uint32_t REG_A4XX_RBBM_CLOCK_DELAY_RB_MARB_CCU_L1_REG(uint32_t i0) { return 0x0000008e + 0x1*i0; }
 
#define REG_A4XX_RBBM_PERFCTR_PWR_1_LO 0x00000168
 
#define REG_A4XX_RBBM_PERFCTR_CTL 0x00000170
 
#define REG_A4XX_RBBM_PERFCTR_LOAD_CMD0 0x00000171
 
#define REG_A4XX_RBBM_PERFCTR_LOAD_CMD1 0x00000172
 
#define REG_A4XX_RBBM_PERFCTR_LOAD_CMD2 0x00000173
 
#define REG_A4XX_RBBM_PERFCTR_LOAD_VALUE_LO 0x00000174
 
#define REG_A4XX_RBBM_PERFCTR_LOAD_VALUE_HI 0x00000175
 
#define REG_A4XX_RBBM_GPU_BUSY_MASKED 0x0000017a
 
#define REG_A4XX_RBBM_INT_0_STATUS 0x0000017d
 
#define REG_A4XX_RBBM_CLOCK_STATUS 0x00000182
 
#define REG_A4XX_RBBM_AHB_STATUS 0x00000189
 
#define REG_A4XX_RBBM_AHB_ME_SPLIT_STATUS 0x0000018c
 
#define REG_A4XX_RBBM_AHB_PFP_SPLIT_STATUS 0x0000018d
 
#define REG_A4XX_RBBM_AHB_ERROR_STATUS 0x0000018f
 
#define REG_A4XX_RBBM_STATUS 0x00000191
#define A4XX_RBBM_STATUS_HI_BUSY 0x00000001
#define A4XX_RBBM_STATUS_CP_ME_BUSY 0x00000002
#define A4XX_RBBM_STATUS_CP_PFP_BUSY 0x00000004
#define A4XX_RBBM_STATUS_CP_NRT_BUSY 0x00004000
#define A4XX_RBBM_STATUS_VBIF_BUSY 0x00008000
#define A4XX_RBBM_STATUS_TSE_BUSY 0x00010000
#define A4XX_RBBM_STATUS_RAS_BUSY 0x00020000
#define A4XX_RBBM_STATUS_RB_BUSY 0x00040000
#define A4XX_RBBM_STATUS_PC_DCALL_BUSY 0x00080000
#define A4XX_RBBM_STATUS_PC_VSD_BUSY 0x00100000
#define A4XX_RBBM_STATUS_VFD_BUSY 0x00200000
#define A4XX_RBBM_STATUS_VPC_BUSY 0x00400000
#define A4XX_RBBM_STATUS_UCHE_BUSY 0x00800000
#define A4XX_RBBM_STATUS_SP_BUSY 0x01000000
#define A4XX_RBBM_STATUS_TPL1_BUSY 0x02000000
#define A4XX_RBBM_STATUS_MARB_BUSY 0x04000000
#define A4XX_RBBM_STATUS_VSC_BUSY 0x08000000
#define A4XX_RBBM_STATUS_ARB_BUSY 0x10000000
#define A4XX_RBBM_STATUS_HLSQ_BUSY 0x20000000
#define A4XX_RBBM_STATUS_GPU_BUSY_NOHC 0x40000000
#define A4XX_RBBM_STATUS_GPU_BUSY 0x80000000
 
#define REG_A4XX_RBBM_INTERFACE_RRDY_STATUS5 0x0000019f
 
#define REG_A4XX_CP_SCRATCH_UMASK 0x00000228
 
#define REG_A4XX_CP_SCRATCH_ADDR 0x00000229
 
#define REG_A4XX_CP_RB_BASE 0x00000200
 
#define REG_A4XX_CP_RB_CNTL 0x00000201
 
#define REG_A4XX_CP_RB_WPTR 0x00000205
 
#define REG_A4XX_CP_RB_RPTR_ADDR 0x00000203
 
#define REG_A4XX_CP_RB_RPTR 0x00000204
 
#define REG_A4XX_CP_IB1_BASE 0x00000206
 
#define REG_A4XX_CP_IB1_BUFSZ 0x00000207
 
#define REG_A4XX_CP_IB2_BASE 0x00000208
 
#define REG_A4XX_CP_IB2_BUFSZ 0x00000209
 
#define REG_A4XX_CP_ME_NRT_ADDR 0x0000020c
 
#define REG_A4XX_CP_ME_NRT_DATA 0x0000020d
 
#define REG_A4XX_CP_ME_RB_DONE_DATA 0x00000217
 
#define REG_A4XX_CP_QUEUE_THRESH2 0x00000219
 
#define REG_A4XX_CP_MERCIU_SIZE 0x0000021b
 
#define REG_A4XX_CP_ROQ_ADDR 0x0000021c
 
#define REG_A4XX_CP_ROQ_DATA 0x0000021d
 
#define REG_A4XX_CP_MEQ_ADDR 0x0000021e
 
#define REG_A4XX_CP_MEQ_DATA 0x0000021f
 
#define REG_A4XX_CP_MERCIU_ADDR 0x00000220
 
#define REG_A4XX_CP_MERCIU_DATA 0x00000221
 
#define REG_A4XX_CP_MERCIU_DATA2 0x00000222
 
#define REG_A4XX_CP_PFP_UCODE_ADDR 0x00000223
 
#define REG_A4XX_CP_PFP_UCODE_DATA 0x00000224
 
#define REG_A4XX_CP_ME_RAM_WADDR 0x00000225
 
#define REG_A4XX_CP_ME_RAM_RADDR 0x00000226
 
#define REG_A4XX_CP_ME_RAM_DATA 0x00000227
 
#define REG_A4XX_CP_PREEMPT 0x0000022a
 
#define REG_A4XX_CP_CNTL 0x0000022c
 
#define REG_A4XX_CP_ME_CNTL 0x0000022d
 
#define REG_A4XX_CP_DEBUG 0x0000022e
 
#define REG_A4XX_CP_DEBUG_ECO_CONTROL 0x00000231
 
#define REG_A4XX_CP_DRAW_STATE_ADDR 0x00000232
 
#define REG_A4XX_CP_PROTECT_REG_0 0x00000240
 
static inline uint32_t REG_A4XX_CP_PROTECT(uint32_t i0) { return 0x00000240 + 0x1*i0; }
 
static inline uint32_t REG_A4XX_CP_PROTECT_REG(uint32_t i0) { return 0x00000240 + 0x1*i0; }
 
#define REG_A4XX_CP_PROTECT_CTRL 0x00000250
 
#define REG_A4XX_CP_ST_BASE 0x000004c0
 
#define REG_A4XX_CP_STQ_AVAIL 0x000004ce
 
#define REG_A4XX_CP_MERCIU_STAT 0x000004d0
 
#define REG_A4XX_CP_WFI_PEND_CTR 0x000004d2
 
#define REG_A4XX_CP_HW_FAULT 0x000004d8
 
#define REG_A4XX_CP_PROTECT_STATUS 0x000004da
 
#define REG_A4XX_CP_EVENTS_IN_FLIGHT 0x000004dd
 
#define REG_A4XX_CP_PERFCTR_CP_SEL_0 0x00000500
 
#define REG_A4XX_CP_PERFCOMBINER_SELECT 0x0000050b
 
static inline uint32_t REG_A4XX_CP_SCRATCH(uint32_t i0) { return 0x00000578 + 0x1*i0; }
 
static inline uint32_t REG_A4XX_CP_SCRATCH_REG(uint32_t i0) { return 0x00000578 + 0x1*i0; }
 
#define REG_A4XX_SP_VS_STATUS 0x00000ec0
 
#define REG_A4XX_SP_PERFCTR_SP_SEL_11 0x00000ecf
 
#define REG_A4XX_SP_SP_CTRL_REG 0x000022c0
#define A4XX_SP_SP_CTRL_REG_BINNING_PASS 0x00080000
 
#define REG_A4XX_SP_INSTR_CACHE_CTRL 0x000022c1
#define A4XX_SP_INSTR_CACHE_CTRL_VS_BUFFER 0x00000080
#define A4XX_SP_INSTR_CACHE_CTRL_FS_BUFFER 0x00000100
#define A4XX_SP_INSTR_CACHE_CTRL_INSTR_BUFFER 0x00000400
 
#define REG_A4XX_SP_VS_CTRL_REG0 0x000022c4
#define A4XX_SP_VS_CTRL_REG0_THREADMODE__MASK 0x00000001
#define A4XX_SP_VS_CTRL_REG0_THREADMODE__SHIFT 0
static inline uint32_t A4XX_SP_VS_CTRL_REG0_THREADMODE(enum a3xx_threadmode val)
{
return ((val) << A4XX_SP_VS_CTRL_REG0_THREADMODE__SHIFT) & A4XX_SP_VS_CTRL_REG0_THREADMODE__MASK;
}
#define A4XX_SP_VS_CTRL_REG0_VARYING 0x00000002
#define A4XX_SP_VS_CTRL_REG0_CACHEINVALID 0x00000004
#define A4XX_SP_VS_CTRL_REG0_HALFREGFOOTPRINT__MASK 0x000003f0
#define A4XX_SP_VS_CTRL_REG0_HALFREGFOOTPRINT__SHIFT 4
static inline uint32_t A4XX_SP_VS_CTRL_REG0_HALFREGFOOTPRINT(uint32_t val)
{
return ((val) << A4XX_SP_VS_CTRL_REG0_HALFREGFOOTPRINT__SHIFT) & A4XX_SP_VS_CTRL_REG0_HALFREGFOOTPRINT__MASK;
}
#define A4XX_SP_VS_CTRL_REG0_FULLREGFOOTPRINT__MASK 0x0003fc00
#define A4XX_SP_VS_CTRL_REG0_FULLREGFOOTPRINT__SHIFT 10
static inline uint32_t A4XX_SP_VS_CTRL_REG0_FULLREGFOOTPRINT(uint32_t val)
{
return ((val) << A4XX_SP_VS_CTRL_REG0_FULLREGFOOTPRINT__SHIFT) & A4XX_SP_VS_CTRL_REG0_FULLREGFOOTPRINT__MASK;
}
#define A4XX_SP_VS_CTRL_REG0_INOUTREGOVERLAP__MASK 0x000c0000
#define A4XX_SP_VS_CTRL_REG0_INOUTREGOVERLAP__SHIFT 18
static inline uint32_t A4XX_SP_VS_CTRL_REG0_INOUTREGOVERLAP(uint32_t val)
{
return ((val) << A4XX_SP_VS_CTRL_REG0_INOUTREGOVERLAP__SHIFT) & A4XX_SP_VS_CTRL_REG0_INOUTREGOVERLAP__MASK;
}
#define A4XX_SP_VS_CTRL_REG0_THREADSIZE__MASK 0x00100000
#define A4XX_SP_VS_CTRL_REG0_THREADSIZE__SHIFT 20
static inline uint32_t A4XX_SP_VS_CTRL_REG0_THREADSIZE(enum a3xx_threadsize val)
{
return ((val) << A4XX_SP_VS_CTRL_REG0_THREADSIZE__SHIFT) & A4XX_SP_VS_CTRL_REG0_THREADSIZE__MASK;
}
#define A4XX_SP_VS_CTRL_REG0_SUPERTHREADMODE 0x00200000
#define A4XX_SP_VS_CTRL_REG0_PIXLODENABLE 0x00400000
 
#define REG_A4XX_SP_VS_CTRL_REG1 0x000022c5
#define A4XX_SP_VS_CTRL_REG1_CONSTLENGTH__MASK 0x000000ff
#define A4XX_SP_VS_CTRL_REG1_CONSTLENGTH__SHIFT 0
static inline uint32_t A4XX_SP_VS_CTRL_REG1_CONSTLENGTH(uint32_t val)
{
return ((val) << A4XX_SP_VS_CTRL_REG1_CONSTLENGTH__SHIFT) & A4XX_SP_VS_CTRL_REG1_CONSTLENGTH__MASK;
}
#define A4XX_SP_VS_CTRL_REG1_INITIALOUTSTANDING__MASK 0x7f000000
#define A4XX_SP_VS_CTRL_REG1_INITIALOUTSTANDING__SHIFT 24
static inline uint32_t A4XX_SP_VS_CTRL_REG1_INITIALOUTSTANDING(uint32_t val)
{
return ((val) << A4XX_SP_VS_CTRL_REG1_INITIALOUTSTANDING__SHIFT) & A4XX_SP_VS_CTRL_REG1_INITIALOUTSTANDING__MASK;
}
 
#define REG_A4XX_SP_VS_PARAM_REG 0x000022c6
#define A4XX_SP_VS_PARAM_REG_POSREGID__MASK 0x000000ff
#define A4XX_SP_VS_PARAM_REG_POSREGID__SHIFT 0
static inline uint32_t A4XX_SP_VS_PARAM_REG_POSREGID(uint32_t val)
{
return ((val) << A4XX_SP_VS_PARAM_REG_POSREGID__SHIFT) & A4XX_SP_VS_PARAM_REG_POSREGID__MASK;
}
#define A4XX_SP_VS_PARAM_REG_PSIZEREGID__MASK 0x0000ff00
#define A4XX_SP_VS_PARAM_REG_PSIZEREGID__SHIFT 8
static inline uint32_t A4XX_SP_VS_PARAM_REG_PSIZEREGID(uint32_t val)
{
return ((val) << A4XX_SP_VS_PARAM_REG_PSIZEREGID__SHIFT) & A4XX_SP_VS_PARAM_REG_PSIZEREGID__MASK;
}
#define A4XX_SP_VS_PARAM_REG_TOTALVSOUTVAR__MASK 0xfff00000
#define A4XX_SP_VS_PARAM_REG_TOTALVSOUTVAR__SHIFT 20
static inline uint32_t A4XX_SP_VS_PARAM_REG_TOTALVSOUTVAR(uint32_t val)
{
return ((val) << A4XX_SP_VS_PARAM_REG_TOTALVSOUTVAR__SHIFT) & A4XX_SP_VS_PARAM_REG_TOTALVSOUTVAR__MASK;
}
 
static inline uint32_t REG_A4XX_SP_VS_OUT(uint32_t i0) { return 0x000022c7 + 0x1*i0; }
 
static inline uint32_t REG_A4XX_SP_VS_OUT_REG(uint32_t i0) { return 0x000022c7 + 0x1*i0; }
#define A4XX_SP_VS_OUT_REG_A_REGID__MASK 0x000001ff
#define A4XX_SP_VS_OUT_REG_A_REGID__SHIFT 0
static inline uint32_t A4XX_SP_VS_OUT_REG_A_REGID(uint32_t val)
{
return ((val) << A4XX_SP_VS_OUT_REG_A_REGID__SHIFT) & A4XX_SP_VS_OUT_REG_A_REGID__MASK;
}
#define A4XX_SP_VS_OUT_REG_A_COMPMASK__MASK 0x00001e00
#define A4XX_SP_VS_OUT_REG_A_COMPMASK__SHIFT 9
static inline uint32_t A4XX_SP_VS_OUT_REG_A_COMPMASK(uint32_t val)
{
return ((val) << A4XX_SP_VS_OUT_REG_A_COMPMASK__SHIFT) & A4XX_SP_VS_OUT_REG_A_COMPMASK__MASK;
}
#define A4XX_SP_VS_OUT_REG_B_REGID__MASK 0x01ff0000
#define A4XX_SP_VS_OUT_REG_B_REGID__SHIFT 16
static inline uint32_t A4XX_SP_VS_OUT_REG_B_REGID(uint32_t val)
{
return ((val) << A4XX_SP_VS_OUT_REG_B_REGID__SHIFT) & A4XX_SP_VS_OUT_REG_B_REGID__MASK;
}
#define A4XX_SP_VS_OUT_REG_B_COMPMASK__MASK 0x1e000000
#define A4XX_SP_VS_OUT_REG_B_COMPMASK__SHIFT 25
static inline uint32_t A4XX_SP_VS_OUT_REG_B_COMPMASK(uint32_t val)
{
return ((val) << A4XX_SP_VS_OUT_REG_B_COMPMASK__SHIFT) & A4XX_SP_VS_OUT_REG_B_COMPMASK__MASK;
}
 
static inline uint32_t REG_A4XX_SP_VS_VPC_DST(uint32_t i0) { return 0x000022d8 + 0x1*i0; }
 
static inline uint32_t REG_A4XX_SP_VS_VPC_DST_REG(uint32_t i0) { return 0x000022d8 + 0x1*i0; }
#define A4XX_SP_VS_VPC_DST_REG_OUTLOC0__MASK 0x000000ff
#define A4XX_SP_VS_VPC_DST_REG_OUTLOC0__SHIFT 0
static inline uint32_t A4XX_SP_VS_VPC_DST_REG_OUTLOC0(uint32_t val)
{
return ((val) << A4XX_SP_VS_VPC_DST_REG_OUTLOC0__SHIFT) & A4XX_SP_VS_VPC_DST_REG_OUTLOC0__MASK;
}
#define A4XX_SP_VS_VPC_DST_REG_OUTLOC1__MASK 0x0000ff00
#define A4XX_SP_VS_VPC_DST_REG_OUTLOC1__SHIFT 8
static inline uint32_t A4XX_SP_VS_VPC_DST_REG_OUTLOC1(uint32_t val)
{
return ((val) << A4XX_SP_VS_VPC_DST_REG_OUTLOC1__SHIFT) & A4XX_SP_VS_VPC_DST_REG_OUTLOC1__MASK;
}
#define A4XX_SP_VS_VPC_DST_REG_OUTLOC2__MASK 0x00ff0000
#define A4XX_SP_VS_VPC_DST_REG_OUTLOC2__SHIFT 16
static inline uint32_t A4XX_SP_VS_VPC_DST_REG_OUTLOC2(uint32_t val)
{
return ((val) << A4XX_SP_VS_VPC_DST_REG_OUTLOC2__SHIFT) & A4XX_SP_VS_VPC_DST_REG_OUTLOC2__MASK;
}
#define A4XX_SP_VS_VPC_DST_REG_OUTLOC3__MASK 0xff000000
#define A4XX_SP_VS_VPC_DST_REG_OUTLOC3__SHIFT 24
static inline uint32_t A4XX_SP_VS_VPC_DST_REG_OUTLOC3(uint32_t val)
{
return ((val) << A4XX_SP_VS_VPC_DST_REG_OUTLOC3__SHIFT) & A4XX_SP_VS_VPC_DST_REG_OUTLOC3__MASK;
}
 
#define REG_A4XX_SP_VS_OBJ_OFFSET_REG 0x000022e0
#define A4XX_SP_VS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET__MASK 0x01ff0000
#define A4XX_SP_VS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET__SHIFT 16
static inline uint32_t A4XX_SP_VS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET(uint32_t val)
{
return ((val) << A4XX_SP_VS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET__SHIFT) & A4XX_SP_VS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET__MASK;
}
#define A4XX_SP_VS_OBJ_OFFSET_REG_SHADEROBJOFFSET__MASK 0xfe000000
#define A4XX_SP_VS_OBJ_OFFSET_REG_SHADEROBJOFFSET__SHIFT 25
static inline uint32_t A4XX_SP_VS_OBJ_OFFSET_REG_SHADEROBJOFFSET(uint32_t val)
{
return ((val) << A4XX_SP_VS_OBJ_OFFSET_REG_SHADEROBJOFFSET__SHIFT) & A4XX_SP_VS_OBJ_OFFSET_REG_SHADEROBJOFFSET__MASK;
}
 
#define REG_A4XX_SP_VS_OBJ_START 0x000022e1
 
#define REG_A4XX_SP_VS_PVT_MEM_PARAM 0x000022e2
 
#define REG_A4XX_SP_VS_PVT_MEM_ADDR 0x000022e3
 
#define REG_A4XX_SP_VS_LENGTH_REG 0x000022e5
 
#define REG_A4XX_SP_FS_CTRL_REG0 0x000022e8
#define A4XX_SP_FS_CTRL_REG0_THREADMODE__MASK 0x00000001
#define A4XX_SP_FS_CTRL_REG0_THREADMODE__SHIFT 0
static inline uint32_t A4XX_SP_FS_CTRL_REG0_THREADMODE(enum a3xx_threadmode val)
{
return ((val) << A4XX_SP_FS_CTRL_REG0_THREADMODE__SHIFT) & A4XX_SP_FS_CTRL_REG0_THREADMODE__MASK;
}
#define A4XX_SP_FS_CTRL_REG0_VARYING 0x00000002
#define A4XX_SP_FS_CTRL_REG0_CACHEINVALID 0x00000004
#define A4XX_SP_FS_CTRL_REG0_HALFREGFOOTPRINT__MASK 0x000003f0
#define A4XX_SP_FS_CTRL_REG0_HALFREGFOOTPRINT__SHIFT 4
static inline uint32_t A4XX_SP_FS_CTRL_REG0_HALFREGFOOTPRINT(uint32_t val)
{
return ((val) << A4XX_SP_FS_CTRL_REG0_HALFREGFOOTPRINT__SHIFT) & A4XX_SP_FS_CTRL_REG0_HALFREGFOOTPRINT__MASK;
}
#define A4XX_SP_FS_CTRL_REG0_FULLREGFOOTPRINT__MASK 0x0003fc00
#define A4XX_SP_FS_CTRL_REG0_FULLREGFOOTPRINT__SHIFT 10
static inline uint32_t A4XX_SP_FS_CTRL_REG0_FULLREGFOOTPRINT(uint32_t val)
{
return ((val) << A4XX_SP_FS_CTRL_REG0_FULLREGFOOTPRINT__SHIFT) & A4XX_SP_FS_CTRL_REG0_FULLREGFOOTPRINT__MASK;
}
#define A4XX_SP_FS_CTRL_REG0_INOUTREGOVERLAP__MASK 0x000c0000
#define A4XX_SP_FS_CTRL_REG0_INOUTREGOVERLAP__SHIFT 18
static inline uint32_t A4XX_SP_FS_CTRL_REG0_INOUTREGOVERLAP(uint32_t val)
{
return ((val) << A4XX_SP_FS_CTRL_REG0_INOUTREGOVERLAP__SHIFT) & A4XX_SP_FS_CTRL_REG0_INOUTREGOVERLAP__MASK;
}
#define A4XX_SP_FS_CTRL_REG0_THREADSIZE__MASK 0x00100000
#define A4XX_SP_FS_CTRL_REG0_THREADSIZE__SHIFT 20
static inline uint32_t A4XX_SP_FS_CTRL_REG0_THREADSIZE(enum a3xx_threadsize val)
{
return ((val) << A4XX_SP_FS_CTRL_REG0_THREADSIZE__SHIFT) & A4XX_SP_FS_CTRL_REG0_THREADSIZE__MASK;
}
#define A4XX_SP_FS_CTRL_REG0_SUPERTHREADMODE 0x00200000
#define A4XX_SP_FS_CTRL_REG0_PIXLODENABLE 0x00400000
 
#define REG_A4XX_SP_FS_CTRL_REG1 0x000022e9
#define A4XX_SP_FS_CTRL_REG1_CONSTLENGTH__MASK 0x000000ff
#define A4XX_SP_FS_CTRL_REG1_CONSTLENGTH__SHIFT 0
static inline uint32_t A4XX_SP_FS_CTRL_REG1_CONSTLENGTH(uint32_t val)
{
return ((val) << A4XX_SP_FS_CTRL_REG1_CONSTLENGTH__SHIFT) & A4XX_SP_FS_CTRL_REG1_CONSTLENGTH__MASK;
}
#define A4XX_SP_FS_CTRL_REG1_FACENESS 0x00080000
#define A4XX_SP_FS_CTRL_REG1_VARYING 0x00100000
#define A4XX_SP_FS_CTRL_REG1_FRAGCOORD 0x00200000
 
#define REG_A4XX_SP_FS_OBJ_OFFSET_REG 0x000022ea
#define A4XX_SP_FS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET__MASK 0x01ff0000
#define A4XX_SP_FS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET__SHIFT 16
static inline uint32_t A4XX_SP_FS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET(uint32_t val)
{
return ((val) << A4XX_SP_FS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET__SHIFT) & A4XX_SP_FS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET__MASK;
}
#define A4XX_SP_FS_OBJ_OFFSET_REG_SHADEROBJOFFSET__MASK 0xfe000000
#define A4XX_SP_FS_OBJ_OFFSET_REG_SHADEROBJOFFSET__SHIFT 25
static inline uint32_t A4XX_SP_FS_OBJ_OFFSET_REG_SHADEROBJOFFSET(uint32_t val)
{
return ((val) << A4XX_SP_FS_OBJ_OFFSET_REG_SHADEROBJOFFSET__SHIFT) & A4XX_SP_FS_OBJ_OFFSET_REG_SHADEROBJOFFSET__MASK;
}
 
#define REG_A4XX_SP_FS_OBJ_START 0x000022eb
 
#define REG_A4XX_SP_FS_PVT_MEM_PARAM 0x000022ec
 
#define REG_A4XX_SP_FS_PVT_MEM_ADDR 0x000022ed
 
#define REG_A4XX_SP_FS_LENGTH_REG 0x000022ef
 
#define REG_A4XX_SP_FS_OUTPUT_REG 0x000022f0
#define A4XX_SP_FS_OUTPUT_REG_MRT__MASK 0x0000000f
#define A4XX_SP_FS_OUTPUT_REG_MRT__SHIFT 0
static inline uint32_t A4XX_SP_FS_OUTPUT_REG_MRT(uint32_t val)
{
return ((val) << A4XX_SP_FS_OUTPUT_REG_MRT__SHIFT) & A4XX_SP_FS_OUTPUT_REG_MRT__MASK;
}
#define A4XX_SP_FS_OUTPUT_REG_DEPTH_ENABLE 0x00000080
#define A4XX_SP_FS_OUTPUT_REG_DEPTH_REGID__MASK 0x0000ff00
#define A4XX_SP_FS_OUTPUT_REG_DEPTH_REGID__SHIFT 8
static inline uint32_t A4XX_SP_FS_OUTPUT_REG_DEPTH_REGID(uint32_t val)
{
return ((val) << A4XX_SP_FS_OUTPUT_REG_DEPTH_REGID__SHIFT) & A4XX_SP_FS_OUTPUT_REG_DEPTH_REGID__MASK;
}
#define A4XX_SP_FS_OUTPUT_REG_SAMPLEMASK_REGID__MASK 0xff000000
#define A4XX_SP_FS_OUTPUT_REG_SAMPLEMASK_REGID__SHIFT 24
static inline uint32_t A4XX_SP_FS_OUTPUT_REG_SAMPLEMASK_REGID(uint32_t val)
{
return ((val) << A4XX_SP_FS_OUTPUT_REG_SAMPLEMASK_REGID__SHIFT) & A4XX_SP_FS_OUTPUT_REG_SAMPLEMASK_REGID__MASK;
}
 
static inline uint32_t REG_A4XX_SP_FS_MRT(uint32_t i0) { return 0x000022f1 + 0x1*i0; }
 
static inline uint32_t REG_A4XX_SP_FS_MRT_REG(uint32_t i0) { return 0x000022f1 + 0x1*i0; }
#define A4XX_SP_FS_MRT_REG_REGID__MASK 0x000000ff
#define A4XX_SP_FS_MRT_REG_REGID__SHIFT 0
static inline uint32_t A4XX_SP_FS_MRT_REG_REGID(uint32_t val)
{
return ((val) << A4XX_SP_FS_MRT_REG_REGID__SHIFT) & A4XX_SP_FS_MRT_REG_REGID__MASK;
}
#define A4XX_SP_FS_MRT_REG_HALF_PRECISION 0x00000100
#define A4XX_SP_FS_MRT_REG_MRTFORMAT__MASK 0x0003f000
#define A4XX_SP_FS_MRT_REG_MRTFORMAT__SHIFT 12
static inline uint32_t A4XX_SP_FS_MRT_REG_MRTFORMAT(enum a4xx_color_fmt val)
{
return ((val) << A4XX_SP_FS_MRT_REG_MRTFORMAT__SHIFT) & A4XX_SP_FS_MRT_REG_MRTFORMAT__MASK;
}
 
#define REG_A4XX_SP_HS_OBJ_OFFSET_REG 0x0000230d
#define A4XX_SP_HS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET__MASK 0x01ff0000
#define A4XX_SP_HS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET__SHIFT 16
static inline uint32_t A4XX_SP_HS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET(uint32_t val)
{
return ((val) << A4XX_SP_HS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET__SHIFT) & A4XX_SP_HS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET__MASK;
}
#define A4XX_SP_HS_OBJ_OFFSET_REG_SHADEROBJOFFSET__MASK 0xfe000000
#define A4XX_SP_HS_OBJ_OFFSET_REG_SHADEROBJOFFSET__SHIFT 25
static inline uint32_t A4XX_SP_HS_OBJ_OFFSET_REG_SHADEROBJOFFSET(uint32_t val)
{
return ((val) << A4XX_SP_HS_OBJ_OFFSET_REG_SHADEROBJOFFSET__SHIFT) & A4XX_SP_HS_OBJ_OFFSET_REG_SHADEROBJOFFSET__MASK;
}
 
#define REG_A4XX_SP_HS_OBJ_START 0x0000230e
 
#define REG_A4XX_SP_HS_PVT_MEM_PARAM 0x0000230f
 
#define REG_A4XX_SP_HS_PVT_MEM_ADDR 0x00002310
 
#define REG_A4XX_SP_HS_LENGTH_REG 0x00002312
 
#define REG_A4XX_SP_DS_OBJ_OFFSET_REG 0x00002334
#define A4XX_SP_DS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET__MASK 0x01ff0000
#define A4XX_SP_DS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET__SHIFT 16
static inline uint32_t A4XX_SP_DS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET(uint32_t val)
{
return ((val) << A4XX_SP_DS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET__SHIFT) & A4XX_SP_DS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET__MASK;
}
#define A4XX_SP_DS_OBJ_OFFSET_REG_SHADEROBJOFFSET__MASK 0xfe000000
#define A4XX_SP_DS_OBJ_OFFSET_REG_SHADEROBJOFFSET__SHIFT 25
static inline uint32_t A4XX_SP_DS_OBJ_OFFSET_REG_SHADEROBJOFFSET(uint32_t val)
{
return ((val) << A4XX_SP_DS_OBJ_OFFSET_REG_SHADEROBJOFFSET__SHIFT) & A4XX_SP_DS_OBJ_OFFSET_REG_SHADEROBJOFFSET__MASK;
}
 
#define REG_A4XX_SP_DS_OBJ_START 0x00002335
 
#define REG_A4XX_SP_DS_PVT_MEM_PARAM 0x00002336
 
#define REG_A4XX_SP_DS_PVT_MEM_ADDR 0x00002337
 
#define REG_A4XX_SP_DS_LENGTH_REG 0x00002339
 
#define REG_A4XX_SP_GS_OBJ_OFFSET_REG 0x0000235b
#define A4XX_SP_GS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET__MASK 0x01ff0000
#define A4XX_SP_GS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET__SHIFT 16
static inline uint32_t A4XX_SP_GS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET(uint32_t val)
{
return ((val) << A4XX_SP_GS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET__SHIFT) & A4XX_SP_GS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET__MASK;
}
#define A4XX_SP_GS_OBJ_OFFSET_REG_SHADEROBJOFFSET__MASK 0xfe000000
#define A4XX_SP_GS_OBJ_OFFSET_REG_SHADEROBJOFFSET__SHIFT 25
static inline uint32_t A4XX_SP_GS_OBJ_OFFSET_REG_SHADEROBJOFFSET(uint32_t val)
{
return ((val) << A4XX_SP_GS_OBJ_OFFSET_REG_SHADEROBJOFFSET__SHIFT) & A4XX_SP_GS_OBJ_OFFSET_REG_SHADEROBJOFFSET__MASK;
}
 
#define REG_A4XX_SP_GS_OBJ_START 0x0000235c
 
#define REG_A4XX_SP_GS_PVT_MEM_PARAM 0x0000235d
 
#define REG_A4XX_SP_GS_PVT_MEM_ADDR 0x0000235e
 
#define REG_A4XX_SP_GS_LENGTH_REG 0x00002360
 
#define REG_A4XX_VPC_DEBUG_RAM_SEL 0x00000e60
 
#define REG_A4XX_VPC_DEBUG_RAM_READ 0x00000e61
 
#define REG_A4XX_VPC_DEBUG_ECO_CONTROL 0x00000e64
 
#define REG_A4XX_VPC_PERFCTR_VPC_SEL_3 0x00000e68
 
#define REG_A4XX_VPC_ATTR 0x00002140
#define A4XX_VPC_ATTR_TOTALATTR__MASK 0x000001ff
#define A4XX_VPC_ATTR_TOTALATTR__SHIFT 0
static inline uint32_t A4XX_VPC_ATTR_TOTALATTR(uint32_t val)
{
return ((val) << A4XX_VPC_ATTR_TOTALATTR__SHIFT) & A4XX_VPC_ATTR_TOTALATTR__MASK;
}
#define A4XX_VPC_ATTR_PSIZE 0x00000200
#define A4XX_VPC_ATTR_THRDASSIGN__MASK 0x00003000
#define A4XX_VPC_ATTR_THRDASSIGN__SHIFT 12
static inline uint32_t A4XX_VPC_ATTR_THRDASSIGN(uint32_t val)
{
return ((val) << A4XX_VPC_ATTR_THRDASSIGN__SHIFT) & A4XX_VPC_ATTR_THRDASSIGN__MASK;
}
#define A4XX_VPC_ATTR_ENABLE 0x02000000
 
#define REG_A4XX_VPC_PACK 0x00002141
#define A4XX_VPC_PACK_NUMBYPASSVAR__MASK 0x000000ff
#define A4XX_VPC_PACK_NUMBYPASSVAR__SHIFT 0
static inline uint32_t A4XX_VPC_PACK_NUMBYPASSVAR(uint32_t val)
{
return ((val) << A4XX_VPC_PACK_NUMBYPASSVAR__SHIFT) & A4XX_VPC_PACK_NUMBYPASSVAR__MASK;
}
#define A4XX_VPC_PACK_NUMFPNONPOSVAR__MASK 0x0000ff00
#define A4XX_VPC_PACK_NUMFPNONPOSVAR__SHIFT 8
static inline uint32_t A4XX_VPC_PACK_NUMFPNONPOSVAR(uint32_t val)
{
return ((val) << A4XX_VPC_PACK_NUMFPNONPOSVAR__SHIFT) & A4XX_VPC_PACK_NUMFPNONPOSVAR__MASK;
}
#define A4XX_VPC_PACK_NUMNONPOSVSVAR__MASK 0x00ff0000
#define A4XX_VPC_PACK_NUMNONPOSVSVAR__SHIFT 16
static inline uint32_t A4XX_VPC_PACK_NUMNONPOSVSVAR(uint32_t val)
{
return ((val) << A4XX_VPC_PACK_NUMNONPOSVSVAR__SHIFT) & A4XX_VPC_PACK_NUMNONPOSVSVAR__MASK;
}
 
static inline uint32_t REG_A4XX_VPC_VARYING_INTERP(uint32_t i0) { return 0x00002142 + 0x1*i0; }
 
static inline uint32_t REG_A4XX_VPC_VARYING_INTERP_MODE(uint32_t i0) { return 0x00002142 + 0x1*i0; }
 
static inline uint32_t REG_A4XX_VPC_VARYING_PS_REPL(uint32_t i0) { return 0x0000214a + 0x1*i0; }
 
static inline uint32_t REG_A4XX_VPC_VARYING_PS_REPL_MODE(uint32_t i0) { return 0x0000214a + 0x1*i0; }
 
#define REG_A4XX_VPC_SO_FLUSH_WADDR_3 0x0000216e
 
#define REG_A4XX_VSC_BIN_SIZE 0x00000c00
#define A4XX_VSC_BIN_SIZE_WIDTH__MASK 0x0000001f
#define A4XX_VSC_BIN_SIZE_WIDTH__SHIFT 0
static inline uint32_t A4XX_VSC_BIN_SIZE_WIDTH(uint32_t val)
{
return ((val >> 5) << A4XX_VSC_BIN_SIZE_WIDTH__SHIFT) & A4XX_VSC_BIN_SIZE_WIDTH__MASK;
}
#define A4XX_VSC_BIN_SIZE_HEIGHT__MASK 0x000003e0
#define A4XX_VSC_BIN_SIZE_HEIGHT__SHIFT 5
static inline uint32_t A4XX_VSC_BIN_SIZE_HEIGHT(uint32_t val)
{
return ((val >> 5) << A4XX_VSC_BIN_SIZE_HEIGHT__SHIFT) & A4XX_VSC_BIN_SIZE_HEIGHT__MASK;
}
 
#define REG_A4XX_VSC_SIZE_ADDRESS 0x00000c01
 
#define REG_A4XX_VSC_SIZE_ADDRESS2 0x00000c02
 
#define REG_A4XX_VSC_DEBUG_ECO_CONTROL 0x00000c03
 
static inline uint32_t REG_A4XX_VSC_PIPE_CONFIG(uint32_t i0) { return 0x00000c08 + 0x1*i0; }
 
static inline uint32_t REG_A4XX_VSC_PIPE_CONFIG_REG(uint32_t i0) { return 0x00000c08 + 0x1*i0; }
#define A4XX_VSC_PIPE_CONFIG_REG_X__MASK 0x000003ff
#define A4XX_VSC_PIPE_CONFIG_REG_X__SHIFT 0
static inline uint32_t A4XX_VSC_PIPE_CONFIG_REG_X(uint32_t val)
{
return ((val) << A4XX_VSC_PIPE_CONFIG_REG_X__SHIFT) & A4XX_VSC_PIPE_CONFIG_REG_X__MASK;
}
#define A4XX_VSC_PIPE_CONFIG_REG_Y__MASK 0x000ffc00
#define A4XX_VSC_PIPE_CONFIG_REG_Y__SHIFT 10
static inline uint32_t A4XX_VSC_PIPE_CONFIG_REG_Y(uint32_t val)
{
return ((val) << A4XX_VSC_PIPE_CONFIG_REG_Y__SHIFT) & A4XX_VSC_PIPE_CONFIG_REG_Y__MASK;
}
#define A4XX_VSC_PIPE_CONFIG_REG_W__MASK 0x00f00000
#define A4XX_VSC_PIPE_CONFIG_REG_W__SHIFT 20
static inline uint32_t A4XX_VSC_PIPE_CONFIG_REG_W(uint32_t val)
{
return ((val) << A4XX_VSC_PIPE_CONFIG_REG_W__SHIFT) & A4XX_VSC_PIPE_CONFIG_REG_W__MASK;
}
#define A4XX_VSC_PIPE_CONFIG_REG_H__MASK 0x0f000000
#define A4XX_VSC_PIPE_CONFIG_REG_H__SHIFT 24
static inline uint32_t A4XX_VSC_PIPE_CONFIG_REG_H(uint32_t val)
{
return ((val) << A4XX_VSC_PIPE_CONFIG_REG_H__SHIFT) & A4XX_VSC_PIPE_CONFIG_REG_H__MASK;
}
 
static inline uint32_t REG_A4XX_VSC_PIPE_DATA_ADDRESS(uint32_t i0) { return 0x00000c10 + 0x1*i0; }
 
static inline uint32_t REG_A4XX_VSC_PIPE_DATA_ADDRESS_REG(uint32_t i0) { return 0x00000c10 + 0x1*i0; }
 
static inline uint32_t REG_A4XX_VSC_PIPE_DATA_LENGTH(uint32_t i0) { return 0x00000c18 + 0x1*i0; }
 
static inline uint32_t REG_A4XX_VSC_PIPE_DATA_LENGTH_REG(uint32_t i0) { return 0x00000c18 + 0x1*i0; }
 
#define REG_A4XX_VSC_PIPE_PARTIAL_POSN_1 0x00000c41
 
#define REG_A4XX_VSC_PERFCTR_VSC_SEL_0 0x00000c50
 
#define REG_A4XX_VSC_PERFCTR_VSC_SEL_1 0x00000c51
 
#define REG_A4XX_VFD_DEBUG_CONTROL 0x00000e40
 
#define REG_A4XX_VFD_PERFCTR_VFD_SEL_7 0x00000e4a
 
#define REG_A4XX_VGT_CL_INITIATOR 0x000021d0
 
#define REG_A4XX_VGT_EVENT_INITIATOR 0x000021d9
 
#define REG_A4XX_VFD_CONTROL_0 0x00002200
#define A4XX_VFD_CONTROL_0_TOTALATTRTOVS__MASK 0x000000ff
#define A4XX_VFD_CONTROL_0_TOTALATTRTOVS__SHIFT 0
static inline uint32_t A4XX_VFD_CONTROL_0_TOTALATTRTOVS(uint32_t val)
{
return ((val) << A4XX_VFD_CONTROL_0_TOTALATTRTOVS__SHIFT) & A4XX_VFD_CONTROL_0_TOTALATTRTOVS__MASK;
}
#define A4XX_VFD_CONTROL_0_BYPASSATTROVS__MASK 0x0001fe00
#define A4XX_VFD_CONTROL_0_BYPASSATTROVS__SHIFT 9
static inline uint32_t A4XX_VFD_CONTROL_0_BYPASSATTROVS(uint32_t val)
{
return ((val) << A4XX_VFD_CONTROL_0_BYPASSATTROVS__SHIFT) & A4XX_VFD_CONTROL_0_BYPASSATTROVS__MASK;
}
#define A4XX_VFD_CONTROL_0_STRMDECINSTRCNT__MASK 0x03f00000
#define A4XX_VFD_CONTROL_0_STRMDECINSTRCNT__SHIFT 20
static inline uint32_t A4XX_VFD_CONTROL_0_STRMDECINSTRCNT(uint32_t val)
{
return ((val) << A4XX_VFD_CONTROL_0_STRMDECINSTRCNT__SHIFT) & A4XX_VFD_CONTROL_0_STRMDECINSTRCNT__MASK;
}
#define A4XX_VFD_CONTROL_0_STRMFETCHINSTRCNT__MASK 0xfc000000
#define A4XX_VFD_CONTROL_0_STRMFETCHINSTRCNT__SHIFT 26
static inline uint32_t A4XX_VFD_CONTROL_0_STRMFETCHINSTRCNT(uint32_t val)
{
return ((val) << A4XX_VFD_CONTROL_0_STRMFETCHINSTRCNT__SHIFT) & A4XX_VFD_CONTROL_0_STRMFETCHINSTRCNT__MASK;
}
 
#define REG_A4XX_VFD_CONTROL_1 0x00002201
#define A4XX_VFD_CONTROL_1_MAXSTORAGE__MASK 0x0000ffff
#define A4XX_VFD_CONTROL_1_MAXSTORAGE__SHIFT 0
static inline uint32_t A4XX_VFD_CONTROL_1_MAXSTORAGE(uint32_t val)
{
return ((val) << A4XX_VFD_CONTROL_1_MAXSTORAGE__SHIFT) & A4XX_VFD_CONTROL_1_MAXSTORAGE__MASK;
}
#define A4XX_VFD_CONTROL_1_REGID4VTX__MASK 0x00ff0000
#define A4XX_VFD_CONTROL_1_REGID4VTX__SHIFT 16
static inline uint32_t A4XX_VFD_CONTROL_1_REGID4VTX(uint32_t val)
{
return ((val) << A4XX_VFD_CONTROL_1_REGID4VTX__SHIFT) & A4XX_VFD_CONTROL_1_REGID4VTX__MASK;
}
#define A4XX_VFD_CONTROL_1_REGID4INST__MASK 0xff000000
#define A4XX_VFD_CONTROL_1_REGID4INST__SHIFT 24
static inline uint32_t A4XX_VFD_CONTROL_1_REGID4INST(uint32_t val)
{
return ((val) << A4XX_VFD_CONTROL_1_REGID4INST__SHIFT) & A4XX_VFD_CONTROL_1_REGID4INST__MASK;
}
 
#define REG_A4XX_VFD_CONTROL_2 0x00002202
 
#define REG_A4XX_VFD_CONTROL_3 0x00002203
#define A4XX_VFD_CONTROL_3_REGID_VTXCNT__MASK 0x0000ff00
#define A4XX_VFD_CONTROL_3_REGID_VTXCNT__SHIFT 8
static inline uint32_t A4XX_VFD_CONTROL_3_REGID_VTXCNT(uint32_t val)
{
return ((val) << A4XX_VFD_CONTROL_3_REGID_VTXCNT__SHIFT) & A4XX_VFD_CONTROL_3_REGID_VTXCNT__MASK;
}
 
#define REG_A4XX_VFD_CONTROL_4 0x00002204
 
#define REG_A4XX_VFD_INDEX_OFFSET 0x00002208
 
static inline uint32_t REG_A4XX_VFD_FETCH(uint32_t i0) { return 0x0000220a + 0x4*i0; }
 
static inline uint32_t REG_A4XX_VFD_FETCH_INSTR_0(uint32_t i0) { return 0x0000220a + 0x4*i0; }
#define A4XX_VFD_FETCH_INSTR_0_FETCHSIZE__MASK 0x0000007f
#define A4XX_VFD_FETCH_INSTR_0_FETCHSIZE__SHIFT 0
static inline uint32_t A4XX_VFD_FETCH_INSTR_0_FETCHSIZE(uint32_t val)
{
return ((val) << A4XX_VFD_FETCH_INSTR_0_FETCHSIZE__SHIFT) & A4XX_VFD_FETCH_INSTR_0_FETCHSIZE__MASK;
}
#define A4XX_VFD_FETCH_INSTR_0_BUFSTRIDE__MASK 0x0001ff80
#define A4XX_VFD_FETCH_INSTR_0_BUFSTRIDE__SHIFT 7
static inline uint32_t A4XX_VFD_FETCH_INSTR_0_BUFSTRIDE(uint32_t val)
{
return ((val) << A4XX_VFD_FETCH_INSTR_0_BUFSTRIDE__SHIFT) & A4XX_VFD_FETCH_INSTR_0_BUFSTRIDE__MASK;
}
#define A4XX_VFD_FETCH_INSTR_0_SWITCHNEXT 0x00080000
#define A4XX_VFD_FETCH_INSTR_0_INSTANCED 0x00100000
 
static inline uint32_t REG_A4XX_VFD_FETCH_INSTR_1(uint32_t i0) { return 0x0000220b + 0x4*i0; }
 
static inline uint32_t REG_A4XX_VFD_FETCH_INSTR_2(uint32_t i0) { return 0x0000220c + 0x4*i0; }
#define A4XX_VFD_FETCH_INSTR_2_SIZE__MASK 0xfffffff0
#define A4XX_VFD_FETCH_INSTR_2_SIZE__SHIFT 4
static inline uint32_t A4XX_VFD_FETCH_INSTR_2_SIZE(uint32_t val)
{
return ((val >> 4) << A4XX_VFD_FETCH_INSTR_2_SIZE__SHIFT) & A4XX_VFD_FETCH_INSTR_2_SIZE__MASK;
}
 
static inline uint32_t REG_A4XX_VFD_FETCH_INSTR_3(uint32_t i0) { return 0x0000220d + 0x4*i0; }
#define A4XX_VFD_FETCH_INSTR_3_STEPRATE__MASK 0x000001ff
#define A4XX_VFD_FETCH_INSTR_3_STEPRATE__SHIFT 0
static inline uint32_t A4XX_VFD_FETCH_INSTR_3_STEPRATE(uint32_t val)
{
return ((val) << A4XX_VFD_FETCH_INSTR_3_STEPRATE__SHIFT) & A4XX_VFD_FETCH_INSTR_3_STEPRATE__MASK;
}
 
static inline uint32_t REG_A4XX_VFD_DECODE(uint32_t i0) { return 0x0000228a + 0x1*i0; }
 
static inline uint32_t REG_A4XX_VFD_DECODE_INSTR(uint32_t i0) { return 0x0000228a + 0x1*i0; }
#define A4XX_VFD_DECODE_INSTR_WRITEMASK__MASK 0x0000000f
#define A4XX_VFD_DECODE_INSTR_WRITEMASK__SHIFT 0
static inline uint32_t A4XX_VFD_DECODE_INSTR_WRITEMASK(uint32_t val)
{
return ((val) << A4XX_VFD_DECODE_INSTR_WRITEMASK__SHIFT) & A4XX_VFD_DECODE_INSTR_WRITEMASK__MASK;
}
#define A4XX_VFD_DECODE_INSTR_CONSTFILL 0x00000010
#define A4XX_VFD_DECODE_INSTR_FORMAT__MASK 0x00000fc0
#define A4XX_VFD_DECODE_INSTR_FORMAT__SHIFT 6
static inline uint32_t A4XX_VFD_DECODE_INSTR_FORMAT(enum a4xx_vtx_fmt val)
{
return ((val) << A4XX_VFD_DECODE_INSTR_FORMAT__SHIFT) & A4XX_VFD_DECODE_INSTR_FORMAT__MASK;
}
#define A4XX_VFD_DECODE_INSTR_REGID__MASK 0x000ff000
#define A4XX_VFD_DECODE_INSTR_REGID__SHIFT 12
static inline uint32_t A4XX_VFD_DECODE_INSTR_REGID(uint32_t val)
{
return ((val) << A4XX_VFD_DECODE_INSTR_REGID__SHIFT) & A4XX_VFD_DECODE_INSTR_REGID__MASK;
}
#define A4XX_VFD_DECODE_INSTR_INT 0x00100000
#define A4XX_VFD_DECODE_INSTR_SWAP__MASK 0x00c00000
#define A4XX_VFD_DECODE_INSTR_SWAP__SHIFT 22
static inline uint32_t A4XX_VFD_DECODE_INSTR_SWAP(enum a3xx_color_swap val)
{
return ((val) << A4XX_VFD_DECODE_INSTR_SWAP__SHIFT) & A4XX_VFD_DECODE_INSTR_SWAP__MASK;
}
#define A4XX_VFD_DECODE_INSTR_SHIFTCNT__MASK 0x1f000000
#define A4XX_VFD_DECODE_INSTR_SHIFTCNT__SHIFT 24
static inline uint32_t A4XX_VFD_DECODE_INSTR_SHIFTCNT(uint32_t val)
{
return ((val) << A4XX_VFD_DECODE_INSTR_SHIFTCNT__SHIFT) & A4XX_VFD_DECODE_INSTR_SHIFTCNT__MASK;
}
#define A4XX_VFD_DECODE_INSTR_LASTCOMPVALID 0x20000000
#define A4XX_VFD_DECODE_INSTR_SWITCHNEXT 0x40000000
 
#define REG_A4XX_TPL1_DEBUG_ECO_CONTROL 0x00000f00
 
#define REG_A4XX_TPL1_PERFCTR_TP_SEL_7 0x00000f0b
 
#define REG_A4XX_TPL1_TP_TEX_OFFSET 0x00002380
 
#define REG_A4XX_TPL1_TP_TEX_COUNT 0x00002381
#define A4XX_TPL1_TP_TEX_COUNT_VS__MASK 0x000000ff
#define A4XX_TPL1_TP_TEX_COUNT_VS__SHIFT 0
static inline uint32_t A4XX_TPL1_TP_TEX_COUNT_VS(uint32_t val)
{
return ((val) << A4XX_TPL1_TP_TEX_COUNT_VS__SHIFT) & A4XX_TPL1_TP_TEX_COUNT_VS__MASK;
}
#define A4XX_TPL1_TP_TEX_COUNT_HS__MASK 0x0000ff00
#define A4XX_TPL1_TP_TEX_COUNT_HS__SHIFT 8
static inline uint32_t A4XX_TPL1_TP_TEX_COUNT_HS(uint32_t val)
{
return ((val) << A4XX_TPL1_TP_TEX_COUNT_HS__SHIFT) & A4XX_TPL1_TP_TEX_COUNT_HS__MASK;
}
#define A4XX_TPL1_TP_TEX_COUNT_DS__MASK 0x00ff0000
#define A4XX_TPL1_TP_TEX_COUNT_DS__SHIFT 16
static inline uint32_t A4XX_TPL1_TP_TEX_COUNT_DS(uint32_t val)
{
return ((val) << A4XX_TPL1_TP_TEX_COUNT_DS__SHIFT) & A4XX_TPL1_TP_TEX_COUNT_DS__MASK;
}
#define A4XX_TPL1_TP_TEX_COUNT_GS__MASK 0xff000000
#define A4XX_TPL1_TP_TEX_COUNT_GS__SHIFT 24
static inline uint32_t A4XX_TPL1_TP_TEX_COUNT_GS(uint32_t val)
{
return ((val) << A4XX_TPL1_TP_TEX_COUNT_GS__SHIFT) & A4XX_TPL1_TP_TEX_COUNT_GS__MASK;
}
 
#define REG_A4XX_TPL1_TP_VS_BORDER_COLOR_BASE_ADDR 0x00002384
 
#define REG_A4XX_TPL1_TP_HS_BORDER_COLOR_BASE_ADDR 0x00002387
 
#define REG_A4XX_TPL1_TP_DS_BORDER_COLOR_BASE_ADDR 0x0000238a
 
#define REG_A4XX_TPL1_TP_GS_BORDER_COLOR_BASE_ADDR 0x0000238d
 
#define REG_A4XX_TPL1_TP_FS_TEX_COUNT 0x000023a0
 
#define REG_A4XX_TPL1_TP_FS_BORDER_COLOR_BASE_ADDR 0x000023a1
 
#define REG_A4XX_TPL1_TP_CS_TEXMEMOBJ_BASE_ADDR 0x000023a6
 
#define REG_A4XX_GRAS_TSE_STATUS 0x00000c80
 
#define REG_A4XX_GRAS_DEBUG_ECO_CONTROL 0x00000c81
 
#define REG_A4XX_GRAS_PERFCTR_TSE_SEL_0 0x00000c88
 
#define REG_A4XX_GRAS_PERFCTR_TSE_SEL_3 0x00000c8b
 
#define REG_A4XX_GRAS_CL_CLIP_CNTL 0x00002000
 
#define REG_A4XX_GRAS_CLEAR_CNTL 0x00002003
#define A4XX_GRAS_CLEAR_CNTL_NOT_FASTCLEAR 0x00000001
 
#define REG_A4XX_GRAS_CL_GB_CLIP_ADJ 0x00002004
#define A4XX_GRAS_CL_GB_CLIP_ADJ_HORZ__MASK 0x000003ff
#define A4XX_GRAS_CL_GB_CLIP_ADJ_HORZ__SHIFT 0
static inline uint32_t A4XX_GRAS_CL_GB_CLIP_ADJ_HORZ(uint32_t val)
{
return ((val) << A4XX_GRAS_CL_GB_CLIP_ADJ_HORZ__SHIFT) & A4XX_GRAS_CL_GB_CLIP_ADJ_HORZ__MASK;
}
#define A4XX_GRAS_CL_GB_CLIP_ADJ_VERT__MASK 0x000ffc00
#define A4XX_GRAS_CL_GB_CLIP_ADJ_VERT__SHIFT 10
static inline uint32_t A4XX_GRAS_CL_GB_CLIP_ADJ_VERT(uint32_t val)
{
return ((val) << A4XX_GRAS_CL_GB_CLIP_ADJ_VERT__SHIFT) & A4XX_GRAS_CL_GB_CLIP_ADJ_VERT__MASK;
}
 
#define REG_A4XX_GRAS_CL_VPORT_XOFFSET_0 0x00002008
#define A4XX_GRAS_CL_VPORT_XOFFSET_0__MASK 0xffffffff
#define A4XX_GRAS_CL_VPORT_XOFFSET_0__SHIFT 0
static inline uint32_t A4XX_GRAS_CL_VPORT_XOFFSET_0(float val)
{
return ((fui(val)) << A4XX_GRAS_CL_VPORT_XOFFSET_0__SHIFT) & A4XX_GRAS_CL_VPORT_XOFFSET_0__MASK;
}
 
#define REG_A4XX_GRAS_CL_VPORT_XSCALE_0 0x00002009
#define A4XX_GRAS_CL_VPORT_XSCALE_0__MASK 0xffffffff
#define A4XX_GRAS_CL_VPORT_XSCALE_0__SHIFT 0
static inline uint32_t A4XX_GRAS_CL_VPORT_XSCALE_0(float val)
{
return ((fui(val)) << A4XX_GRAS_CL_VPORT_XSCALE_0__SHIFT) & A4XX_GRAS_CL_VPORT_XSCALE_0__MASK;
}
 
#define REG_A4XX_GRAS_CL_VPORT_YOFFSET_0 0x0000200a
#define A4XX_GRAS_CL_VPORT_YOFFSET_0__MASK 0xffffffff
#define A4XX_GRAS_CL_VPORT_YOFFSET_0__SHIFT 0
static inline uint32_t A4XX_GRAS_CL_VPORT_YOFFSET_0(float val)
{
return ((fui(val)) << A4XX_GRAS_CL_VPORT_YOFFSET_0__SHIFT) & A4XX_GRAS_CL_VPORT_YOFFSET_0__MASK;
}
 
#define REG_A4XX_GRAS_CL_VPORT_YSCALE_0 0x0000200b
#define A4XX_GRAS_CL_VPORT_YSCALE_0__MASK 0xffffffff
#define A4XX_GRAS_CL_VPORT_YSCALE_0__SHIFT 0
static inline uint32_t A4XX_GRAS_CL_VPORT_YSCALE_0(float val)
{
return ((fui(val)) << A4XX_GRAS_CL_VPORT_YSCALE_0__SHIFT) & A4XX_GRAS_CL_VPORT_YSCALE_0__MASK;
}
 
#define REG_A4XX_GRAS_CL_VPORT_ZOFFSET_0 0x0000200c
#define A4XX_GRAS_CL_VPORT_ZOFFSET_0__MASK 0xffffffff
#define A4XX_GRAS_CL_VPORT_ZOFFSET_0__SHIFT 0
static inline uint32_t A4XX_GRAS_CL_VPORT_ZOFFSET_0(float val)
{
return ((fui(val)) << A4XX_GRAS_CL_VPORT_ZOFFSET_0__SHIFT) & A4XX_GRAS_CL_VPORT_ZOFFSET_0__MASK;
}
 
#define REG_A4XX_GRAS_CL_VPORT_ZSCALE_0 0x0000200d
#define A4XX_GRAS_CL_VPORT_ZSCALE_0__MASK 0xffffffff
#define A4XX_GRAS_CL_VPORT_ZSCALE_0__SHIFT 0
static inline uint32_t A4XX_GRAS_CL_VPORT_ZSCALE_0(float val)
{
return ((fui(val)) << A4XX_GRAS_CL_VPORT_ZSCALE_0__SHIFT) & A4XX_GRAS_CL_VPORT_ZSCALE_0__MASK;
}
 
#define REG_A4XX_GRAS_SU_POINT_MINMAX 0x00002070
#define A4XX_GRAS_SU_POINT_MINMAX_MIN__MASK 0x0000ffff
#define A4XX_GRAS_SU_POINT_MINMAX_MIN__SHIFT 0
static inline uint32_t A4XX_GRAS_SU_POINT_MINMAX_MIN(float val)
{
return ((((uint32_t)(val * 16.0))) << A4XX_GRAS_SU_POINT_MINMAX_MIN__SHIFT) & A4XX_GRAS_SU_POINT_MINMAX_MIN__MASK;
}
#define A4XX_GRAS_SU_POINT_MINMAX_MAX__MASK 0xffff0000
#define A4XX_GRAS_SU_POINT_MINMAX_MAX__SHIFT 16
static inline uint32_t A4XX_GRAS_SU_POINT_MINMAX_MAX(float val)
{
return ((((uint32_t)(val * 16.0))) << A4XX_GRAS_SU_POINT_MINMAX_MAX__SHIFT) & A4XX_GRAS_SU_POINT_MINMAX_MAX__MASK;
}
 
#define REG_A4XX_GRAS_SU_POINT_SIZE 0x00002071
#define A4XX_GRAS_SU_POINT_SIZE__MASK 0xffffffff
#define A4XX_GRAS_SU_POINT_SIZE__SHIFT 0
static inline uint32_t A4XX_GRAS_SU_POINT_SIZE(float val)
{
return ((((int32_t)(val * 16.0))) << A4XX_GRAS_SU_POINT_SIZE__SHIFT) & A4XX_GRAS_SU_POINT_SIZE__MASK;
}
 
#define REG_A4XX_GRAS_ALPHA_CONTROL 0x00002073
#define A4XX_GRAS_ALPHA_CONTROL_ALPHA_TEST_ENABLE 0x00000004
 
#define REG_A4XX_GRAS_SU_POLY_OFFSET_SCALE 0x00002074
#define A4XX_GRAS_SU_POLY_OFFSET_SCALE__MASK 0xffffffff
#define A4XX_GRAS_SU_POLY_OFFSET_SCALE__SHIFT 0
static inline uint32_t A4XX_GRAS_SU_POLY_OFFSET_SCALE(float val)
{
return ((fui(val)) << A4XX_GRAS_SU_POLY_OFFSET_SCALE__SHIFT) & A4XX_GRAS_SU_POLY_OFFSET_SCALE__MASK;
}
 
#define REG_A4XX_GRAS_SU_POLY_OFFSET_OFFSET 0x00002075
#define A4XX_GRAS_SU_POLY_OFFSET_OFFSET__MASK 0xffffffff
#define A4XX_GRAS_SU_POLY_OFFSET_OFFSET__SHIFT 0
static inline uint32_t A4XX_GRAS_SU_POLY_OFFSET_OFFSET(float val)
{
return ((fui(val)) << A4XX_GRAS_SU_POLY_OFFSET_OFFSET__SHIFT) & A4XX_GRAS_SU_POLY_OFFSET_OFFSET__MASK;
}
 
#define REG_A4XX_GRAS_SU_POLY_OFFSET_CLAMP 0x00002076
#define A4XX_GRAS_SU_POLY_OFFSET_CLAMP__MASK 0xffffffff
#define A4XX_GRAS_SU_POLY_OFFSET_CLAMP__SHIFT 0
static inline uint32_t A4XX_GRAS_SU_POLY_OFFSET_CLAMP(float val)
{
return ((fui(val)) << A4XX_GRAS_SU_POLY_OFFSET_CLAMP__SHIFT) & A4XX_GRAS_SU_POLY_OFFSET_CLAMP__MASK;
}
 
#define REG_A4XX_GRAS_DEPTH_CONTROL 0x00002077
#define A4XX_GRAS_DEPTH_CONTROL_FORMAT__MASK 0x00000003
#define A4XX_GRAS_DEPTH_CONTROL_FORMAT__SHIFT 0
static inline uint32_t A4XX_GRAS_DEPTH_CONTROL_FORMAT(enum a4xx_depth_format val)
{
return ((val) << A4XX_GRAS_DEPTH_CONTROL_FORMAT__SHIFT) & A4XX_GRAS_DEPTH_CONTROL_FORMAT__MASK;
}
 
#define REG_A4XX_GRAS_SU_MODE_CONTROL 0x00002078
#define A4XX_GRAS_SU_MODE_CONTROL_CULL_FRONT 0x00000001
#define A4XX_GRAS_SU_MODE_CONTROL_CULL_BACK 0x00000002
#define A4XX_GRAS_SU_MODE_CONTROL_FRONT_CW 0x00000004
#define A4XX_GRAS_SU_MODE_CONTROL_LINEHALFWIDTH__MASK 0x000007f8
#define A4XX_GRAS_SU_MODE_CONTROL_LINEHALFWIDTH__SHIFT 3
static inline uint32_t A4XX_GRAS_SU_MODE_CONTROL_LINEHALFWIDTH(float val)
{
return ((((int32_t)(val * 4.0))) << A4XX_GRAS_SU_MODE_CONTROL_LINEHALFWIDTH__SHIFT) & A4XX_GRAS_SU_MODE_CONTROL_LINEHALFWIDTH__MASK;
}
#define A4XX_GRAS_SU_MODE_CONTROL_POLY_OFFSET 0x00000800
#define A4XX_GRAS_SU_MODE_CONTROL_RENDERING_PASS 0x00100000
 
#define REG_A4XX_GRAS_SC_CONTROL 0x0000207b
#define A4XX_GRAS_SC_CONTROL_RENDER_MODE__MASK 0x0000000c
#define A4XX_GRAS_SC_CONTROL_RENDER_MODE__SHIFT 2
static inline uint32_t A4XX_GRAS_SC_CONTROL_RENDER_MODE(enum a3xx_render_mode val)
{
return ((val) << A4XX_GRAS_SC_CONTROL_RENDER_MODE__SHIFT) & A4XX_GRAS_SC_CONTROL_RENDER_MODE__MASK;
}
#define A4XX_GRAS_SC_CONTROL_MSAA_SAMPLES__MASK 0x00000380
#define A4XX_GRAS_SC_CONTROL_MSAA_SAMPLES__SHIFT 7
static inline uint32_t A4XX_GRAS_SC_CONTROL_MSAA_SAMPLES(uint32_t val)
{
return ((val) << A4XX_GRAS_SC_CONTROL_MSAA_SAMPLES__SHIFT) & A4XX_GRAS_SC_CONTROL_MSAA_SAMPLES__MASK;
}
#define A4XX_GRAS_SC_CONTROL_MSAA_DISABLE 0x00000800
#define A4XX_GRAS_SC_CONTROL_RASTER_MODE__MASK 0x0000f000
#define A4XX_GRAS_SC_CONTROL_RASTER_MODE__SHIFT 12
static inline uint32_t A4XX_GRAS_SC_CONTROL_RASTER_MODE(uint32_t val)
{
return ((val) << A4XX_GRAS_SC_CONTROL_RASTER_MODE__SHIFT) & A4XX_GRAS_SC_CONTROL_RASTER_MODE__MASK;
}
 
#define REG_A4XX_GRAS_SC_SCREEN_SCISSOR_TL 0x0000207c
#define A4XX_GRAS_SC_SCREEN_SCISSOR_TL_WINDOW_OFFSET_DISABLE 0x80000000
#define A4XX_GRAS_SC_SCREEN_SCISSOR_TL_X__MASK 0x00007fff
#define A4XX_GRAS_SC_SCREEN_SCISSOR_TL_X__SHIFT 0
static inline uint32_t A4XX_GRAS_SC_SCREEN_SCISSOR_TL_X(uint32_t val)
{
return ((val) << A4XX_GRAS_SC_SCREEN_SCISSOR_TL_X__SHIFT) & A4XX_GRAS_SC_SCREEN_SCISSOR_TL_X__MASK;
}
#define A4XX_GRAS_SC_SCREEN_SCISSOR_TL_Y__MASK 0x7fff0000
#define A4XX_GRAS_SC_SCREEN_SCISSOR_TL_Y__SHIFT 16
static inline uint32_t A4XX_GRAS_SC_SCREEN_SCISSOR_TL_Y(uint32_t val)
{
return ((val) << A4XX_GRAS_SC_SCREEN_SCISSOR_TL_Y__SHIFT) & A4XX_GRAS_SC_SCREEN_SCISSOR_TL_Y__MASK;
}
 
#define REG_A4XX_GRAS_SC_SCREEN_SCISSOR_BR 0x0000207d
#define A4XX_GRAS_SC_SCREEN_SCISSOR_BR_WINDOW_OFFSET_DISABLE 0x80000000
#define A4XX_GRAS_SC_SCREEN_SCISSOR_BR_X__MASK 0x00007fff
#define A4XX_GRAS_SC_SCREEN_SCISSOR_BR_X__SHIFT 0
static inline uint32_t A4XX_GRAS_SC_SCREEN_SCISSOR_BR_X(uint32_t val)
{
return ((val) << A4XX_GRAS_SC_SCREEN_SCISSOR_BR_X__SHIFT) & A4XX_GRAS_SC_SCREEN_SCISSOR_BR_X__MASK;
}
#define A4XX_GRAS_SC_SCREEN_SCISSOR_BR_Y__MASK 0x7fff0000
#define A4XX_GRAS_SC_SCREEN_SCISSOR_BR_Y__SHIFT 16
static inline uint32_t A4XX_GRAS_SC_SCREEN_SCISSOR_BR_Y(uint32_t val)
{
return ((val) << A4XX_GRAS_SC_SCREEN_SCISSOR_BR_Y__SHIFT) & A4XX_GRAS_SC_SCREEN_SCISSOR_BR_Y__MASK;
}
 
#define REG_A4XX_GRAS_SC_WINDOW_SCISSOR_BR 0x0000209c
#define A4XX_GRAS_SC_WINDOW_SCISSOR_BR_WINDOW_OFFSET_DISABLE 0x80000000
#define A4XX_GRAS_SC_WINDOW_SCISSOR_BR_X__MASK 0x00007fff
#define A4XX_GRAS_SC_WINDOW_SCISSOR_BR_X__SHIFT 0
static inline uint32_t A4XX_GRAS_SC_WINDOW_SCISSOR_BR_X(uint32_t val)
{
return ((val) << A4XX_GRAS_SC_WINDOW_SCISSOR_BR_X__SHIFT) & A4XX_GRAS_SC_WINDOW_SCISSOR_BR_X__MASK;
}
#define A4XX_GRAS_SC_WINDOW_SCISSOR_BR_Y__MASK 0x7fff0000
#define A4XX_GRAS_SC_WINDOW_SCISSOR_BR_Y__SHIFT 16
static inline uint32_t A4XX_GRAS_SC_WINDOW_SCISSOR_BR_Y(uint32_t val)
{
return ((val) << A4XX_GRAS_SC_WINDOW_SCISSOR_BR_Y__SHIFT) & A4XX_GRAS_SC_WINDOW_SCISSOR_BR_Y__MASK;
}
 
#define REG_A4XX_GRAS_SC_WINDOW_SCISSOR_TL 0x0000209d
#define A4XX_GRAS_SC_WINDOW_SCISSOR_TL_WINDOW_OFFSET_DISABLE 0x80000000
#define A4XX_GRAS_SC_WINDOW_SCISSOR_TL_X__MASK 0x00007fff
#define A4XX_GRAS_SC_WINDOW_SCISSOR_TL_X__SHIFT 0
static inline uint32_t A4XX_GRAS_SC_WINDOW_SCISSOR_TL_X(uint32_t val)
{
return ((val) << A4XX_GRAS_SC_WINDOW_SCISSOR_TL_X__SHIFT) & A4XX_GRAS_SC_WINDOW_SCISSOR_TL_X__MASK;
}
#define A4XX_GRAS_SC_WINDOW_SCISSOR_TL_Y__MASK 0x7fff0000
#define A4XX_GRAS_SC_WINDOW_SCISSOR_TL_Y__SHIFT 16
static inline uint32_t A4XX_GRAS_SC_WINDOW_SCISSOR_TL_Y(uint32_t val)
{
return ((val) << A4XX_GRAS_SC_WINDOW_SCISSOR_TL_Y__SHIFT) & A4XX_GRAS_SC_WINDOW_SCISSOR_TL_Y__MASK;
}
 
#define REG_A4XX_GRAS_SC_EXTENT_WINDOW_BR 0x0000209e
#define A4XX_GRAS_SC_EXTENT_WINDOW_BR_WINDOW_OFFSET_DISABLE 0x80000000
#define A4XX_GRAS_SC_EXTENT_WINDOW_BR_X__MASK 0x00007fff
#define A4XX_GRAS_SC_EXTENT_WINDOW_BR_X__SHIFT 0
static inline uint32_t A4XX_GRAS_SC_EXTENT_WINDOW_BR_X(uint32_t val)
{
return ((val) << A4XX_GRAS_SC_EXTENT_WINDOW_BR_X__SHIFT) & A4XX_GRAS_SC_EXTENT_WINDOW_BR_X__MASK;
}
#define A4XX_GRAS_SC_EXTENT_WINDOW_BR_Y__MASK 0x7fff0000
#define A4XX_GRAS_SC_EXTENT_WINDOW_BR_Y__SHIFT 16
static inline uint32_t A4XX_GRAS_SC_EXTENT_WINDOW_BR_Y(uint32_t val)
{
return ((val) << A4XX_GRAS_SC_EXTENT_WINDOW_BR_Y__SHIFT) & A4XX_GRAS_SC_EXTENT_WINDOW_BR_Y__MASK;
}
 
#define REG_A4XX_GRAS_SC_EXTENT_WINDOW_TL 0x0000209f
#define A4XX_GRAS_SC_EXTENT_WINDOW_TL_WINDOW_OFFSET_DISABLE 0x80000000
#define A4XX_GRAS_SC_EXTENT_WINDOW_TL_X__MASK 0x00007fff
#define A4XX_GRAS_SC_EXTENT_WINDOW_TL_X__SHIFT 0
static inline uint32_t A4XX_GRAS_SC_EXTENT_WINDOW_TL_X(uint32_t val)
{
return ((val) << A4XX_GRAS_SC_EXTENT_WINDOW_TL_X__SHIFT) & A4XX_GRAS_SC_EXTENT_WINDOW_TL_X__MASK;
}
#define A4XX_GRAS_SC_EXTENT_WINDOW_TL_Y__MASK 0x7fff0000
#define A4XX_GRAS_SC_EXTENT_WINDOW_TL_Y__SHIFT 16
static inline uint32_t A4XX_GRAS_SC_EXTENT_WINDOW_TL_Y(uint32_t val)
{
return ((val) << A4XX_GRAS_SC_EXTENT_WINDOW_TL_Y__SHIFT) & A4XX_GRAS_SC_EXTENT_WINDOW_TL_Y__MASK;
}
 
#define REG_A4XX_UCHE_CACHE_MODE_CONTROL 0x00000e80
 
#define REG_A4XX_UCHE_TRAP_BASE_LO 0x00000e83
 
#define REG_A4XX_UCHE_TRAP_BASE_HI 0x00000e84
 
#define REG_A4XX_UCHE_CACHE_STATUS 0x00000e88
 
#define REG_A4XX_UCHE_INVALIDATE0 0x00000e8a
 
#define REG_A4XX_UCHE_INVALIDATE1 0x00000e8b
 
#define REG_A4XX_UCHE_CACHE_WAYS_VFD 0x00000e8c
 
#define REG_A4XX_UCHE_PERFCTR_UCHE_SEL_7 0x00000e95
 
#define REG_A4XX_HLSQ_TIMEOUT_THRESHOLD 0x00000e00
 
#define REG_A4XX_HLSQ_DEBUG_ECO_CONTROL 0x00000e04
 
#define REG_A4XX_HLSQ_PERF_PIPE_MASK 0x00000e0e
 
#define REG_A4XX_HLSQ_CONTROL_0_REG 0x000023c0
#define A4XX_HLSQ_CONTROL_0_REG_FSTHREADSIZE__MASK 0x00000010
#define A4XX_HLSQ_CONTROL_0_REG_FSTHREADSIZE__SHIFT 4
static inline uint32_t A4XX_HLSQ_CONTROL_0_REG_FSTHREADSIZE(enum a3xx_threadsize val)
{
return ((val) << A4XX_HLSQ_CONTROL_0_REG_FSTHREADSIZE__SHIFT) & A4XX_HLSQ_CONTROL_0_REG_FSTHREADSIZE__MASK;
}
#define A4XX_HLSQ_CONTROL_0_REG_FSSUPERTHREADENABLE 0x00000040
#define A4XX_HLSQ_CONTROL_0_REG_SPSHADERRESTART 0x00000200
#define A4XX_HLSQ_CONTROL_0_REG_RESERVED2 0x00000400
#define A4XX_HLSQ_CONTROL_0_REG_CHUNKDISABLE 0x04000000
#define A4XX_HLSQ_CONTROL_0_REG_CONSTMODE__MASK 0x08000000
#define A4XX_HLSQ_CONTROL_0_REG_CONSTMODE__SHIFT 27
static inline uint32_t A4XX_HLSQ_CONTROL_0_REG_CONSTMODE(uint32_t val)
{
return ((val) << A4XX_HLSQ_CONTROL_0_REG_CONSTMODE__SHIFT) & A4XX_HLSQ_CONTROL_0_REG_CONSTMODE__MASK;
}
#define A4XX_HLSQ_CONTROL_0_REG_LAZYUPDATEDISABLE 0x10000000
#define A4XX_HLSQ_CONTROL_0_REG_SPCONSTFULLUPDATE 0x20000000
#define A4XX_HLSQ_CONTROL_0_REG_TPFULLUPDATE 0x40000000
#define A4XX_HLSQ_CONTROL_0_REG_SINGLECONTEXT 0x80000000
 
#define REG_A4XX_HLSQ_CONTROL_1_REG 0x000023c1
#define A4XX_HLSQ_CONTROL_1_REG_VSTHREADSIZE__MASK 0x00000040
#define A4XX_HLSQ_CONTROL_1_REG_VSTHREADSIZE__SHIFT 6
static inline uint32_t A4XX_HLSQ_CONTROL_1_REG_VSTHREADSIZE(enum a3xx_threadsize val)
{
return ((val) << A4XX_HLSQ_CONTROL_1_REG_VSTHREADSIZE__SHIFT) & A4XX_HLSQ_CONTROL_1_REG_VSTHREADSIZE__MASK;
}
#define A4XX_HLSQ_CONTROL_1_REG_VSSUPERTHREADENABLE 0x00000100
#define A4XX_HLSQ_CONTROL_1_REG_RESERVED1 0x00000200
#define A4XX_HLSQ_CONTROL_1_REG_COORDREGID__MASK 0x00ff0000
#define A4XX_HLSQ_CONTROL_1_REG_COORDREGID__SHIFT 16
static inline uint32_t A4XX_HLSQ_CONTROL_1_REG_COORDREGID(uint32_t val)
{
return ((val) << A4XX_HLSQ_CONTROL_1_REG_COORDREGID__SHIFT) & A4XX_HLSQ_CONTROL_1_REG_COORDREGID__MASK;
}
#define A4XX_HLSQ_CONTROL_1_REG_ZWCOORDREGID__MASK 0xff000000
#define A4XX_HLSQ_CONTROL_1_REG_ZWCOORDREGID__SHIFT 24
static inline uint32_t A4XX_HLSQ_CONTROL_1_REG_ZWCOORDREGID(uint32_t val)
{
return ((val) << A4XX_HLSQ_CONTROL_1_REG_ZWCOORDREGID__SHIFT) & A4XX_HLSQ_CONTROL_1_REG_ZWCOORDREGID__MASK;
}
 
#define REG_A4XX_HLSQ_CONTROL_2_REG 0x000023c2
#define A4XX_HLSQ_CONTROL_2_REG_PRIMALLOCTHRESHOLD__MASK 0xfc000000
#define A4XX_HLSQ_CONTROL_2_REG_PRIMALLOCTHRESHOLD__SHIFT 26
static inline uint32_t A4XX_HLSQ_CONTROL_2_REG_PRIMALLOCTHRESHOLD(uint32_t val)
{
return ((val) << A4XX_HLSQ_CONTROL_2_REG_PRIMALLOCTHRESHOLD__SHIFT) & A4XX_HLSQ_CONTROL_2_REG_PRIMALLOCTHRESHOLD__MASK;
}
#define A4XX_HLSQ_CONTROL_2_REG_FACEREGID__MASK 0x000003fc
#define A4XX_HLSQ_CONTROL_2_REG_FACEREGID__SHIFT 2
static inline uint32_t A4XX_HLSQ_CONTROL_2_REG_FACEREGID(uint32_t val)
{
return ((val) << A4XX_HLSQ_CONTROL_2_REG_FACEREGID__SHIFT) & A4XX_HLSQ_CONTROL_2_REG_FACEREGID__MASK;
}
#define A4XX_HLSQ_CONTROL_2_REG_SAMPLEID_REGID__MASK 0x0003fc00
#define A4XX_HLSQ_CONTROL_2_REG_SAMPLEID_REGID__SHIFT 10
static inline uint32_t A4XX_HLSQ_CONTROL_2_REG_SAMPLEID_REGID(uint32_t val)
{
return ((val) << A4XX_HLSQ_CONTROL_2_REG_SAMPLEID_REGID__SHIFT) & A4XX_HLSQ_CONTROL_2_REG_SAMPLEID_REGID__MASK;
}
#define A4XX_HLSQ_CONTROL_2_REG_SAMPLEMASK_REGID__MASK 0x03fc0000
#define A4XX_HLSQ_CONTROL_2_REG_SAMPLEMASK_REGID__SHIFT 18
static inline uint32_t A4XX_HLSQ_CONTROL_2_REG_SAMPLEMASK_REGID(uint32_t val)
{
return ((val) << A4XX_HLSQ_CONTROL_2_REG_SAMPLEMASK_REGID__SHIFT) & A4XX_HLSQ_CONTROL_2_REG_SAMPLEMASK_REGID__MASK;
}
 
#define REG_A4XX_HLSQ_CONTROL_3_REG 0x000023c3
#define A4XX_HLSQ_CONTROL_3_REG_REGID__MASK 0x000000ff
#define A4XX_HLSQ_CONTROL_3_REG_REGID__SHIFT 0
static inline uint32_t A4XX_HLSQ_CONTROL_3_REG_REGID(uint32_t val)
{
return ((val) << A4XX_HLSQ_CONTROL_3_REG_REGID__SHIFT) & A4XX_HLSQ_CONTROL_3_REG_REGID__MASK;
}
 
#define REG_A4XX_HLSQ_VS_CONTROL_REG 0x000023c5
#define A4XX_HLSQ_VS_CONTROL_REG_CONSTLENGTH__MASK 0x000000ff
#define A4XX_HLSQ_VS_CONTROL_REG_CONSTLENGTH__SHIFT 0
static inline uint32_t A4XX_HLSQ_VS_CONTROL_REG_CONSTLENGTH(uint32_t val)
{
return ((val) << A4XX_HLSQ_VS_CONTROL_REG_CONSTLENGTH__SHIFT) & A4XX_HLSQ_VS_CONTROL_REG_CONSTLENGTH__MASK;
}
#define A4XX_HLSQ_VS_CONTROL_REG_CONSTOBJECTOFFSET__MASK 0x0000ff00
#define A4XX_HLSQ_VS_CONTROL_REG_CONSTOBJECTOFFSET__SHIFT 8
static inline uint32_t A4XX_HLSQ_VS_CONTROL_REG_CONSTOBJECTOFFSET(uint32_t val)
{
return ((val) << A4XX_HLSQ_VS_CONTROL_REG_CONSTOBJECTOFFSET__SHIFT) & A4XX_HLSQ_VS_CONTROL_REG_CONSTOBJECTOFFSET__MASK;
}
#define A4XX_HLSQ_VS_CONTROL_REG_ENABLED 0x00010000
#define A4XX_HLSQ_VS_CONTROL_REG_SHADEROBJOFFSET__MASK 0x00fe0000
#define A4XX_HLSQ_VS_CONTROL_REG_SHADEROBJOFFSET__SHIFT 17
static inline uint32_t A4XX_HLSQ_VS_CONTROL_REG_SHADEROBJOFFSET(uint32_t val)
{
return ((val) << A4XX_HLSQ_VS_CONTROL_REG_SHADEROBJOFFSET__SHIFT) & A4XX_HLSQ_VS_CONTROL_REG_SHADEROBJOFFSET__MASK;
}
#define A4XX_HLSQ_VS_CONTROL_REG_INSTRLENGTH__MASK 0xff000000
#define A4XX_HLSQ_VS_CONTROL_REG_INSTRLENGTH__SHIFT 24
static inline uint32_t A4XX_HLSQ_VS_CONTROL_REG_INSTRLENGTH(uint32_t val)
{
return ((val) << A4XX_HLSQ_VS_CONTROL_REG_INSTRLENGTH__SHIFT) & A4XX_HLSQ_VS_CONTROL_REG_INSTRLENGTH__MASK;
}
 
#define REG_A4XX_HLSQ_FS_CONTROL_REG 0x000023c6
#define A4XX_HLSQ_FS_CONTROL_REG_CONSTLENGTH__MASK 0x000000ff
#define A4XX_HLSQ_FS_CONTROL_REG_CONSTLENGTH__SHIFT 0
static inline uint32_t A4XX_HLSQ_FS_CONTROL_REG_CONSTLENGTH(uint32_t val)
{
return ((val) << A4XX_HLSQ_FS_CONTROL_REG_CONSTLENGTH__SHIFT) & A4XX_HLSQ_FS_CONTROL_REG_CONSTLENGTH__MASK;
}
#define A4XX_HLSQ_FS_CONTROL_REG_CONSTOBJECTOFFSET__MASK 0x0000ff00
#define A4XX_HLSQ_FS_CONTROL_REG_CONSTOBJECTOFFSET__SHIFT 8
static inline uint32_t A4XX_HLSQ_FS_CONTROL_REG_CONSTOBJECTOFFSET(uint32_t val)
{
return ((val) << A4XX_HLSQ_FS_CONTROL_REG_CONSTOBJECTOFFSET__SHIFT) & A4XX_HLSQ_FS_CONTROL_REG_CONSTOBJECTOFFSET__MASK;
}
#define A4XX_HLSQ_FS_CONTROL_REG_ENABLED 0x00010000
#define A4XX_HLSQ_FS_CONTROL_REG_SHADEROBJOFFSET__MASK 0x00fe0000
#define A4XX_HLSQ_FS_CONTROL_REG_SHADEROBJOFFSET__SHIFT 17
static inline uint32_t A4XX_HLSQ_FS_CONTROL_REG_SHADEROBJOFFSET(uint32_t val)
{
return ((val) << A4XX_HLSQ_FS_CONTROL_REG_SHADEROBJOFFSET__SHIFT) & A4XX_HLSQ_FS_CONTROL_REG_SHADEROBJOFFSET__MASK;
}
#define A4XX_HLSQ_FS_CONTROL_REG_INSTRLENGTH__MASK 0xff000000
#define A4XX_HLSQ_FS_CONTROL_REG_INSTRLENGTH__SHIFT 24
static inline uint32_t A4XX_HLSQ_FS_CONTROL_REG_INSTRLENGTH(uint32_t val)
{
return ((val) << A4XX_HLSQ_FS_CONTROL_REG_INSTRLENGTH__SHIFT) & A4XX_HLSQ_FS_CONTROL_REG_INSTRLENGTH__MASK;
}
 
#define REG_A4XX_HLSQ_HS_CONTROL_REG 0x000023c7
#define A4XX_HLSQ_HS_CONTROL_REG_CONSTLENGTH__MASK 0x000000ff
#define A4XX_HLSQ_HS_CONTROL_REG_CONSTLENGTH__SHIFT 0
static inline uint32_t A4XX_HLSQ_HS_CONTROL_REG_CONSTLENGTH(uint32_t val)
{
return ((val) << A4XX_HLSQ_HS_CONTROL_REG_CONSTLENGTH__SHIFT) & A4XX_HLSQ_HS_CONTROL_REG_CONSTLENGTH__MASK;
}
#define A4XX_HLSQ_HS_CONTROL_REG_CONSTOBJECTOFFSET__MASK 0x0000ff00
#define A4XX_HLSQ_HS_CONTROL_REG_CONSTOBJECTOFFSET__SHIFT 8
static inline uint32_t A4XX_HLSQ_HS_CONTROL_REG_CONSTOBJECTOFFSET(uint32_t val)
{
return ((val) << A4XX_HLSQ_HS_CONTROL_REG_CONSTOBJECTOFFSET__SHIFT) & A4XX_HLSQ_HS_CONTROL_REG_CONSTOBJECTOFFSET__MASK;
}
#define A4XX_HLSQ_HS_CONTROL_REG_ENABLED 0x00010000
#define A4XX_HLSQ_HS_CONTROL_REG_SHADEROBJOFFSET__MASK 0x00fe0000
#define A4XX_HLSQ_HS_CONTROL_REG_SHADEROBJOFFSET__SHIFT 17
static inline uint32_t A4XX_HLSQ_HS_CONTROL_REG_SHADEROBJOFFSET(uint32_t val)
{
return ((val) << A4XX_HLSQ_HS_CONTROL_REG_SHADEROBJOFFSET__SHIFT) & A4XX_HLSQ_HS_CONTROL_REG_SHADEROBJOFFSET__MASK;
}
#define A4XX_HLSQ_HS_CONTROL_REG_INSTRLENGTH__MASK 0xff000000
#define A4XX_HLSQ_HS_CONTROL_REG_INSTRLENGTH__SHIFT 24
static inline uint32_t A4XX_HLSQ_HS_CONTROL_REG_INSTRLENGTH(uint32_t val)
{
return ((val) << A4XX_HLSQ_HS_CONTROL_REG_INSTRLENGTH__SHIFT) & A4XX_HLSQ_HS_CONTROL_REG_INSTRLENGTH__MASK;
}
 
#define REG_A4XX_HLSQ_DS_CONTROL_REG 0x000023c8
#define A4XX_HLSQ_DS_CONTROL_REG_CONSTLENGTH__MASK 0x000000ff
#define A4XX_HLSQ_DS_CONTROL_REG_CONSTLENGTH__SHIFT 0
static inline uint32_t A4XX_HLSQ_DS_CONTROL_REG_CONSTLENGTH(uint32_t val)
{
return ((val) << A4XX_HLSQ_DS_CONTROL_REG_CONSTLENGTH__SHIFT) & A4XX_HLSQ_DS_CONTROL_REG_CONSTLENGTH__MASK;
}
#define A4XX_HLSQ_DS_CONTROL_REG_CONSTOBJECTOFFSET__MASK 0x0000ff00
#define A4XX_HLSQ_DS_CONTROL_REG_CONSTOBJECTOFFSET__SHIFT 8
static inline uint32_t A4XX_HLSQ_DS_CONTROL_REG_CONSTOBJECTOFFSET(uint32_t val)
{
return ((val) << A4XX_HLSQ_DS_CONTROL_REG_CONSTOBJECTOFFSET__SHIFT) & A4XX_HLSQ_DS_CONTROL_REG_CONSTOBJECTOFFSET__MASK;
}
#define A4XX_HLSQ_DS_CONTROL_REG_ENABLED 0x00010000
#define A4XX_HLSQ_DS_CONTROL_REG_SHADEROBJOFFSET__MASK 0x00fe0000
#define A4XX_HLSQ_DS_CONTROL_REG_SHADEROBJOFFSET__SHIFT 17
static inline uint32_t A4XX_HLSQ_DS_CONTROL_REG_SHADEROBJOFFSET(uint32_t val)
{
return ((val) << A4XX_HLSQ_DS_CONTROL_REG_SHADEROBJOFFSET__SHIFT) & A4XX_HLSQ_DS_CONTROL_REG_SHADEROBJOFFSET__MASK;
}
#define A4XX_HLSQ_DS_CONTROL_REG_INSTRLENGTH__MASK 0xff000000
#define A4XX_HLSQ_DS_CONTROL_REG_INSTRLENGTH__SHIFT 24
static inline uint32_t A4XX_HLSQ_DS_CONTROL_REG_INSTRLENGTH(uint32_t val)
{
return ((val) << A4XX_HLSQ_DS_CONTROL_REG_INSTRLENGTH__SHIFT) & A4XX_HLSQ_DS_CONTROL_REG_INSTRLENGTH__MASK;
}
 
#define REG_A4XX_HLSQ_GS_CONTROL_REG 0x000023c9
#define A4XX_HLSQ_GS_CONTROL_REG_CONSTLENGTH__MASK 0x000000ff
#define A4XX_HLSQ_GS_CONTROL_REG_CONSTLENGTH__SHIFT 0
static inline uint32_t A4XX_HLSQ_GS_CONTROL_REG_CONSTLENGTH(uint32_t val)
{
return ((val) << A4XX_HLSQ_GS_CONTROL_REG_CONSTLENGTH__SHIFT) & A4XX_HLSQ_GS_CONTROL_REG_CONSTLENGTH__MASK;
}
#define A4XX_HLSQ_GS_CONTROL_REG_CONSTOBJECTOFFSET__MASK 0x0000ff00
#define A4XX_HLSQ_GS_CONTROL_REG_CONSTOBJECTOFFSET__SHIFT 8
static inline uint32_t A4XX_HLSQ_GS_CONTROL_REG_CONSTOBJECTOFFSET(uint32_t val)
{
return ((val) << A4XX_HLSQ_GS_CONTROL_REG_CONSTOBJECTOFFSET__SHIFT) & A4XX_HLSQ_GS_CONTROL_REG_CONSTOBJECTOFFSET__MASK;
}
#define A4XX_HLSQ_GS_CONTROL_REG_ENABLED 0x00010000
#define A4XX_HLSQ_GS_CONTROL_REG_SHADEROBJOFFSET__MASK 0x00fe0000
#define A4XX_HLSQ_GS_CONTROL_REG_SHADEROBJOFFSET__SHIFT 17
static inline uint32_t A4XX_HLSQ_GS_CONTROL_REG_SHADEROBJOFFSET(uint32_t val)
{
return ((val) << A4XX_HLSQ_GS_CONTROL_REG_SHADEROBJOFFSET__SHIFT) & A4XX_HLSQ_GS_CONTROL_REG_SHADEROBJOFFSET__MASK;
}
#define A4XX_HLSQ_GS_CONTROL_REG_INSTRLENGTH__MASK 0xff000000
#define A4XX_HLSQ_GS_CONTROL_REG_INSTRLENGTH__SHIFT 24
static inline uint32_t A4XX_HLSQ_GS_CONTROL_REG_INSTRLENGTH(uint32_t val)
{
return ((val) << A4XX_HLSQ_GS_CONTROL_REG_INSTRLENGTH__SHIFT) & A4XX_HLSQ_GS_CONTROL_REG_INSTRLENGTH__MASK;
}
 
#define REG_A4XX_HLSQ_UPDATE_CONTROL 0x000023db
 
#define REG_A4XX_PC_BINNING_COMMAND 0x00000d00
#define A4XX_PC_BINNING_COMMAND_BINNING_ENABLE 0x00000001
 
#define REG_A4XX_PC_DRAWCALL_SETUP_OVERRIDE 0x00000d0c
 
#define REG_A4XX_PC_PERFCTR_PC_SEL_0 0x00000d10
 
#define REG_A4XX_PC_PERFCTR_PC_SEL_7 0x00000d17
 
#define REG_A4XX_PC_BIN_BASE 0x000021c0
 
#define REG_A4XX_PC_PRIM_VTX_CNTL 0x000021c4
#define A4XX_PC_PRIM_VTX_CNTL_VAROUT__MASK 0x0000000f
#define A4XX_PC_PRIM_VTX_CNTL_VAROUT__SHIFT 0
static inline uint32_t A4XX_PC_PRIM_VTX_CNTL_VAROUT(uint32_t val)
{
return ((val) << A4XX_PC_PRIM_VTX_CNTL_VAROUT__SHIFT) & A4XX_PC_PRIM_VTX_CNTL_VAROUT__MASK;
}
#define A4XX_PC_PRIM_VTX_CNTL_PRIMITIVE_RESTART 0x00100000
#define A4XX_PC_PRIM_VTX_CNTL_PROVOKING_VTX_LAST 0x02000000
#define A4XX_PC_PRIM_VTX_CNTL_PSIZE 0x04000000
 
#define REG_A4XX_UNKNOWN_21C5 0x000021c5
 
#define REG_A4XX_PC_RESTART_INDEX 0x000021c6
 
#define REG_A4XX_PC_GS_PARAM 0x000021e5
#define A4XX_PC_GS_PARAM_MAX_VERTICES__MASK 0x000003ff
#define A4XX_PC_GS_PARAM_MAX_VERTICES__SHIFT 0
static inline uint32_t A4XX_PC_GS_PARAM_MAX_VERTICES(uint32_t val)
{
return ((val) << A4XX_PC_GS_PARAM_MAX_VERTICES__SHIFT) & A4XX_PC_GS_PARAM_MAX_VERTICES__MASK;
}
#define A4XX_PC_GS_PARAM_INVOCATIONS__MASK 0x0000f800
#define A4XX_PC_GS_PARAM_INVOCATIONS__SHIFT 11
static inline uint32_t A4XX_PC_GS_PARAM_INVOCATIONS(uint32_t val)
{
return ((val) << A4XX_PC_GS_PARAM_INVOCATIONS__SHIFT) & A4XX_PC_GS_PARAM_INVOCATIONS__MASK;
}
#define A4XX_PC_GS_PARAM_PRIMTYPE__MASK 0x01800000
#define A4XX_PC_GS_PARAM_PRIMTYPE__SHIFT 23
static inline uint32_t A4XX_PC_GS_PARAM_PRIMTYPE(enum adreno_pa_su_sc_draw val)
{
return ((val) << A4XX_PC_GS_PARAM_PRIMTYPE__SHIFT) & A4XX_PC_GS_PARAM_PRIMTYPE__MASK;
}
#define A4XX_PC_GS_PARAM_LAYER 0x80000000
 
#define REG_A4XX_PC_HS_PARAM 0x000021e7
#define A4XX_PC_HS_PARAM_VERTICES_OUT__MASK 0x0000003f
#define A4XX_PC_HS_PARAM_VERTICES_OUT__SHIFT 0
static inline uint32_t A4XX_PC_HS_PARAM_VERTICES_OUT(uint32_t val)
{
return ((val) << A4XX_PC_HS_PARAM_VERTICES_OUT__SHIFT) & A4XX_PC_HS_PARAM_VERTICES_OUT__MASK;
}
#define A4XX_PC_HS_PARAM_SPACING__MASK 0x00600000
#define A4XX_PC_HS_PARAM_SPACING__SHIFT 21
static inline uint32_t A4XX_PC_HS_PARAM_SPACING(enum a4xx_tess_spacing val)
{
return ((val) << A4XX_PC_HS_PARAM_SPACING__SHIFT) & A4XX_PC_HS_PARAM_SPACING__MASK;
}
#define A4XX_PC_HS_PARAM_PRIMTYPE__MASK 0x01800000
#define A4XX_PC_HS_PARAM_PRIMTYPE__SHIFT 23
static inline uint32_t A4XX_PC_HS_PARAM_PRIMTYPE(enum adreno_pa_su_sc_draw val)
{
return ((val) << A4XX_PC_HS_PARAM_PRIMTYPE__SHIFT) & A4XX_PC_HS_PARAM_PRIMTYPE__MASK;
}
 
#define REG_A4XX_VBIF_VERSION 0x00003000
 
#define REG_A4XX_VBIF_CLKON 0x00003001
#define A4XX_VBIF_CLKON_FORCE_ON_TESTBUS 0x00000001
 
#define REG_A4XX_VBIF_ABIT_SORT 0x0000301c
 
#define REG_A4XX_VBIF_ABIT_SORT_CONF 0x0000301d
 
#define REG_A4XX_VBIF_GATE_OFF_WRREQ_EN 0x0000302a
 
#define REG_A4XX_VBIF_IN_RD_LIM_CONF0 0x0000302c
 
#define REG_A4XX_VBIF_IN_RD_LIM_CONF1 0x0000302d
 
#define REG_A4XX_VBIF_IN_WR_LIM_CONF0 0x00003030
 
#define REG_A4XX_VBIF_IN_WR_LIM_CONF1 0x00003031
 
#define REG_A4XX_VBIF_ROUND_ROBIN_QOS_ARB 0x00003049
 
#define REG_A4XX_UNKNOWN_0CC5 0x00000cc5
 
#define REG_A4XX_UNKNOWN_0CC6 0x00000cc6
 
#define REG_A4XX_UNKNOWN_0D01 0x00000d01
 
#define REG_A4XX_UNKNOWN_0E05 0x00000e05
 
#define REG_A4XX_UNKNOWN_0E42 0x00000e42
 
#define REG_A4XX_UNKNOWN_0EC2 0x00000ec2
 
#define REG_A4XX_UNKNOWN_0EC3 0x00000ec3
 
#define REG_A4XX_UNKNOWN_0F03 0x00000f03
 
#define REG_A4XX_UNKNOWN_2001 0x00002001
 
#define REG_A4XX_UNKNOWN_209B 0x0000209b
 
#define REG_A4XX_UNKNOWN_20EF 0x000020ef
 
#define REG_A4XX_UNKNOWN_20F0 0x000020f0
 
#define REG_A4XX_UNKNOWN_20F1 0x000020f1
 
#define REG_A4XX_UNKNOWN_20F2 0x000020f2
 
#define REG_A4XX_UNKNOWN_20F7 0x000020f7
#define A4XX_UNKNOWN_20F7__MASK 0xffffffff
#define A4XX_UNKNOWN_20F7__SHIFT 0
static inline uint32_t A4XX_UNKNOWN_20F7(float val)
{
return ((fui(val)) << A4XX_UNKNOWN_20F7__SHIFT) & A4XX_UNKNOWN_20F7__MASK;
}
 
#define REG_A4XX_UNKNOWN_2152 0x00002152
 
#define REG_A4XX_UNKNOWN_2153 0x00002153
 
#define REG_A4XX_UNKNOWN_2154 0x00002154
 
#define REG_A4XX_UNKNOWN_2155 0x00002155
 
#define REG_A4XX_UNKNOWN_2156 0x00002156
 
#define REG_A4XX_UNKNOWN_2157 0x00002157
 
#define REG_A4XX_UNKNOWN_21C3 0x000021c3
 
#define REG_A4XX_UNKNOWN_21E6 0x000021e6
 
#define REG_A4XX_UNKNOWN_2209 0x00002209
 
#define REG_A4XX_UNKNOWN_22D7 0x000022d7
 
#define REG_A4XX_TEX_SAMP_0 0x00000000
#define A4XX_TEX_SAMP_0_MIPFILTER_LINEAR_NEAR 0x00000001
#define A4XX_TEX_SAMP_0_XY_MAG__MASK 0x00000006
#define A4XX_TEX_SAMP_0_XY_MAG__SHIFT 1
static inline uint32_t A4XX_TEX_SAMP_0_XY_MAG(enum a4xx_tex_filter val)
{
return ((val) << A4XX_TEX_SAMP_0_XY_MAG__SHIFT) & A4XX_TEX_SAMP_0_XY_MAG__MASK;
}
#define A4XX_TEX_SAMP_0_XY_MIN__MASK 0x00000018
#define A4XX_TEX_SAMP_0_XY_MIN__SHIFT 3
static inline uint32_t A4XX_TEX_SAMP_0_XY_MIN(enum a4xx_tex_filter val)
{
return ((val) << A4XX_TEX_SAMP_0_XY_MIN__SHIFT) & A4XX_TEX_SAMP_0_XY_MIN__MASK;
}
#define A4XX_TEX_SAMP_0_WRAP_S__MASK 0x000000e0
#define A4XX_TEX_SAMP_0_WRAP_S__SHIFT 5
static inline uint32_t A4XX_TEX_SAMP_0_WRAP_S(enum a4xx_tex_clamp val)
{
return ((val) << A4XX_TEX_SAMP_0_WRAP_S__SHIFT) & A4XX_TEX_SAMP_0_WRAP_S__MASK;
}
#define A4XX_TEX_SAMP_0_WRAP_T__MASK 0x00000700
#define A4XX_TEX_SAMP_0_WRAP_T__SHIFT 8
static inline uint32_t A4XX_TEX_SAMP_0_WRAP_T(enum a4xx_tex_clamp val)
{
return ((val) << A4XX_TEX_SAMP_0_WRAP_T__SHIFT) & A4XX_TEX_SAMP_0_WRAP_T__MASK;
}
#define A4XX_TEX_SAMP_0_WRAP_R__MASK 0x00003800
#define A4XX_TEX_SAMP_0_WRAP_R__SHIFT 11
static inline uint32_t A4XX_TEX_SAMP_0_WRAP_R(enum a4xx_tex_clamp val)
{
return ((val) << A4XX_TEX_SAMP_0_WRAP_R__SHIFT) & A4XX_TEX_SAMP_0_WRAP_R__MASK;
}
#define A4XX_TEX_SAMP_0_ANISO__MASK 0x0001c000
#define A4XX_TEX_SAMP_0_ANISO__SHIFT 14
static inline uint32_t A4XX_TEX_SAMP_0_ANISO(enum a4xx_tex_aniso val)
{
return ((val) << A4XX_TEX_SAMP_0_ANISO__SHIFT) & A4XX_TEX_SAMP_0_ANISO__MASK;
}
 
#define REG_A4XX_TEX_SAMP_1 0x00000001
#define A4XX_TEX_SAMP_1_COMPARE_FUNC__MASK 0x0000000e
#define A4XX_TEX_SAMP_1_COMPARE_FUNC__SHIFT 1
static inline uint32_t A4XX_TEX_SAMP_1_COMPARE_FUNC(enum adreno_compare_func val)
{
return ((val) << A4XX_TEX_SAMP_1_COMPARE_FUNC__SHIFT) & A4XX_TEX_SAMP_1_COMPARE_FUNC__MASK;
}
#define A4XX_TEX_SAMP_1_UNNORM_COORDS 0x00000020
#define A4XX_TEX_SAMP_1_MIPFILTER_LINEAR_FAR 0x00000040
#define A4XX_TEX_SAMP_1_MAX_LOD__MASK 0x000fff00
#define A4XX_TEX_SAMP_1_MAX_LOD__SHIFT 8
static inline uint32_t A4XX_TEX_SAMP_1_MAX_LOD(float val)
{
return ((((uint32_t)(val * 256.0))) << A4XX_TEX_SAMP_1_MAX_LOD__SHIFT) & A4XX_TEX_SAMP_1_MAX_LOD__MASK;
}
#define A4XX_TEX_SAMP_1_MIN_LOD__MASK 0xfff00000
#define A4XX_TEX_SAMP_1_MIN_LOD__SHIFT 20
static inline uint32_t A4XX_TEX_SAMP_1_MIN_LOD(float val)
{
return ((((uint32_t)(val * 256.0))) << A4XX_TEX_SAMP_1_MIN_LOD__SHIFT) & A4XX_TEX_SAMP_1_MIN_LOD__MASK;
}
 
#define REG_A4XX_TEX_CONST_0 0x00000000
#define A4XX_TEX_CONST_0_TILED 0x00000001
#define A4XX_TEX_CONST_0_SRGB 0x00000004
#define A4XX_TEX_CONST_0_SWIZ_X__MASK 0x00000070
#define A4XX_TEX_CONST_0_SWIZ_X__SHIFT 4
static inline uint32_t A4XX_TEX_CONST_0_SWIZ_X(enum a4xx_tex_swiz val)
{
return ((val) << A4XX_TEX_CONST_0_SWIZ_X__SHIFT) & A4XX_TEX_CONST_0_SWIZ_X__MASK;
}
#define A4XX_TEX_CONST_0_SWIZ_Y__MASK 0x00000380
#define A4XX_TEX_CONST_0_SWIZ_Y__SHIFT 7
static inline uint32_t A4XX_TEX_CONST_0_SWIZ_Y(enum a4xx_tex_swiz val)
{
return ((val) << A4XX_TEX_CONST_0_SWIZ_Y__SHIFT) & A4XX_TEX_CONST_0_SWIZ_Y__MASK;
}
#define A4XX_TEX_CONST_0_SWIZ_Z__MASK 0x00001c00
#define A4XX_TEX_CONST_0_SWIZ_Z__SHIFT 10
static inline uint32_t A4XX_TEX_CONST_0_SWIZ_Z(enum a4xx_tex_swiz val)
{
return ((val) << A4XX_TEX_CONST_0_SWIZ_Z__SHIFT) & A4XX_TEX_CONST_0_SWIZ_Z__MASK;
}
#define A4XX_TEX_CONST_0_SWIZ_W__MASK 0x0000e000
#define A4XX_TEX_CONST_0_SWIZ_W__SHIFT 13
static inline uint32_t A4XX_TEX_CONST_0_SWIZ_W(enum a4xx_tex_swiz val)
{
return ((val) << A4XX_TEX_CONST_0_SWIZ_W__SHIFT) & A4XX_TEX_CONST_0_SWIZ_W__MASK;
}
#define A4XX_TEX_CONST_0_MIPLVLS__MASK 0x000f0000
#define A4XX_TEX_CONST_0_MIPLVLS__SHIFT 16
static inline uint32_t A4XX_TEX_CONST_0_MIPLVLS(uint32_t val)
{
return ((val) << A4XX_TEX_CONST_0_MIPLVLS__SHIFT) & A4XX_TEX_CONST_0_MIPLVLS__MASK;
}
#define A4XX_TEX_CONST_0_FMT__MASK 0x1fc00000
#define A4XX_TEX_CONST_0_FMT__SHIFT 22
static inline uint32_t A4XX_TEX_CONST_0_FMT(enum a4xx_tex_fmt val)
{
return ((val) << A4XX_TEX_CONST_0_FMT__SHIFT) & A4XX_TEX_CONST_0_FMT__MASK;
}
#define A4XX_TEX_CONST_0_TYPE__MASK 0x60000000
#define A4XX_TEX_CONST_0_TYPE__SHIFT 29
static inline uint32_t A4XX_TEX_CONST_0_TYPE(enum a4xx_tex_type val)
{
return ((val) << A4XX_TEX_CONST_0_TYPE__SHIFT) & A4XX_TEX_CONST_0_TYPE__MASK;
}
 
#define REG_A4XX_TEX_CONST_1 0x00000001
#define A4XX_TEX_CONST_1_HEIGHT__MASK 0x00007fff
#define A4XX_TEX_CONST_1_HEIGHT__SHIFT 0
static inline uint32_t A4XX_TEX_CONST_1_HEIGHT(uint32_t val)
{
return ((val) << A4XX_TEX_CONST_1_HEIGHT__SHIFT) & A4XX_TEX_CONST_1_HEIGHT__MASK;
}
#define A4XX_TEX_CONST_1_WIDTH__MASK 0x1fff8000
#define A4XX_TEX_CONST_1_WIDTH__SHIFT 15
static inline uint32_t A4XX_TEX_CONST_1_WIDTH(uint32_t val)
{
return ((val) << A4XX_TEX_CONST_1_WIDTH__SHIFT) & A4XX_TEX_CONST_1_WIDTH__MASK;
}
 
#define REG_A4XX_TEX_CONST_2 0x00000002
#define A4XX_TEX_CONST_2_FETCHSIZE__MASK 0x0000000f
#define A4XX_TEX_CONST_2_FETCHSIZE__SHIFT 0
static inline uint32_t A4XX_TEX_CONST_2_FETCHSIZE(enum a4xx_tex_fetchsize val)
{
return ((val) << A4XX_TEX_CONST_2_FETCHSIZE__SHIFT) & A4XX_TEX_CONST_2_FETCHSIZE__MASK;
}
#define A4XX_TEX_CONST_2_PITCH__MASK 0x3ffffe00
#define A4XX_TEX_CONST_2_PITCH__SHIFT 9
static inline uint32_t A4XX_TEX_CONST_2_PITCH(uint32_t val)
{
return ((val) << A4XX_TEX_CONST_2_PITCH__SHIFT) & A4XX_TEX_CONST_2_PITCH__MASK;
}
#define A4XX_TEX_CONST_2_SWAP__MASK 0xc0000000
#define A4XX_TEX_CONST_2_SWAP__SHIFT 30
static inline uint32_t A4XX_TEX_CONST_2_SWAP(enum a3xx_color_swap val)
{
return ((val) << A4XX_TEX_CONST_2_SWAP__SHIFT) & A4XX_TEX_CONST_2_SWAP__MASK;
}
 
#define REG_A4XX_TEX_CONST_3 0x00000003
#define A4XX_TEX_CONST_3_LAYERSZ__MASK 0x00003fff
#define A4XX_TEX_CONST_3_LAYERSZ__SHIFT 0
static inline uint32_t A4XX_TEX_CONST_3_LAYERSZ(uint32_t val)
{
return ((val >> 12) << A4XX_TEX_CONST_3_LAYERSZ__SHIFT) & A4XX_TEX_CONST_3_LAYERSZ__MASK;
}
#define A4XX_TEX_CONST_3_DEPTH__MASK 0x7ffc0000
#define A4XX_TEX_CONST_3_DEPTH__SHIFT 18
static inline uint32_t A4XX_TEX_CONST_3_DEPTH(uint32_t val)
{
return ((val) << A4XX_TEX_CONST_3_DEPTH__SHIFT) & A4XX_TEX_CONST_3_DEPTH__MASK;
}
 
#define REG_A4XX_TEX_CONST_4 0x00000004
#define A4XX_TEX_CONST_4_LAYERSZ__MASK 0x0000000f
#define A4XX_TEX_CONST_4_LAYERSZ__SHIFT 0
static inline uint32_t A4XX_TEX_CONST_4_LAYERSZ(uint32_t val)
{
return ((val >> 12) << A4XX_TEX_CONST_4_LAYERSZ__SHIFT) & A4XX_TEX_CONST_4_LAYERSZ__MASK;
}
#define A4XX_TEX_CONST_4_BASE__MASK 0xffffffe0
#define A4XX_TEX_CONST_4_BASE__SHIFT 5
static inline uint32_t A4XX_TEX_CONST_4_BASE(uint32_t val)
{
return ((val >> 5) << A4XX_TEX_CONST_4_BASE__SHIFT) & A4XX_TEX_CONST_4_BASE__MASK;
}
 
#define REG_A4XX_TEX_CONST_5 0x00000005
 
#define REG_A4XX_TEX_CONST_6 0x00000006
 
#define REG_A4XX_TEX_CONST_7 0x00000007
 
 
#endif /* A4XX_XML */
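 
The generated helpers above all follow one pattern: each multi-bit field gets a __MASK/__SHIFT pair plus a small inline packer, single-bit flags are plain defines, and a complete register word is built by OR-ing the packed fields together before it is written at the matching REG_A4XX_* offset. A minimal sketch of that composition, using only helpers defined in this header; emit_reg_write() is a hypothetical stand-in for the driver's real command-stream emission, which is not part of this header:
 
#include <stdint.h>
#include <stdio.h>
 
/* Hypothetical stand-in for the driver's command-stream write; assumes the
 * A4XX_* helpers and REG_A4XX_* offsets from the generated header above. */
static void emit_reg_write(uint32_t offset, uint32_t value)
{
	printf("reg 0x%04x <- 0x%08x\n", offset, value);
}
 
static void set_window_scissor_tl(uint32_t x, uint32_t y)
{
	/* Each field helper shifts its value into position and masks it, so
	 * the full register word is simply the OR of the packed fields. */
	uint32_t tl = A4XX_GRAS_SC_WINDOW_SCISSOR_TL_X(x) |
			A4XX_GRAS_SC_WINDOW_SCISSOR_TL_Y(y);
 
	emit_reg_write(REG_A4XX_GRAS_SC_WINDOW_SCISSOR_TL, tl);
}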
/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/freedreno/a4xx/fd4_blend.c
0,0 → 1,129
/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */
 
/*
* Copyright (C) 2014 Rob Clark <robclark@freedesktop.org>
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Authors:
* Rob Clark <robclark@freedesktop.org>
*/
 
#include "pipe/p_state.h"
#include "util/u_string.h"
#include "util/u_memory.h"
 
#include "fd4_blend.h"
#include "fd4_context.h"
#include "fd4_format.h"
 
static enum a4xx_rb_blend_opcode
blend_func(unsigned func)
{
switch (func) {
case PIPE_BLEND_ADD:
return BLEND_DST_PLUS_SRC;
case PIPE_BLEND_MIN:
return BLEND_MIN_DST_SRC;
case PIPE_BLEND_MAX:
return BLEND_MAX_DST_SRC;
case PIPE_BLEND_SUBTRACT:
return BLEND_SRC_MINUS_DST;
case PIPE_BLEND_REVERSE_SUBTRACT:
return BLEND_DST_MINUS_SRC;
default:
DBG("invalid blend func: %x", func);
return 0;
}
}
 
void *
fd4_blend_state_create(struct pipe_context *pctx,
const struct pipe_blend_state *cso)
{
struct fd4_blend_stateobj *so;
// enum a3xx_rop_code rop = ROP_COPY;
bool reads_dest = false;
int i;
 
if (cso->logicop_enable) {
// rop = cso->logicop_func; /* maps 1:1 */
 
switch (cso->logicop_func) {
case PIPE_LOGICOP_NOR:
case PIPE_LOGICOP_AND_INVERTED:
case PIPE_LOGICOP_AND_REVERSE:
case PIPE_LOGICOP_INVERT:
case PIPE_LOGICOP_XOR:
case PIPE_LOGICOP_NAND:
case PIPE_LOGICOP_AND:
case PIPE_LOGICOP_EQUIV:
case PIPE_LOGICOP_NOOP:
case PIPE_LOGICOP_OR_INVERTED:
case PIPE_LOGICOP_OR_REVERSE:
case PIPE_LOGICOP_OR:
reads_dest = true;
break;
}
}
 
if (cso->independent_blend_enable) {
DBG("Unsupported! independent blend state");
return NULL;
}
 
so = CALLOC_STRUCT(fd4_blend_stateobj);
if (!so)
return NULL;
 
so->base = *cso;
 
for (i = 0; i < ARRAY_SIZE(so->rb_mrt); i++) {
const struct pipe_rt_blend_state *rt = &cso->rt[i];
 
so->rb_mrt[i].blend_control =
A4XX_RB_MRT_BLEND_CONTROL_RGB_SRC_FACTOR(fd_blend_factor(rt->rgb_src_factor)) |
A4XX_RB_MRT_BLEND_CONTROL_RGB_BLEND_OPCODE(blend_func(rt->rgb_func)) |
A4XX_RB_MRT_BLEND_CONTROL_RGB_DEST_FACTOR(fd_blend_factor(rt->rgb_dst_factor)) |
A4XX_RB_MRT_BLEND_CONTROL_ALPHA_SRC_FACTOR(fd_blend_factor(rt->alpha_src_factor)) |
A4XX_RB_MRT_BLEND_CONTROL_ALPHA_BLEND_OPCODE(blend_func(rt->alpha_func)) |
A4XX_RB_MRT_BLEND_CONTROL_ALPHA_DEST_FACTOR(fd_blend_factor(rt->alpha_dst_factor));
 
so->rb_mrt[i].control =
0xc00 | /* XXX ROP_CODE ?? */
A4XX_RB_MRT_CONTROL_COMPONENT_ENABLE(rt->colormask);
 
if (rt->blend_enable) {
so->rb_mrt[i].control |=
A4XX_RB_MRT_CONTROL_READ_DEST_ENABLE |
A4XX_RB_MRT_CONTROL_BLEND |
A4XX_RB_MRT_CONTROL_BLEND2;
so->rb_fs_output |= A4XX_RB_FS_OUTPUT_ENABLE_BLEND(1);
}
 
if (reads_dest)
so->rb_mrt[i].control |= A4XX_RB_MRT_CONTROL_READ_DEST_ENABLE;
 
if (cso->dither)
so->rb_mrt[i].buf_info |= A4XX_RB_MRT_BUF_INFO_DITHER_MODE(DITHER_ALWAYS);
}
 
return so;
}
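 
For orientation, the per-MRT packing above can be read concretely: for classic source-over alpha blending (SRC_ALPHA / INV_SRC_ALPHA factors with PIPE_BLEND_ADD) the blend_control word reduces to the expression below. This is an illustrative sketch only, reusing blend_func() and the same A4XX_RB_MRT_BLEND_CONTROL_* and fd_blend_factor() helpers already used in the loop above; it is not additional driver API.
 
/* Sketch: the blend_control word fd4_blend_state_create() builds for one
 * render target configured for standard source-over alpha blending. */
static uint32_t example_src_over_blend_control(void)
{
	return A4XX_RB_MRT_BLEND_CONTROL_RGB_SRC_FACTOR(fd_blend_factor(PIPE_BLENDFACTOR_SRC_ALPHA)) |
		A4XX_RB_MRT_BLEND_CONTROL_RGB_BLEND_OPCODE(blend_func(PIPE_BLEND_ADD)) |
		A4XX_RB_MRT_BLEND_CONTROL_RGB_DEST_FACTOR(fd_blend_factor(PIPE_BLENDFACTOR_INV_SRC_ALPHA)) |
		A4XX_RB_MRT_BLEND_CONTROL_ALPHA_SRC_FACTOR(fd_blend_factor(PIPE_BLENDFACTOR_ONE)) |
		A4XX_RB_MRT_BLEND_CONTROL_ALPHA_BLEND_OPCODE(blend_func(PIPE_BLEND_ADD)) |
		A4XX_RB_MRT_BLEND_CONTROL_ALPHA_DEST_FACTOR(fd_blend_factor(PIPE_BLENDFACTOR_INV_SRC_ALPHA));
}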
/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/freedreno/a4xx/fd4_blend.h
0,0 → 1,54
/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */
 
/*
* Copyright (C) 2014 Rob Clark <robclark@freedesktop.org>
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Authors:
* Rob Clark <robclark@freedesktop.org>
*/
 
#ifndef FD4_BLEND_H_
#define FD4_BLEND_H_
 
#include "pipe/p_state.h"
#include "pipe/p_context.h"
 
struct fd4_blend_stateobj {
struct pipe_blend_state base;
struct {
uint32_t control;
uint32_t buf_info;
uint32_t blend_control;
} rb_mrt[8];
uint32_t rb_fs_output;
};
 
static INLINE struct fd4_blend_stateobj *
fd4_blend_stateobj(struct pipe_blend_state *blend)
{
return (struct fd4_blend_stateobj *)blend;
}
 
void * fd4_blend_state_create(struct pipe_context *pctx,
const struct pipe_blend_state *cso);
 
#endif /* FD4_BLEND_H_ */
/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/freedreno/a4xx/fd4_context.c
0,0 → 1,172
/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */
 
/*
* Copyright (C) 2014 Rob Clark <robclark@freedesktop.org>
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Authors:
* Rob Clark <robclark@freedesktop.org>
*/
 
 
#include "fd4_context.h"
#include "fd4_blend.h"
#include "fd4_draw.h"
#include "fd4_emit.h"
#include "fd4_gmem.h"
#include "fd4_program.h"
#include "fd4_query.h"
#include "fd4_rasterizer.h"
#include "fd4_texture.h"
#include "fd4_zsa.h"
 
static void
fd4_context_destroy(struct pipe_context *pctx)
{
struct fd4_context *fd4_ctx = fd4_context(fd_context(pctx));
 
util_dynarray_fini(&fd4_ctx->rbrc_patches);
 
fd_bo_del(fd4_ctx->vs_pvt_mem);
fd_bo_del(fd4_ctx->fs_pvt_mem);
fd_bo_del(fd4_ctx->vsc_size_mem);
 
pctx->delete_vertex_elements_state(pctx, fd4_ctx->solid_vbuf_state.vtx);
pctx->delete_vertex_elements_state(pctx, fd4_ctx->blit_vbuf_state.vtx);
 
pipe_resource_reference(&fd4_ctx->solid_vbuf, NULL);
pipe_resource_reference(&fd4_ctx->blit_texcoord_vbuf, NULL);
 
fd_context_destroy(pctx);
}
 
/* TODO we could combine a few of these small buffers (solid_vbuf,
* blit_texcoord_vbuf, and vsc_size_mem) into a single buffer and
* save a tiny bit of memory.
*/
 
static struct pipe_resource *
create_solid_vertexbuf(struct pipe_context *pctx)
{
static const float init_shader_const[] = {
-1.000000, +1.000000, +1.000000,
+1.000000, -1.000000, +1.000000,
};
struct pipe_resource *prsc = pipe_buffer_create(pctx->screen,
PIPE_BIND_CUSTOM, PIPE_USAGE_IMMUTABLE, sizeof(init_shader_const));
pipe_buffer_write(pctx, prsc, 0,
sizeof(init_shader_const), init_shader_const);
return prsc;
}
 
static struct pipe_resource *
create_blit_texcoord_vertexbuf(struct pipe_context *pctx)
{
struct pipe_resource *prsc = pipe_buffer_create(pctx->screen,
PIPE_BIND_CUSTOM, PIPE_USAGE_DYNAMIC, 16);
return prsc;
}
 
static const uint8_t primtypes[PIPE_PRIM_MAX] = {
[PIPE_PRIM_POINTS] = DI_PT_POINTLIST_A3XX,
[PIPE_PRIM_LINES] = DI_PT_LINELIST,
[PIPE_PRIM_LINE_STRIP] = DI_PT_LINESTRIP,
[PIPE_PRIM_LINE_LOOP] = DI_PT_LINELOOP,
[PIPE_PRIM_TRIANGLES] = DI_PT_TRILIST,
[PIPE_PRIM_TRIANGLE_STRIP] = DI_PT_TRISTRIP,
[PIPE_PRIM_TRIANGLE_FAN] = DI_PT_TRIFAN,
};
 
struct pipe_context *
fd4_context_create(struct pipe_screen *pscreen, void *priv)
{
struct fd_screen *screen = fd_screen(pscreen);
struct fd4_context *fd4_ctx = CALLOC_STRUCT(fd4_context);
struct pipe_context *pctx;
 
if (!fd4_ctx)
return NULL;
 
pctx = &fd4_ctx->base.base;
 
fd4_ctx->base.dev = fd_device_ref(screen->dev);
fd4_ctx->base.screen = fd_screen(pscreen);
 
pctx->destroy = fd4_context_destroy;
pctx->create_blend_state = fd4_blend_state_create;
pctx->create_rasterizer_state = fd4_rasterizer_state_create;
pctx->create_depth_stencil_alpha_state = fd4_zsa_state_create;
 
fd4_draw_init(pctx);
fd4_gmem_init(pctx);
fd4_texture_init(pctx);
fd4_prog_init(pctx);
 
pctx = fd_context_init(&fd4_ctx->base, pscreen, primtypes, priv);
if (!pctx)
return NULL;
 
util_dynarray_init(&fd4_ctx->rbrc_patches);
 
fd4_ctx->vs_pvt_mem = fd_bo_new(screen->dev, 0x2000,
DRM_FREEDRENO_GEM_TYPE_KMEM);
 
fd4_ctx->fs_pvt_mem = fd_bo_new(screen->dev, 0x2000,
DRM_FREEDRENO_GEM_TYPE_KMEM);
 
fd4_ctx->vsc_size_mem = fd_bo_new(screen->dev, 0x1000,
DRM_FREEDRENO_GEM_TYPE_KMEM);
 
fd4_ctx->solid_vbuf = create_solid_vertexbuf(pctx);
fd4_ctx->blit_texcoord_vbuf = create_blit_texcoord_vertexbuf(pctx);
 
/* setup solid_vbuf_state: */
fd4_ctx->solid_vbuf_state.vtx = pctx->create_vertex_elements_state(
pctx, 1, (struct pipe_vertex_element[]){{
.vertex_buffer_index = 0,
.src_offset = 0,
.src_format = PIPE_FORMAT_R32G32B32_FLOAT,
}});
fd4_ctx->solid_vbuf_state.vertexbuf.count = 1;
fd4_ctx->solid_vbuf_state.vertexbuf.vb[0].stride = 12;
fd4_ctx->solid_vbuf_state.vertexbuf.vb[0].buffer = fd4_ctx->solid_vbuf;
 
/* setup blit_vbuf_state: */
fd4_ctx->blit_vbuf_state.vtx = pctx->create_vertex_elements_state(
pctx, 2, (struct pipe_vertex_element[]){{
.vertex_buffer_index = 0,
.src_offset = 0,
.src_format = PIPE_FORMAT_R32G32_FLOAT,
}, {
.vertex_buffer_index = 1,
.src_offset = 0,
.src_format = PIPE_FORMAT_R32G32B32_FLOAT,
}});
fd4_ctx->blit_vbuf_state.vertexbuf.count = 2;
fd4_ctx->blit_vbuf_state.vertexbuf.vb[0].stride = 8;
fd4_ctx->blit_vbuf_state.vertexbuf.vb[0].buffer = fd4_ctx->blit_texcoord_vbuf;
fd4_ctx->blit_vbuf_state.vertexbuf.vb[1].stride = 12;
fd4_ctx->blit_vbuf_state.vertexbuf.vb[1].buffer = fd4_ctx->solid_vbuf;
 
fd4_query_context_init(pctx);
 
return pctx;
}
/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/freedreno/a4xx/fd4_context.h
0,0 → 1,105
/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */
 
/*
* Copyright (C) 2014 Rob Clark <robclark@freedesktop.org>
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Authors:
* Rob Clark <robclark@freedesktop.org>
*/
 
#ifndef FD4_CONTEXT_H_
#define FD4_CONTEXT_H_
 
#include "freedreno_drmif.h"
 
#include "freedreno_context.h"
 
#include "ir3_shader.h"
 
struct fd4_context {
struct fd_context base;
 
/* Keep track of writes to RB_RENDER_CONTROL which need to be patched
* once we know whether or not to use GMEM, and the GMEM tile pitch.
*/
struct util_dynarray rbrc_patches;
 
struct fd_bo *vs_pvt_mem, *fs_pvt_mem;
 
/* This only needs to be 4 * num_of_pipes bytes (ie. 32 bytes). We
* could combine it with another allocation.
*/
struct fd_bo *vsc_size_mem;
 
/* vertex buf used for clear/gmem->mem vertices, and mem->gmem
* vertices:
*/
struct pipe_resource *solid_vbuf;
 
/* vertex buf used for mem->gmem tex coords:
*/
struct pipe_resource *blit_texcoord_vbuf;
 
/* vertex state for solid_vbuf:
* - solid_vbuf / 12 / R32G32B32_FLOAT
*/
struct fd_vertex_state solid_vbuf_state;
 
/* vertex state for blit_prog:
* - blit_texcoord_vbuf / 8 / R32G32_FLOAT
* - solid_vbuf / 12 / R32G32B32_FLOAT
*/
struct fd_vertex_state blit_vbuf_state;
 
/* if *any* of bits are set in {v,f}saturate_{s,t,r} */
bool vsaturate, fsaturate;
 
/* bitmask of samplers which need coords clamped for the vertex
* shader:
*/
uint16_t vsaturate_s, vsaturate_t, vsaturate_r;
 
/* bitmask of samplers which need coords clamped for the frag
* shader:
*/
uint16_t fsaturate_s, fsaturate_t, fsaturate_r;
 
/* bitmask of integer texture samplers */
uint16_t vinteger_s, finteger_s;
 
/* some state changes require a different shader variant. Keep
* track of this so we know when we need to re-emit shader state
* due to variant change. See fixup_shader_state()
*/
struct ir3_shader_key last_key;
};
 
static INLINE struct fd4_context *
fd4_context(struct fd_context *ctx)
{
return (struct fd4_context *)ctx;
}
 
struct pipe_context *
fd4_context_create(struct pipe_screen *pscreen, void *priv);
 
#endif /* FD4_CONTEXT_H_ */
/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/freedreno/a4xx/fd4_draw.c
0,0 → 1,333
/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */
 
/*
* Copyright (C) 2014 Rob Clark <robclark@freedesktop.org>
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Authors:
* Rob Clark <robclark@freedesktop.org>
*/
 
#include "pipe/p_state.h"
#include "util/u_string.h"
#include "util/u_memory.h"
#include "util/u_prim.h"
 
#include "freedreno_state.h"
#include "freedreno_resource.h"
 
#include "fd4_draw.h"
#include "fd4_context.h"
#include "fd4_emit.h"
#include "fd4_program.h"
#include "fd4_format.h"
#include "fd4_zsa.h"
 
 
static void
draw_impl(struct fd_context *ctx, struct fd_ringbuffer *ring,
struct fd4_emit *emit)
{
const struct pipe_draw_info *info = emit->info;
 
fd4_emit_state(ctx, ring, emit);
 
if (emit->dirty & (FD_DIRTY_VTXBUF | FD_DIRTY_VTXSTATE))
fd4_emit_vertex_bufs(ring, emit);
 
OUT_PKT0(ring, REG_A4XX_VFD_INDEX_OFFSET, 2);
OUT_RING(ring, info->indexed ? info->index_bias : info->start); /* VFD_INDEX_OFFSET */
OUT_RING(ring, info->start_instance); /* ??? UNKNOWN_2209 */
 
OUT_PKT0(ring, REG_A4XX_PC_RESTART_INDEX, 1);
OUT_RING(ring, info->primitive_restart ? /* PC_RESTART_INDEX */
info->restart_index : 0xffffffff);
 
fd4_draw_emit(ctx, ring,
emit->key.binning_pass ? IGNORE_VISIBILITY : USE_VISIBILITY,
info);
}
 
/* fixup dirty shader state in case some "unrelated" (from the state-
* tracker's perspective) state change causes us to switch to a
* different variant.
*/
static void
fixup_shader_state(struct fd_context *ctx, struct ir3_shader_key *key)
{
struct fd4_context *fd4_ctx = fd4_context(ctx);
struct ir3_shader_key *last_key = &fd4_ctx->last_key;
 
if (!ir3_shader_key_equal(last_key, key)) {
ctx->dirty |= FD_DIRTY_PROG;
 
if (last_key->has_per_samp || key->has_per_samp) {
if ((last_key->vsaturate_s != key->vsaturate_s) ||
(last_key->vsaturate_t != key->vsaturate_t) ||
(last_key->vsaturate_r != key->vsaturate_r) ||
(last_key->vinteger_s != key->vinteger_s))
ctx->prog.dirty |= FD_SHADER_DIRTY_VP;
 
if ((last_key->fsaturate_s != key->fsaturate_s) ||
(last_key->fsaturate_t != key->fsaturate_t) ||
(last_key->fsaturate_r != key->fsaturate_r))
ctx->prog.dirty |= FD_SHADER_DIRTY_FP;
}
 
if (last_key->color_two_side != key->color_two_side)
ctx->prog.dirty |= FD_SHADER_DIRTY_FP;
 
if (last_key->half_precision != key->half_precision)
ctx->prog.dirty |= FD_SHADER_DIRTY_FP;
 
if (last_key->rasterflat != key->rasterflat)
ctx->prog.dirty |= FD_SHADER_DIRTY_FP;
 
fd4_ctx->last_key = *key;
}
}
 
static void
fd4_draw_vbo(struct fd_context *ctx, const struct pipe_draw_info *info)
{
struct fd4_context *fd4_ctx = fd4_context(ctx);
struct pipe_framebuffer_state *pfb = &ctx->framebuffer;
struct fd4_emit emit = {
.vtx = &ctx->vtx,
.prog = &ctx->prog,
.info = info,
.key = {
/* do binning pass first: */
.binning_pass = true,
.color_two_side = ctx->rasterizer ? ctx->rasterizer->light_twoside : false,
.rasterflat = ctx->rasterizer && ctx->rasterizer->flatshade,
// TODO set .half_precision based on render target format,
// ie. float16 and smaller use half, float32 use full..
.half_precision = !!(fd_mesa_debug & FD_DBG_FRAGHALF),
.has_per_samp = (fd4_ctx->fsaturate || fd4_ctx->vsaturate ||
fd4_ctx->vinteger_s || fd4_ctx->finteger_s),
.vsaturate_s = fd4_ctx->vsaturate_s,
.vsaturate_t = fd4_ctx->vsaturate_t,
.vsaturate_r = fd4_ctx->vsaturate_r,
.fsaturate_s = fd4_ctx->fsaturate_s,
.fsaturate_t = fd4_ctx->fsaturate_t,
.fsaturate_r = fd4_ctx->fsaturate_r,
.vinteger_s = fd4_ctx->vinteger_s,
.finteger_s = fd4_ctx->finteger_s,
},
.format = fd4_emit_format(pfb->cbufs[0]),
.pformat = pipe_surface_format(pfb->cbufs[0]),
};
unsigned dirty;
 
fixup_shader_state(ctx, &emit.key);
 
dirty = ctx->dirty;
emit.dirty = dirty & ~(FD_DIRTY_BLEND);
draw_impl(ctx, ctx->binning_ring, &emit);
 
/* and now regular (non-binning) pass: */
emit.key.binning_pass = false;
emit.dirty = dirty;
emit.vp = NULL; /* we changed key so need to refetch vp */
draw_impl(ctx, ctx->ring, &emit);
}
 
/* clear operations ignore viewport state, so we need to reset it
* based on framebuffer state:
*/
static void
reset_viewport(struct fd_ringbuffer *ring, struct pipe_framebuffer_state *pfb)
{
float half_width = pfb->width * 0.5f;
float half_height = pfb->height * 0.5f;
 
OUT_PKT0(ring, REG_A4XX_GRAS_CL_VPORT_XOFFSET_0, 4);
OUT_RING(ring, A4XX_GRAS_CL_VPORT_XOFFSET_0(half_width));
OUT_RING(ring, A4XX_GRAS_CL_VPORT_XSCALE_0(half_width));
OUT_RING(ring, A4XX_GRAS_CL_VPORT_YOFFSET_0(half_height));
OUT_RING(ring, A4XX_GRAS_CL_VPORT_YSCALE_0(-half_height));
}
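/* Worked example (hypothetical numbers): for a 640x480 framebuffer the
* values emitted above are XOFFSET = XSCALE = 320, YOFFSET = 240 and
* YSCALE = -240, i.e. x' = 320 + 320 * x_ndc and y' = 240 - 240 * y_ndc,
* so NDC [-1,+1] x [-1,+1] maps to the full 640x480 target (with y
* flipped), which is what the clear rectangle needs regardless of the
* currently bound viewport.
*/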
 
static void
fd4_clear(struct fd_context *ctx, unsigned buffers,
const union pipe_color_union *color, double depth, unsigned stencil)
{
struct fd4_context *fd4_ctx = fd4_context(ctx);
struct fd_ringbuffer *ring = ctx->ring;
struct pipe_framebuffer_state *pfb = &ctx->framebuffer;
unsigned dirty = ctx->dirty;
unsigned ce, i;
struct fd4_emit emit = {
.vtx = &fd4_ctx->solid_vbuf_state,
.prog = &ctx->solid_prog,
.key = {
.half_precision = true,
},
.format = fd4_emit_format(pfb->cbufs[0]),
};
uint32_t colr = 0;
 
if ((buffers & PIPE_CLEAR_COLOR) && pfb->nr_cbufs)
colr = pack_rgba(pfb->cbufs[0]->format, color->f);
 
dirty &= FD_DIRTY_FRAMEBUFFER | FD_DIRTY_SCISSOR;
dirty |= FD_DIRTY_PROG;
emit.dirty = dirty;
 
OUT_PKT0(ring, REG_A4XX_PC_PRIM_VTX_CNTL, 1);
OUT_RING(ring, A4XX_PC_PRIM_VTX_CNTL_PROVOKING_VTX_LAST);
 
/* emit generic state now: */
fd4_emit_state(ctx, ring, &emit);
reset_viewport(ring, pfb);
 
if (buffers & PIPE_CLEAR_DEPTH) {
OUT_PKT0(ring, REG_A4XX_RB_DEPTH_CONTROL, 1);
OUT_RING(ring, A4XX_RB_DEPTH_CONTROL_Z_WRITE_ENABLE |
A4XX_RB_DEPTH_CONTROL_Z_ENABLE |
A4XX_RB_DEPTH_CONTROL_ZFUNC(FUNC_ALWAYS));
 
fd_wfi(ctx, ring);
OUT_PKT0(ring, REG_A4XX_GRAS_CL_VPORT_ZOFFSET_0, 2);
OUT_RING(ring, A4XX_GRAS_CL_VPORT_ZOFFSET_0(0.0));
OUT_RING(ring, A4XX_GRAS_CL_VPORT_ZSCALE_0(depth));
ctx->dirty |= FD_DIRTY_VIEWPORT;
} else {
OUT_PKT0(ring, REG_A4XX_RB_DEPTH_CONTROL, 1);
OUT_RING(ring, A4XX_RB_DEPTH_CONTROL_ZFUNC(FUNC_NEVER));
}
 
if (buffers & PIPE_CLEAR_STENCIL) {
OUT_PKT0(ring, REG_A4XX_RB_STENCILREFMASK, 2);
OUT_RING(ring, A4XX_RB_STENCILREFMASK_STENCILREF(stencil) |
A4XX_RB_STENCILREFMASK_STENCILMASK(stencil) |
A4XX_RB_STENCILREFMASK_STENCILWRITEMASK(0xff));
OUT_RING(ring, A4XX_RB_STENCILREFMASK_STENCILREF(0) |
A4XX_RB_STENCILREFMASK_STENCILMASK(0) |
0xff000000 | // XXX ???
A4XX_RB_STENCILREFMASK_STENCILWRITEMASK(0xff));
 
OUT_PKT0(ring, REG_A4XX_RB_STENCIL_CONTROL, 2);
OUT_RING(ring, A4XX_RB_STENCIL_CONTROL_STENCIL_ENABLE |
A4XX_RB_STENCIL_CONTROL_FUNC(FUNC_ALWAYS) |
A4XX_RB_STENCIL_CONTROL_FAIL(STENCIL_KEEP) |
A4XX_RB_STENCIL_CONTROL_ZPASS(STENCIL_REPLACE) |
A4XX_RB_STENCIL_CONTROL_ZFAIL(STENCIL_KEEP) |
A4XX_RB_STENCIL_CONTROL_FUNC_BF(FUNC_NEVER) |
A4XX_RB_STENCIL_CONTROL_FAIL_BF(STENCIL_KEEP) |
A4XX_RB_STENCIL_CONTROL_ZPASS_BF(STENCIL_KEEP) |
A4XX_RB_STENCIL_CONTROL_ZFAIL_BF(STENCIL_KEEP));
OUT_RING(ring, A4XX_RB_STENCIL_CONTROL2_STENCIL_BUFFER);
} else {
OUT_PKT0(ring, REG_A4XX_RB_STENCILREFMASK, 2);
OUT_RING(ring, A4XX_RB_STENCILREFMASK_STENCILREF(0) |
A4XX_RB_STENCILREFMASK_STENCILMASK(0) |
A4XX_RB_STENCILREFMASK_STENCILWRITEMASK(0));
OUT_RING(ring, A4XX_RB_STENCILREFMASK_BF_STENCILREF(0) |
A4XX_RB_STENCILREFMASK_BF_STENCILMASK(0) |
A4XX_RB_STENCILREFMASK_BF_STENCILWRITEMASK(0));
 
OUT_PKT0(ring, REG_A4XX_RB_STENCIL_CONTROL, 2);
OUT_RING(ring, A4XX_RB_STENCIL_CONTROL_FUNC(FUNC_NEVER) |
A4XX_RB_STENCIL_CONTROL_FAIL(STENCIL_KEEP) |
A4XX_RB_STENCIL_CONTROL_ZPASS(STENCIL_KEEP) |
A4XX_RB_STENCIL_CONTROL_ZFAIL(STENCIL_KEEP) |
A4XX_RB_STENCIL_CONTROL_FUNC_BF(FUNC_NEVER) |
A4XX_RB_STENCIL_CONTROL_FAIL_BF(STENCIL_KEEP) |
A4XX_RB_STENCIL_CONTROL_ZPASS_BF(STENCIL_KEEP) |
A4XX_RB_STENCIL_CONTROL_ZFAIL_BF(STENCIL_KEEP));
OUT_RING(ring, 0x00000000); /* RB_STENCIL_CONTROL2 */
}
 
if (buffers & PIPE_CLEAR_COLOR) {
OUT_PKT0(ring, REG_A4XX_RB_ALPHA_CONTROL, 1);
OUT_RING(ring, A4XX_RB_ALPHA_CONTROL_ALPHA_TEST_FUNC(FUNC_NEVER));
ce = 0xf;
} else {
ce = 0x0;
}
 
for (i = 0; i < 8; i++) {
OUT_PKT0(ring, REG_A4XX_RB_MRT_CONTROL(i), 1);
OUT_RING(ring, A4XX_RB_MRT_CONTROL_FASTCLEAR |
A4XX_RB_MRT_CONTROL_B11 |
A4XX_RB_MRT_CONTROL_COMPONENT_ENABLE(ce));
 
OUT_PKT0(ring, REG_A4XX_RB_MRT_BLEND_CONTROL(i), 1);
OUT_RING(ring, A4XX_RB_MRT_BLEND_CONTROL_RGB_SRC_FACTOR(FACTOR_ONE) |
A4XX_RB_MRT_BLEND_CONTROL_RGB_BLEND_OPCODE(BLEND_DST_PLUS_SRC) |
A4XX_RB_MRT_BLEND_CONTROL_RGB_DEST_FACTOR(FACTOR_ZERO) |
A4XX_RB_MRT_BLEND_CONTROL_ALPHA_SRC_FACTOR(FACTOR_ONE) |
A4XX_RB_MRT_BLEND_CONTROL_ALPHA_BLEND_OPCODE(BLEND_DST_PLUS_SRC) |
A4XX_RB_MRT_BLEND_CONTROL_ALPHA_DEST_FACTOR(FACTOR_ZERO));
}
 
fd4_emit_vertex_bufs(ring, &emit);
 
OUT_PKT0(ring, REG_A4XX_GRAS_ALPHA_CONTROL, 1);
OUT_RING(ring, 0x0); /* XXX GRAS_ALPHA_CONTROL */
 
OUT_PKT0(ring, REG_A4XX_GRAS_CLEAR_CNTL, 1);
OUT_RING(ring, 0x00000000);
 
OUT_PKT0(ring, REG_A4XX_RB_CLEAR_COLOR_DW0, 4);
OUT_RING(ring, colr); /* RB_CLEAR_COLOR_DW0 */
OUT_RING(ring, colr); /* RB_CLEAR_COLOR_DW1 */
OUT_RING(ring, colr); /* RB_CLEAR_COLOR_DW2 */
OUT_RING(ring, colr); /* RB_CLEAR_COLOR_DW3 */
 
/* until fastclear works: */
fd4_emit_constant(ring, SB_FRAG_SHADER, 0, 0, 4, color->ui, NULL);
 
OUT_PKT0(ring, REG_A4XX_VFD_INDEX_OFFSET, 2);
OUT_RING(ring, 0); /* VFD_INDEX_OFFSET */
OUT_RING(ring, 0); /* ??? UNKNOWN_2209 */
 
OUT_PKT0(ring, REG_A4XX_PC_RESTART_INDEX, 1);
OUT_RING(ring, 0xffffffff); /* PC_RESTART_INDEX */
 
OUT_PKT3(ring, CP_UNKNOWN_1A, 1);
OUT_RING(ring, 0x00000001);
 
fd4_draw(ctx, ring, DI_PT_RECTLIST, USE_VISIBILITY,
DI_SRC_SEL_AUTO_INDEX, 2, 1, INDEX_SIZE_IGN, 0, 0, NULL);
 
OUT_PKT3(ring, CP_UNKNOWN_1A, 1);
OUT_RING(ring, 0x00000000);
 
OUT_PKT0(ring, REG_A4XX_GRAS_CLEAR_CNTL, 1);
OUT_RING(ring, A4XX_GRAS_CLEAR_CNTL_NOT_FASTCLEAR);
 
OUT_PKT0(ring, REG_A4XX_GRAS_SC_CONTROL, 1);
OUT_RING(ring, A4XX_GRAS_SC_CONTROL_RENDER_MODE(RB_RENDERING_PASS) |
A4XX_GRAS_SC_CONTROL_MSAA_DISABLE |
A4XX_GRAS_SC_CONTROL_MSAA_SAMPLES(MSAA_ONE) |
A4XX_GRAS_SC_CONTROL_RASTER_MODE(0));
}
 
void
fd4_draw_init(struct pipe_context *pctx)
{
struct fd_context *ctx = fd_context(pctx);
ctx->draw_vbo = fd4_draw_vbo;
ctx->clear = fd4_clear;
}
/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/freedreno/a4xx/fd4_draw.h
0,0 → 1,134
/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */
 
/*
* Copyright (C) 2014 Rob Clark <robclark@freedesktop.org>
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Authors:
* Rob Clark <robclark@freedesktop.org>
*/
 
#ifndef FD4_DRAW_H_
#define FD4_DRAW_H_
 
#include "pipe/p_context.h"
 
#include "freedreno_draw.h"
 
void fd4_draw_init(struct pipe_context *pctx);
 
/* draw packet changed on a4xx, so cannot reuse one from a2xx/a3xx.. */
 
static inline uint32_t DRAW4(enum pc_di_primtype prim_type,
enum pc_di_src_sel source_select, enum a4xx_index_size index_size,
enum pc_di_vis_cull_mode vis_cull_mode)
{
return (prim_type << 0) |
(source_select << 6) |
(index_size << 10);
}
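/* A minimal usage sketch (not part of the driver): pack the draw-initiator
* header for a non-indexed triangle-list draw. The field positions follow
* the shifts in DRAW4() above; the exact numeric result depends on the enum
* encodings in the generated a4xx / adreno_pm4 headers.
*/
static inline uint32_t
fd4_draw4_example(void)
{
/* prim type is shifted in at bit 0, source select at bit 6 and index
* size at bit 10; vis_cull_mode is accepted but not packed here -- for
* USE_VISIBILITY draws it is patched in later via OUT_RINGP() in
* fd4_draw() below.
*/
return DRAW4(DI_PT_TRILIST, DI_SRC_SEL_AUTO_INDEX,
INDEX4_SIZE_32_BIT, IGNORE_VISIBILITY);
}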
 
static inline void
fd4_draw(struct fd_context *ctx, struct fd_ringbuffer *ring,
enum pc_di_primtype primtype,
enum pc_di_vis_cull_mode vismode,
enum pc_di_src_sel src_sel, uint32_t count,
uint32_t instances, enum a4xx_index_size idx_type,
uint32_t idx_size, uint32_t idx_offset,
struct fd_bo *idx_bo)
{
/* for debugging after a lockup, write a unique counter value
* to scratch7 for each draw, to make it easier to match up
* register dumps to cmdstream. The combination of IB
* (scratch6) and DRAW is enough to "triangulate" the
* particular draw that caused the lockup.
*/
emit_marker(ring, 7);
 
OUT_PKT3(ring, CP_DRAW_INDX_OFFSET, idx_bo ? 6 : 3);
if (vismode == USE_VISIBILITY) {
/* leave vis mode blank for now, it will be patched up when
* we know if we are binning or not
*/
OUT_RINGP(ring, DRAW4(primtype, src_sel, idx_type, 0),
&ctx->draw_patches);
} else {
OUT_RING(ring, DRAW4(primtype, src_sel, idx_type, vismode));
}
OUT_RING(ring, instances); /* NumInstances */
OUT_RING(ring, count); /* NumIndices */
if (idx_bo) {
OUT_RING(ring, 0x0); /* XXX */
OUT_RELOC(ring, idx_bo, idx_offset, 0, 0);
OUT_RING (ring, idx_size);
}
 
emit_marker(ring, 7);
 
fd_reset_wfi(ctx);
}
 
 
static inline enum a4xx_index_size
fd4_size2indextype(unsigned index_size)
{
switch (index_size) {
case 1: return INDEX4_SIZE_8_BIT;
case 2: return INDEX4_SIZE_16_BIT;
case 4: return INDEX4_SIZE_32_BIT;
}
DBG("unsupported index size: %d", index_size);
assert(0);
return INDEX4_SIZE_32_BIT;
}
 
static inline void
fd4_draw_emit(struct fd_context *ctx, struct fd_ringbuffer *ring,
enum pc_di_vis_cull_mode vismode,
const struct pipe_draw_info *info)
{
struct pipe_index_buffer *idx = &ctx->indexbuf;
struct fd_bo *idx_bo = NULL;
enum a4xx_index_size idx_type;
enum pc_di_src_sel src_sel;
uint32_t idx_size, idx_offset;
 
if (info->indexed) {
assert(!idx->user_buffer);
 
idx_bo = fd_resource(idx->buffer)->bo;
idx_type = fd4_size2indextype(idx->index_size);
idx_size = idx->index_size * info->count;
idx_offset = idx->offset + (info->start * idx->index_size);
src_sel = DI_SRC_SEL_DMA;
} else {
idx_bo = NULL;
idx_type = INDEX4_SIZE_32_BIT;
idx_size = 0;
idx_offset = 0;
src_sel = DI_SRC_SEL_AUTO_INDEX;
}
 
fd4_draw(ctx, ring, ctx->primtypes[info->mode], vismode, src_sel,
info->count, info->instance_count,
idx_type, idx_size, idx_offset, idx_bo);
}
 
#endif /* FD4_DRAW_H_ */
/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/freedreno/a4xx/fd4_emit.c
0,0 → 1,765
/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */
 
/*
* Copyright (C) 2014 Rob Clark <robclark@freedesktop.org>
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Authors:
* Rob Clark <robclark@freedesktop.org>
*/
 
#include "pipe/p_state.h"
#include "util/u_string.h"
#include "util/u_memory.h"
#include "util/u_helpers.h"
#include "util/u_format.h"
 
#include "freedreno_resource.h"
 
#include "fd4_emit.h"
#include "fd4_blend.h"
#include "fd4_context.h"
#include "fd4_program.h"
#include "fd4_rasterizer.h"
#include "fd4_texture.h"
#include "fd4_format.h"
#include "fd4_zsa.h"
 
/* regid: base const register
* prsc or dwords: buffer containing constant values
* sizedwords: size of const value buffer
*/
void
fd4_emit_constant(struct fd_ringbuffer *ring,
enum adreno_state_block sb,
uint32_t regid, uint32_t offset, uint32_t sizedwords,
const uint32_t *dwords, struct pipe_resource *prsc)
{
uint32_t i, sz;
enum adreno_state_src src;
 
if (prsc) {
sz = 0;
src = 0x2; // TODO ??
} else {
sz = sizedwords;
src = SS_DIRECT;
}
 
OUT_PKT3(ring, CP_LOAD_STATE, 2 + sz);
OUT_RING(ring, CP_LOAD_STATE_0_DST_OFF(regid/4) |
CP_LOAD_STATE_0_STATE_SRC(src) |
CP_LOAD_STATE_0_STATE_BLOCK(sb) |
CP_LOAD_STATE_0_NUM_UNIT(sizedwords/4));
if (prsc) {
struct fd_bo *bo = fd_resource(prsc)->bo;
OUT_RELOC(ring, bo, offset,
CP_LOAD_STATE_1_STATE_TYPE(ST_CONSTANTS), 0);
} else {
OUT_RING(ring, CP_LOAD_STATE_1_EXT_SRC_ADDR(0) |
CP_LOAD_STATE_1_STATE_TYPE(ST_CONSTANTS));
dwords = (uint32_t *)&((uint8_t *)dwords)[offset];
}
for (i = 0; i < sz; i++) {
OUT_RING(ring, dwords[i]);
}
}
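/* Usage sketch: the clear path in fd4_draw.c loads the clear color as four
* direct dwords into the fragment const file:
*
*   fd4_emit_constant(ring, SB_FRAG_SHADER, 0, 0, 4, color->ui, NULL);
*
* whereas passing a prsc instead of a dwords pointer (as emit_constants()
* below does for user constbufs backed by a pipe_resource) switches the
* CP_LOAD_STATE packet to an indirect, reloc'd source address.
*/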
 
static void
emit_constants(struct fd_ringbuffer *ring,
enum adreno_state_block sb,
struct fd_constbuf_stateobj *constbuf,
struct ir3_shader_variant *shader,
bool emit_immediates)
{
uint32_t enabled_mask = constbuf->enabled_mask;
uint32_t max_const;
int i;
 
// XXX TODO only emit dirty consts.. but we need to keep track of
// whether they are clobbered by a clear, gmem2mem, or mem2gmem..
constbuf->dirty_mask = enabled_mask;
 
/* in particular, with the binning shader we may end up with unused
* consts, ie. we could end up w/ a constlen that is smaller
* than first_immediate. In that case, truncate the user consts
* early to avoid a HLSQ lockup caused by writing too many consts
*/
max_const = MIN2(shader->first_driver_param, shader->constlen);
 
/* emit user constants: */
if (enabled_mask & 1) {
const unsigned index = 0;
struct pipe_constant_buffer *cb = &constbuf->cb[index];
unsigned size = align(cb->buffer_size, 4) / 4; /* size in dwords */
 
// I expect that size should be a multiple of vec4's:
assert(size == align(size, 4));
 
/* and even if the start of the const buffer is before
* first_immediate, the end may not be:
*/
size = MIN2(size, 4 * max_const);
 
if (size && (constbuf->dirty_mask & (1 << index))) {
fd4_emit_constant(ring, sb, 0,
cb->buffer_offset, size,
cb->user_buffer, cb->buffer);
constbuf->dirty_mask &= ~(1 << index);
}
 
enabled_mask &= ~(1 << index);
}
 
/* emit ubos: */
if (shader->constlen > shader->first_driver_param) {
uint32_t params = MIN2(4, shader->constlen - shader->first_driver_param);
OUT_PKT3(ring, CP_LOAD_STATE, 2 + params * 4);
OUT_RING(ring, CP_LOAD_STATE_0_DST_OFF(shader->first_driver_param) |
CP_LOAD_STATE_0_STATE_SRC(SS_DIRECT) |
CP_LOAD_STATE_0_STATE_BLOCK(sb) |
CP_LOAD_STATE_0_NUM_UNIT(params));
OUT_RING(ring, CP_LOAD_STATE_1_EXT_SRC_ADDR(0) |
CP_LOAD_STATE_1_STATE_TYPE(ST_CONSTANTS));
 
for (i = 1; i <= params * 4; i++) {
struct pipe_constant_buffer *cb = &constbuf->cb[i];
assert(!cb->user_buffer);
if ((enabled_mask & (1 << i)) && cb->buffer)
OUT_RELOC(ring, fd_resource(cb->buffer)->bo, cb->buffer_offset, 0, 0);
else
OUT_RING(ring, 0xbad00000 | ((i - 1) << 16));
}
}
 
/* emit shader immediates: */
if (shader && emit_immediates) {
int size = shader->immediates_count;
uint32_t base = shader->first_immediate;
 
/* truncate size to avoid writing constants that the shader
* does not use:
*/
size = MIN2(size + base, shader->constlen) - base;
 
/* convert out of vec4: */
base *= 4;
size *= 4;
 
if (size > 0) {
fd4_emit_constant(ring, sb, base,
0, size, shader->immediates[0].val, NULL);
}
}
}
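/* Worked example (hypothetical sizes) for the truncation above: with
* cb->buffer_size = 80 bytes the user constbuf is align(80, 4) / 4 = 20
* dwords; if the variant has first_driver_param = constlen = 4 (vec4's),
* then max_const = 4 and the emit is clamped to MIN2(20, 4 * 4) = 16
* dwords, so we never write past the shader's constlen.
*/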
 
static void
emit_textures(struct fd_context *ctx, struct fd_ringbuffer *ring,
enum adreno_state_block sb, struct fd_texture_stateobj *tex)
{
unsigned i;
 
if (tex->num_samplers > 0) {
int num_samplers;
 
/* not sure if this is an a420.0 workaround, but we seem
* to need to emit these in pairs.. emit a final dummy
* entry if odd # of samplers:
*/
num_samplers = align(tex->num_samplers, 2);
 
/* output sampler state: */
OUT_PKT3(ring, CP_LOAD_STATE, 2 + (2 * num_samplers));
OUT_RING(ring, CP_LOAD_STATE_0_DST_OFF(0) |
CP_LOAD_STATE_0_STATE_SRC(SS_DIRECT) |
CP_LOAD_STATE_0_STATE_BLOCK(sb) |
CP_LOAD_STATE_0_NUM_UNIT(num_samplers));
OUT_RING(ring, CP_LOAD_STATE_1_STATE_TYPE(ST_SHADER) |
CP_LOAD_STATE_1_EXT_SRC_ADDR(0));
for (i = 0; i < tex->num_samplers; i++) {
static const struct fd4_sampler_stateobj dummy_sampler = {};
const struct fd4_sampler_stateobj *sampler = tex->samplers[i] ?
fd4_sampler_stateobj(tex->samplers[i]) :
&dummy_sampler;
OUT_RING(ring, sampler->texsamp0);
OUT_RING(ring, sampler->texsamp1);
}
 
for (; i < num_samplers; i++) {
OUT_RING(ring, 0x00000000);
OUT_RING(ring, 0x00000000);
}
}
 
if (tex->num_textures > 0) {
/* emit texture state: */
OUT_PKT3(ring, CP_LOAD_STATE, 2 + (8 * tex->num_textures));
OUT_RING(ring, CP_LOAD_STATE_0_DST_OFF(0) |
CP_LOAD_STATE_0_STATE_SRC(SS_DIRECT) |
CP_LOAD_STATE_0_STATE_BLOCK(sb) |
CP_LOAD_STATE_0_NUM_UNIT(tex->num_textures));
OUT_RING(ring, CP_LOAD_STATE_1_STATE_TYPE(ST_CONSTANTS) |
CP_LOAD_STATE_1_EXT_SRC_ADDR(0));
for (i = 0; i < tex->num_textures; i++) {
static const struct fd4_pipe_sampler_view dummy_view = {};
const struct fd4_pipe_sampler_view *view = tex->textures[i] ?
fd4_pipe_sampler_view(tex->textures[i]) :
&dummy_view;
struct fd_resource *rsc = fd_resource(view->base.texture);
unsigned start = view->base.u.tex.first_level;
uint32_t offset = fd_resource_offset(rsc, start, 0);
 
OUT_RING(ring, view->texconst0);
OUT_RING(ring, view->texconst1);
OUT_RING(ring, view->texconst2);
OUT_RING(ring, view->texconst3);
OUT_RELOC(ring, rsc->bo, offset, view->textconst4, 0);
OUT_RING(ring, 0x00000000);
OUT_RING(ring, 0x00000000);
OUT_RING(ring, 0x00000000);
}
}
}
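/* Example of the pairing rule above: with tex->num_samplers = 3 we get
* num_samplers = align(3, 2) = 4, so the CP_LOAD_STATE packet is sized
* 2 + 2 * 4 dwords and carries the three real texsamp0/texsamp1 pairs
* followed by one zero-filled dummy pair.
*/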
 
/* emit texture state for mem->gmem restore operation.. eventually it would
* be good to get rid of this and use normal CSO/etc state for more of these
* special cases..
*/
void
fd4_emit_gmem_restore_tex(struct fd_ringbuffer *ring, struct pipe_surface *psurf)
{
struct fd_resource *rsc = fd_resource(psurf->texture);
unsigned lvl = psurf->u.tex.level;
struct fd_resource_slice *slice = fd_resource_slice(rsc, lvl);
uint32_t offset = fd_resource_offset(rsc, lvl, psurf->u.tex.first_layer);
enum pipe_format format = fd4_gmem_restore_format(psurf->format);
 
debug_assert(psurf->u.tex.first_layer == psurf->u.tex.last_layer);
 
/* output sampler state: */
OUT_PKT3(ring, CP_LOAD_STATE, 4);
OUT_RING(ring, CP_LOAD_STATE_0_DST_OFF(0) |
CP_LOAD_STATE_0_STATE_SRC(SS_DIRECT) |
CP_LOAD_STATE_0_STATE_BLOCK(SB_FRAG_TEX) |
CP_LOAD_STATE_0_NUM_UNIT(1));
OUT_RING(ring, CP_LOAD_STATE_1_STATE_TYPE(ST_SHADER) |
CP_LOAD_STATE_1_EXT_SRC_ADDR(0));
OUT_RING(ring, A4XX_TEX_SAMP_0_XY_MAG(A4XX_TEX_NEAREST) |
A4XX_TEX_SAMP_0_XY_MIN(A4XX_TEX_NEAREST) |
A4XX_TEX_SAMP_0_WRAP_S(A4XX_TEX_CLAMP_TO_EDGE) |
A4XX_TEX_SAMP_0_WRAP_T(A4XX_TEX_CLAMP_TO_EDGE) |
A4XX_TEX_SAMP_0_WRAP_R(A4XX_TEX_REPEAT));
OUT_RING(ring, 0x00000000);
 
/* emit texture state: */
OUT_PKT3(ring, CP_LOAD_STATE, 10);
OUT_RING(ring, CP_LOAD_STATE_0_DST_OFF(0) |
CP_LOAD_STATE_0_STATE_SRC(SS_DIRECT) |
CP_LOAD_STATE_0_STATE_BLOCK(SB_FRAG_TEX) |
CP_LOAD_STATE_0_NUM_UNIT(1));
OUT_RING(ring, CP_LOAD_STATE_1_STATE_TYPE(ST_CONSTANTS) |
CP_LOAD_STATE_1_EXT_SRC_ADDR(0));
OUT_RING(ring, A4XX_TEX_CONST_0_FMT(fd4_pipe2tex(format)) |
A4XX_TEX_CONST_0_TYPE(A4XX_TEX_2D) |
fd4_tex_swiz(format, PIPE_SWIZZLE_RED, PIPE_SWIZZLE_GREEN,
PIPE_SWIZZLE_BLUE, PIPE_SWIZZLE_ALPHA));
OUT_RING(ring, A4XX_TEX_CONST_1_WIDTH(psurf->width) |
A4XX_TEX_CONST_1_HEIGHT(psurf->height));
OUT_RING(ring, A4XX_TEX_CONST_2_PITCH(slice->pitch * rsc->cpp));
OUT_RING(ring, 0x00000000);
OUT_RELOC(ring, rsc->bo, offset, 0, 0);
OUT_RING(ring, 0x00000000);
OUT_RING(ring, 0x00000000);
OUT_RING(ring, 0x00000000);
}
 
void
fd4_emit_vertex_bufs(struct fd_ringbuffer *ring, struct fd4_emit *emit)
{
int32_t i, j, last = -1;
uint32_t total_in = 0;
const struct fd_vertex_state *vtx = emit->vtx;
struct ir3_shader_variant *vp = fd4_emit_get_vp(emit);
unsigned vertex_regid = regid(63, 0), instance_regid = regid(63, 0);
 
for (i = 0; i < vp->inputs_count; i++) {
uint8_t semantic = sem2name(vp->inputs[i].semantic);
if (semantic == TGSI_SEMANTIC_VERTEXID_NOBASE)
vertex_regid = vp->inputs[i].regid;
else if (semantic == TGSI_SEMANTIC_INSTANCEID)
instance_regid = vp->inputs[i].regid;
else if ((i < vtx->vtx->num_elements) && vp->inputs[i].compmask)
last = i;
}
 
/* hw doesn't like to be configured for zero vbo's, it seems: */
if ((vtx->vtx->num_elements == 0) &&
(vertex_regid == regid(63, 0)) &&
(instance_regid == regid(63, 0)))
return;
 
for (i = 0, j = 0; i <= last; i++) {
assert(sem2name(vp->inputs[i].semantic) == 0);
if (vp->inputs[i].compmask) {
struct pipe_vertex_element *elem = &vtx->vtx->pipe[i];
const struct pipe_vertex_buffer *vb =
&vtx->vertexbuf.vb[elem->vertex_buffer_index];
struct fd_resource *rsc = fd_resource(vb->buffer);
enum pipe_format pfmt = elem->src_format;
enum a4xx_vtx_fmt fmt = fd4_pipe2vtx(pfmt);
bool switchnext = (i != last) ||
(vertex_regid != regid(63, 0)) ||
(instance_regid != regid(63, 0));
bool isint = util_format_is_pure_integer(pfmt);
uint32_t fs = util_format_get_blocksize(pfmt);
uint32_t off = vb->buffer_offset + elem->src_offset;
uint32_t size = fd_bo_size(rsc->bo) - off;
debug_assert(fmt != ~0);
 
OUT_PKT0(ring, REG_A4XX_VFD_FETCH(j), 4);
OUT_RING(ring, A4XX_VFD_FETCH_INSTR_0_FETCHSIZE(fs - 1) |
A4XX_VFD_FETCH_INSTR_0_BUFSTRIDE(vb->stride) |
COND(elem->instance_divisor, A4XX_VFD_FETCH_INSTR_0_INSTANCED) |
COND(switchnext, A4XX_VFD_FETCH_INSTR_0_SWITCHNEXT));
OUT_RELOC(ring, rsc->bo, off, 0, 0);
OUT_RING(ring, A4XX_VFD_FETCH_INSTR_2_SIZE(size));
OUT_RING(ring, A4XX_VFD_FETCH_INSTR_3_STEPRATE(MAX2(1, elem->instance_divisor)));
 
OUT_PKT0(ring, REG_A4XX_VFD_DECODE_INSTR(j), 1);
OUT_RING(ring, A4XX_VFD_DECODE_INSTR_CONSTFILL |
A4XX_VFD_DECODE_INSTR_WRITEMASK(vp->inputs[i].compmask) |
A4XX_VFD_DECODE_INSTR_FORMAT(fmt) |
A4XX_VFD_DECODE_INSTR_SWAP(fd4_pipe2swap(pfmt)) |
A4XX_VFD_DECODE_INSTR_REGID(vp->inputs[i].regid) |
A4XX_VFD_DECODE_INSTR_SHIFTCNT(fs) |
A4XX_VFD_DECODE_INSTR_LASTCOMPVALID |
COND(isint, A4XX_VFD_DECODE_INSTR_INT) |
COND(switchnext, A4XX_VFD_DECODE_INSTR_SWITCHNEXT));
 
total_in += vp->inputs[i].ncomp;
j++;
}
}
 
OUT_PKT0(ring, REG_A4XX_VFD_CONTROL_0, 5);
OUT_RING(ring, A4XX_VFD_CONTROL_0_TOTALATTRTOVS(total_in) |
0xa0000 | /* XXX */
A4XX_VFD_CONTROL_0_STRMDECINSTRCNT(j) |
A4XX_VFD_CONTROL_0_STRMFETCHINSTRCNT(j));
OUT_RING(ring, A4XX_VFD_CONTROL_1_MAXSTORAGE(129) | // XXX
A4XX_VFD_CONTROL_1_REGID4VTX(vertex_regid) |
A4XX_VFD_CONTROL_1_REGID4INST(instance_regid));
OUT_RING(ring, 0x00000000); /* XXX VFD_CONTROL_2 */
OUT_RING(ring, A4XX_VFD_CONTROL_3_REGID_VTXCNT(regid(63, 0)));
OUT_RING(ring, 0x00000000); /* XXX VFD_CONTROL_4 */
 
/* cache invalidate, otherwise vertex fetch could see
* stale vbo contents:
*/
OUT_PKT0(ring, REG_A4XX_UCHE_INVALIDATE0, 2);
OUT_RING(ring, 0x00000000);
OUT_RING(ring, 0x00000012);
}
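/* Worked example: for the solid_vbuf element set up in fd4_context.c
* (PIPE_FORMAT_R32G32B32_FLOAT, stride 12), fs = util_format_get_blocksize()
* = 12, so the fetch instr above gets FETCHSIZE(12 - 1) and BUFSTRIDE(12),
* while the decode instr gets FORMAT(VFMT4_32_32_32_FLOAT) from
* fd4_pipe2vtx() and SHIFTCNT(12).
*/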
 
void
fd4_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring,
struct fd4_emit *emit)
{
struct ir3_shader_variant *vp = fd4_emit_get_vp(emit);
struct ir3_shader_variant *fp = fd4_emit_get_fp(emit);
uint32_t dirty = emit->dirty;
 
emit_marker(ring, 5);
 
if ((dirty & (FD_DIRTY_ZSA | FD_DIRTY_PROG)) && !emit->key.binning_pass) {
uint32_t val = fd4_zsa_stateobj(ctx->zsa)->rb_render_control;
 
/* I suppose if we needed to (which I don't *think* we need
* to), we could emit this for binning pass too. But we
* would need to keep a different patch-list for binning
* vs render pass.
*/
 
OUT_PKT0(ring, REG_A4XX_RB_RENDER_CONTROL, 1);
OUT_RINGP(ring, val, &fd4_context(ctx)->rbrc_patches);
}
 
if (dirty & FD_DIRTY_ZSA) {
struct fd4_zsa_stateobj *zsa = fd4_zsa_stateobj(ctx->zsa);
 
OUT_PKT0(ring, REG_A4XX_RB_ALPHA_CONTROL, 1);
OUT_RING(ring, zsa->rb_alpha_control);
 
OUT_PKT0(ring, REG_A4XX_RB_STENCIL_CONTROL, 2);
OUT_RING(ring, zsa->rb_stencil_control);
OUT_RING(ring, zsa->rb_stencil_control2);
}
 
if (dirty & (FD_DIRTY_ZSA | FD_DIRTY_STENCIL_REF)) {
struct fd4_zsa_stateobj *zsa = fd4_zsa_stateobj(ctx->zsa);
struct pipe_stencil_ref *sr = &ctx->stencil_ref;
 
OUT_PKT0(ring, REG_A4XX_RB_STENCILREFMASK, 2);
OUT_RING(ring, zsa->rb_stencilrefmask |
A4XX_RB_STENCILREFMASK_STENCILREF(sr->ref_value[0]));
OUT_RING(ring, zsa->rb_stencilrefmask_bf |
A4XX_RB_STENCILREFMASK_BF_STENCILREF(sr->ref_value[1]));
}
 
if (dirty & (FD_DIRTY_ZSA | FD_DIRTY_PROG)) {
struct fd4_zsa_stateobj *zsa = fd4_zsa_stateobj(ctx->zsa);
bool fragz = fp->has_kill | fp->writes_pos;
 
OUT_PKT0(ring, REG_A4XX_RB_DEPTH_CONTROL, 1);
OUT_RING(ring, zsa->rb_depth_control |
COND(fragz, A4XX_RB_DEPTH_CONTROL_EARLY_Z_DISABLE));
 
/* maybe this register/bitfield needs a better name.. this
* appears to be just disabling early-z
*/
OUT_PKT0(ring, REG_A4XX_GRAS_ALPHA_CONTROL, 1);
OUT_RING(ring, zsa->gras_alpha_control |
COND(fragz, A4XX_GRAS_ALPHA_CONTROL_ALPHA_TEST_ENABLE));
}
 
if (dirty & FD_DIRTY_RASTERIZER) {
struct fd4_rasterizer_stateobj *rasterizer =
fd4_rasterizer_stateobj(ctx->rasterizer);
 
OUT_PKT0(ring, REG_A4XX_GRAS_SU_MODE_CONTROL, 1);
OUT_RING(ring, rasterizer->gras_su_mode_control |
A4XX_GRAS_SU_MODE_CONTROL_RENDERING_PASS);
 
OUT_PKT0(ring, REG_A4XX_GRAS_SU_POINT_MINMAX, 2);
OUT_RING(ring, rasterizer->gras_su_point_minmax);
OUT_RING(ring, rasterizer->gras_su_point_size);
 
OUT_PKT0(ring, REG_A4XX_GRAS_SU_POLY_OFFSET_SCALE, 2);
OUT_RING(ring, rasterizer->gras_su_poly_offset_scale);
OUT_RING(ring, rasterizer->gras_su_poly_offset_offset);
 
OUT_PKT0(ring, REG_A4XX_GRAS_CL_CLIP_CNTL, 1);
OUT_RING(ring, rasterizer->gras_cl_clip_cntl);
}
 
/* NOTE: since primitive_restart is not actually part of any
* state object, we need to make sure that we always emit
* PRIM_VTX_CNTL.. either that or be more clever and detect
* when it changes.
*/
if (emit->info) {
const struct pipe_draw_info *info = emit->info;
uint32_t val = fd4_rasterizer_stateobj(ctx->rasterizer)
->pc_prim_vtx_cntl;
 
if (info->indexed && info->primitive_restart)
val |= A4XX_PC_PRIM_VTX_CNTL_PRIMITIVE_RESTART;
 
val |= COND(vp->writes_psize, A4XX_PC_PRIM_VTX_CNTL_PSIZE);
 
if (fp->total_in > 0) {
uint32_t varout = align(fp->total_in, 16) / 16;
if (varout > 1)
varout = align(varout, 2);
val |= A4XX_PC_PRIM_VTX_CNTL_VAROUT(varout);
}
 
OUT_PKT0(ring, REG_A4XX_PC_PRIM_VTX_CNTL, 2);
OUT_RING(ring, val);
OUT_RING(ring, 0x12); /* XXX UNKNOWN_21C5 */
}
 
if (dirty & FD_DIRTY_SCISSOR) {
struct pipe_scissor_state *scissor = fd_context_get_scissor(ctx);
 
OUT_PKT0(ring, REG_A4XX_GRAS_SC_WINDOW_SCISSOR_BR, 2);
OUT_RING(ring, A4XX_GRAS_SC_WINDOW_SCISSOR_BR_X(scissor->maxx - 1) |
A4XX_GRAS_SC_WINDOW_SCISSOR_BR_Y(scissor->maxy - 1));
OUT_RING(ring, A4XX_GRAS_SC_WINDOW_SCISSOR_TL_X(scissor->minx) |
A4XX_GRAS_SC_WINDOW_SCISSOR_TL_Y(scissor->miny));
 
ctx->max_scissor.minx = MIN2(ctx->max_scissor.minx, scissor->minx);
ctx->max_scissor.miny = MIN2(ctx->max_scissor.miny, scissor->miny);
ctx->max_scissor.maxx = MAX2(ctx->max_scissor.maxx, scissor->maxx);
ctx->max_scissor.maxy = MAX2(ctx->max_scissor.maxy, scissor->maxy);
}
 
if (dirty & FD_DIRTY_VIEWPORT) {
fd_wfi(ctx, ring);
OUT_PKT0(ring, REG_A4XX_GRAS_CL_VPORT_XOFFSET_0, 6);
OUT_RING(ring, A4XX_GRAS_CL_VPORT_XOFFSET_0(ctx->viewport.translate[0]));
OUT_RING(ring, A4XX_GRAS_CL_VPORT_XSCALE_0(ctx->viewport.scale[0]));
OUT_RING(ring, A4XX_GRAS_CL_VPORT_YOFFSET_0(ctx->viewport.translate[1]));
OUT_RING(ring, A4XX_GRAS_CL_VPORT_YSCALE_0(ctx->viewport.scale[1]));
OUT_RING(ring, A4XX_GRAS_CL_VPORT_ZOFFSET_0(ctx->viewport.translate[2]));
OUT_RING(ring, A4XX_GRAS_CL_VPORT_ZSCALE_0(ctx->viewport.scale[2]));
}
 
if (dirty & FD_DIRTY_PROG)
fd4_program_emit(ring, emit);
 
if ((dirty & (FD_DIRTY_PROG | FD_DIRTY_CONSTBUF)) &&
/* evil hack to deal sanely with clear path: */
(emit->prog == &ctx->prog)) {
fd_wfi(ctx, ring);
emit_constants(ring, SB_VERT_SHADER,
&ctx->constbuf[PIPE_SHADER_VERTEX],
vp, emit->prog->dirty & FD_SHADER_DIRTY_VP);
if (!emit->key.binning_pass) {
emit_constants(ring, SB_FRAG_SHADER,
&ctx->constbuf[PIPE_SHADER_FRAGMENT],
fp, emit->prog->dirty & FD_SHADER_DIRTY_FP);
}
}
 
/* emit driver params every time */
if (emit->info && emit->prog == &ctx->prog) {
uint32_t vertex_params[4] = {
emit->info->indexed ? emit->info->index_bias : emit->info->start,
0,
0,
0
};
if (vp->constlen >= vp->first_driver_param + 4) {
fd4_emit_constant(ring, SB_VERT_SHADER,
(vp->first_driver_param + 4) * 4,
0, 4, vertex_params, NULL);
}
}
 
if ((dirty & FD_DIRTY_BLEND) && ctx->blend) {
struct fd4_blend_stateobj *blend = fd4_blend_stateobj(ctx->blend);
uint32_t i;
 
for (i = 0; i < 8; i++) {
OUT_PKT0(ring, REG_A4XX_RB_MRT_CONTROL(i), 1);
OUT_RING(ring, blend->rb_mrt[i].control);
 
OUT_PKT0(ring, REG_A4XX_RB_MRT_BLEND_CONTROL(i), 1);
OUT_RING(ring, blend->rb_mrt[i].blend_control);
}
 
OUT_PKT0(ring, REG_A4XX_RB_FS_OUTPUT, 1);
OUT_RING(ring, blend->rb_fs_output |
A4XX_RB_FS_OUTPUT_SAMPLE_MASK(0xffff));
}
 
if (dirty & FD_DIRTY_BLEND_COLOR) {
struct pipe_blend_color *bcolor = &ctx->blend_color;
OUT_PKT0(ring, REG_A4XX_RB_BLEND_RED, 4);
OUT_RING(ring, A4XX_RB_BLEND_RED_UINT(bcolor->color[0] * 255.0) |
A4XX_RB_BLEND_RED_FLOAT(bcolor->color[0]));
OUT_RING(ring, A4XX_RB_BLEND_GREEN_UINT(bcolor->color[1] * 255.0) |
A4XX_RB_BLEND_GREEN_FLOAT(bcolor->color[1]));
OUT_RING(ring, A4XX_RB_BLEND_BLUE_UINT(bcolor->color[2] * 255.0) |
A4XX_RB_BLEND_BLUE_FLOAT(bcolor->color[2]));
OUT_RING(ring, A4XX_RB_BLEND_ALPHA_UINT(bcolor->color[3] * 255.0) |
A4XX_RB_BLEND_ALPHA_FLOAT(bcolor->color[3]));
}
 
if (dirty & FD_DIRTY_VERTTEX) {
if (vp->has_samp)
emit_textures(ctx, ring, SB_VERT_TEX, &ctx->verttex);
else
dirty &= ~FD_DIRTY_VERTTEX;
}
 
if (dirty & FD_DIRTY_FRAGTEX) {
if (fp->has_samp)
emit_textures(ctx, ring, SB_FRAG_TEX, &ctx->fragtex);
else
dirty &= ~FD_DIRTY_FRAGTEX;
}
 
ctx->dirty &= ~dirty;
}
 
/* emit setup at the beginning of a new cmdstream buffer (don't rely on
* previous state; there could have been a context switch between ioctls):
*/
void
fd4_emit_restore(struct fd_context *ctx)
{
struct fd4_context *fd4_ctx = fd4_context(ctx);
struct fd_ringbuffer *ring = ctx->ring;
 
OUT_PKT0(ring, REG_A4XX_RBBM_PERFCTR_CTL, 1);
OUT_RING(ring, 0x00000001);
 
OUT_PKT0(ring, REG_A4XX_GRAS_DEBUG_ECO_CONTROL, 1);
OUT_RING(ring, 0x00000000);
 
OUT_PKT0(ring, REG_A4XX_UNKNOWN_0EC3, 1);
OUT_RING(ring, 0x00000006);
 
OUT_PKT0(ring, REG_A4XX_UNKNOWN_0F03, 1);
OUT_RING(ring, 0x0000003a);
 
OUT_PKT0(ring, REG_A4XX_UNKNOWN_0D01, 1);
OUT_RING(ring, 0x00000001);
 
OUT_PKT0(ring, REG_A4XX_UNKNOWN_0E42, 1);
OUT_RING(ring, 0x00000000);
 
OUT_PKT0(ring, REG_A4XX_UCHE_CACHE_WAYS_VFD, 1);
OUT_RING(ring, 0x00000007);
 
OUT_PKT0(ring, REG_A4XX_UCHE_CACHE_MODE_CONTROL, 1);
OUT_RING(ring, 0x00000000);
 
OUT_PKT0(ring, REG_A4XX_UCHE_INVALIDATE0, 2);
OUT_RING(ring, 0x00000000);
OUT_RING(ring, 0x00000012);
 
OUT_PKT0(ring, REG_A4XX_UNKNOWN_0E05, 1);
OUT_RING(ring, 0x00000000);
 
OUT_PKT0(ring, REG_A4XX_UNKNOWN_0CC5, 1);
OUT_RING(ring, 0x00000006);
 
OUT_PKT0(ring, REG_A4XX_UNKNOWN_0CC6, 1);
OUT_RING(ring, 0x00000000);
 
OUT_PKT0(ring, REG_A4XX_UNKNOWN_0EC2, 1);
OUT_RING(ring, 0x00040000);
 
OUT_PKT0(ring, REG_A4XX_UNKNOWN_2001, 1);
OUT_RING(ring, 0x00000000);
 
OUT_PKT3(ring, CP_INVALIDATE_STATE, 1);
OUT_RING(ring, 0x00001000);
 
OUT_PKT0(ring, REG_A4XX_UNKNOWN_20EF, 1);
OUT_RING(ring, 0x00000000);
 
OUT_PKT0(ring, REG_A4XX_UNKNOWN_20F0, 1);
OUT_RING(ring, 0x00000000);
 
OUT_PKT0(ring, REG_A4XX_UNKNOWN_20F1, 1);
OUT_RING(ring, 0x00000000);
 
OUT_PKT0(ring, REG_A4XX_UNKNOWN_20F2, 1);
OUT_RING(ring, 0x00000000);
 
OUT_PKT0(ring, REG_A4XX_RB_BLEND_RED, 4);
OUT_RING(ring, A4XX_RB_BLEND_RED_UINT(0) |
A4XX_RB_BLEND_RED_FLOAT(0.0));
OUT_RING(ring, A4XX_RB_BLEND_GREEN_UINT(0) |
A4XX_RB_BLEND_GREEN_FLOAT(0.0));
OUT_RING(ring, A4XX_RB_BLEND_BLUE_UINT(0) |
A4XX_RB_BLEND_BLUE_FLOAT(0.0));
OUT_RING(ring, A4XX_RB_BLEND_ALPHA_UINT(0x7fff) |
A4XX_RB_BLEND_ALPHA_FLOAT(1.0));
 
OUT_PKT0(ring, REG_A4XX_UNKNOWN_20F7, 1);
OUT_RING(ring, 0x3f800000);
 
OUT_PKT0(ring, REG_A4XX_UNKNOWN_2152, 1);
OUT_RING(ring, 0x00000000);
 
OUT_PKT0(ring, REG_A4XX_UNKNOWN_2153, 1);
OUT_RING(ring, 0x00000000);
 
OUT_PKT0(ring, REG_A4XX_UNKNOWN_2154, 1);
OUT_RING(ring, 0x00000000);
 
OUT_PKT0(ring, REG_A4XX_UNKNOWN_2155, 1);
OUT_RING(ring, 0x00000000);
 
OUT_PKT0(ring, REG_A4XX_UNKNOWN_2156, 1);
OUT_RING(ring, 0x00000000);
 
OUT_PKT0(ring, REG_A4XX_UNKNOWN_2157, 1);
OUT_RING(ring, 0x00000000);
 
OUT_PKT0(ring, REG_A4XX_UNKNOWN_21C3, 1);
OUT_RING(ring, 0x0000001d);
 
OUT_PKT0(ring, REG_A4XX_PC_GS_PARAM, 1);
OUT_RING(ring, 0x00000000);
 
OUT_PKT0(ring, REG_A4XX_UNKNOWN_21E6, 1);
OUT_RING(ring, 0x00000001);
 
OUT_PKT0(ring, REG_A4XX_PC_HS_PARAM, 1);
OUT_RING(ring, 0x00000000);
 
OUT_PKT0(ring, REG_A4XX_UNKNOWN_22D7, 1);
OUT_RING(ring, 0x00000000);
 
OUT_PKT0(ring, REG_A4XX_TPL1_TP_TEX_OFFSET, 1);
OUT_RING(ring, 0x00000000);
 
OUT_PKT0(ring, REG_A4XX_TPL1_TP_TEX_COUNT, 1);
OUT_RING(ring, A4XX_TPL1_TP_TEX_COUNT_VS(16) |
A4XX_TPL1_TP_TEX_COUNT_HS(0) |
A4XX_TPL1_TP_TEX_COUNT_DS(0) |
A4XX_TPL1_TP_TEX_COUNT_GS(0));
 
OUT_PKT0(ring, REG_A4XX_TPL1_TP_FS_TEX_COUNT, 1);
OUT_RING(ring, 16);
 
/* we don't use this yet.. probably best to disable.. */
OUT_PKT3(ring, CP_SET_DRAW_STATE, 2);
OUT_RING(ring, CP_SET_DRAW_STATE_0_COUNT(0) |
CP_SET_DRAW_STATE_0_DISABLE_ALL_GROUPS |
CP_SET_DRAW_STATE_0_GROUP_ID(0));
OUT_RING(ring, CP_SET_DRAW_STATE_1_ADDR(0));
 
OUT_PKT0(ring, REG_A4XX_SP_VS_PVT_MEM_PARAM, 2);
OUT_RING(ring, 0x08000001); /* SP_VS_PVT_MEM_PARAM */
OUT_RELOC(ring, fd4_ctx->vs_pvt_mem, 0,0,0); /* SP_VS_PVT_MEM_ADDR */
 
OUT_PKT0(ring, REG_A4XX_SP_FS_PVT_MEM_PARAM, 2);
OUT_RING(ring, 0x08000001); /* SP_FS_PVT_MEM_PARAM */
OUT_RELOC(ring, fd4_ctx->fs_pvt_mem, 0,0,0); /* SP_FS_PVT_MEM_ADDR */
 
OUT_PKT0(ring, REG_A4XX_GRAS_SC_CONTROL, 1);
OUT_RING(ring, A4XX_GRAS_SC_CONTROL_RENDER_MODE(RB_RENDERING_PASS) |
A4XX_GRAS_SC_CONTROL_MSAA_DISABLE |
A4XX_GRAS_SC_CONTROL_MSAA_SAMPLES(MSAA_ONE) |
A4XX_GRAS_SC_CONTROL_RASTER_MODE(0));
 
OUT_PKT0(ring, REG_A4XX_RB_MSAA_CONTROL, 1);
OUT_RING(ring, A4XX_RB_MSAA_CONTROL_DISABLE |
A4XX_RB_MSAA_CONTROL_SAMPLES(MSAA_ONE));
 
OUT_PKT0(ring, REG_A4XX_GRAS_CL_GB_CLIP_ADJ, 1);
OUT_RING(ring, A4XX_GRAS_CL_GB_CLIP_ADJ_HORZ(0) |
A4XX_GRAS_CL_GB_CLIP_ADJ_VERT(0));
 
OUT_PKT0(ring, REG_A4XX_RB_ALPHA_CONTROL, 1);
OUT_RING(ring, A4XX_RB_ALPHA_CONTROL_ALPHA_TEST_FUNC(FUNC_ALWAYS));
 
OUT_PKT0(ring, REG_A4XX_RB_FS_OUTPUT, 1);
OUT_RING(ring, A4XX_RB_FS_OUTPUT_SAMPLE_MASK(0xffff));
 
OUT_PKT0(ring, REG_A4XX_RB_RENDER_COMPONENTS, 1);
OUT_RING(ring, A4XX_RB_RENDER_COMPONENTS_RT0(0xf));
 
OUT_PKT0(ring, REG_A4XX_GRAS_CLEAR_CNTL, 1);
OUT_RING(ring, A4XX_GRAS_CLEAR_CNTL_NOT_FASTCLEAR);
 
OUT_PKT0(ring, REG_A4XX_GRAS_ALPHA_CONTROL, 1);
OUT_RING(ring, 0x0);
 
ctx->needs_rb_fbd = true;
}
/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/freedreno/a4xx/fd4_emit.h
0,0 → 1,99
/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */
 
/*
* Copyright (C) 2014 Rob Clark <robclark@freedesktop.org>
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Authors:
* Rob Clark <robclark@freedesktop.org>
*/
 
#ifndef FD4_EMIT_H
#define FD4_EMIT_H
 
#include "pipe/p_context.h"
 
#include "freedreno_context.h"
#include "fd4_format.h"
#include "fd4_program.h"
#include "ir3_shader.h"
 
struct fd_ringbuffer;
enum adreno_state_block;
 
void fd4_emit_constant(struct fd_ringbuffer *ring,
enum adreno_state_block sb,
uint32_t regid, uint32_t offset, uint32_t sizedwords,
const uint32_t *dwords, struct pipe_resource *prsc);
 
void fd4_emit_gmem_restore_tex(struct fd_ringbuffer *ring,
struct pipe_surface *psurf);
 
/* grouped together emit-state for prog/vertex/state emit: */
struct fd4_emit {
const struct fd_vertex_state *vtx;
const struct fd_program_stateobj *prog;
const struct pipe_draw_info *info;
struct ir3_shader_key key;
enum a4xx_color_fmt format;
enum pipe_format pformat;
uint32_t dirty;
 
/* cached to avoid repeated lookups of same variants: */
struct ir3_shader_variant *vp, *fp;
/* TODO: other shader stages.. */
};
 
static inline enum a4xx_color_fmt fd4_emit_format(struct pipe_surface *surf)
{
if (!surf)
return 0;
return fd4_pipe2color(surf->format);
}
 
static inline struct ir3_shader_variant *
fd4_emit_get_vp(struct fd4_emit *emit)
{
if (!emit->vp) {
struct fd4_shader_stateobj *so = emit->prog->vp;
emit->vp = ir3_shader_variant(so->shader, emit->key);
}
return emit->vp;
}
 
static inline struct ir3_shader_variant *
fd4_emit_get_fp(struct fd4_emit *emit)
{
if (!emit->fp) {
struct fd4_shader_stateobj *so = emit->prog->fp;
emit->fp = ir3_shader_variant(so->shader, emit->key);
}
return emit->fp;
}
 
void fd4_emit_vertex_bufs(struct fd_ringbuffer *ring, struct fd4_emit *emit);
 
void fd4_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring,
struct fd4_emit *emit);
 
void fd4_emit_restore(struct fd_context *ctx);
 
#endif /* FD4_EMIT_H */
/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/freedreno/a4xx/fd4_format.c
0,0 → 1,366
/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */
 
/*
* Copyright (C) 2014 Rob Clark <robclark@freedesktop.org>
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Authors:
* Rob Clark <robclark@freedesktop.org>
*/
 
#include "pipe/p_defines.h"
#include "util/u_format.h"
 
#include "fd4_format.h"
 
 
/* Specifies the table of all the formats and their features. Also supplies
* the helpers that look up various data in those tables.
*/
 
struct fd4_format {
enum a4xx_vtx_fmt vtx;
enum a4xx_tex_fmt tex;
enum a4xx_color_fmt rb;
enum a3xx_color_swap swap;
boolean present;
};
 
#define RB4_NONE ~0
 
/* vertex + texture */
#define VT(pipe, fmt, rbfmt, swapfmt) \
[PIPE_FORMAT_ ## pipe] = { \
.present = 1, \
.vtx = VFMT4_ ## fmt, \
.tex = TFMT4_ ## fmt, \
.rb = RB4_ ## rbfmt, \
.swap = swapfmt \
}
 
/* texture-only */
#define _T(pipe, fmt, rbfmt, swapfmt) \
[PIPE_FORMAT_ ## pipe] = { \
.present = 1, \
.vtx = ~0, \
.tex = TFMT4_ ## fmt, \
.rb = RB4_ ## rbfmt, \
.swap = swapfmt \
}
 
/* vertex-only */
#define V_(pipe, fmt, rbfmt, swapfmt) \
[PIPE_FORMAT_ ## pipe] = { \
.present = 1, \
.vtx = VFMT4_ ## fmt, \
.tex = ~0, \
.rb = RB4_ ## rbfmt, \
.swap = swapfmt \
}
 
static struct fd4_format formats[PIPE_FORMAT_COUNT] = {
/* 8-bit */
VT(R8_UNORM, 8_UNORM, R8_UNORM, WZYX),
V_(R8_SNORM, 8_SNORM, NONE, WZYX),
V_(R8_UINT, 8_UINT, NONE, WZYX),
V_(R8_SINT, 8_SINT, NONE, WZYX),
V_(R8_USCALED, 8_UINT, NONE, WZYX),
V_(R8_SSCALED, 8_UINT, NONE, WZYX),
 
_T(A8_UNORM, 8_UNORM, A8_UNORM, WZYX),
_T(L8_UNORM, 8_UNORM, R8_UNORM, WZYX),
_T(I8_UNORM, 8_UNORM, NONE, WZYX),
 
/* 16-bit */
V_(R16_UNORM, 16_UNORM, NONE, WZYX),
V_(R16_SNORM, 16_SNORM, NONE, WZYX),
VT(R16_UINT, 16_UINT, R16_UINT, WZYX),
VT(R16_SINT, 16_SINT, R16_SINT, WZYX),
V_(R16_USCALED, 16_UINT, NONE, WZYX),
V_(R16_SSCALED, 16_UINT, NONE, WZYX),
VT(R16_FLOAT, 16_FLOAT, NONE, WZYX),
 
_T(A16_UINT, 16_UINT, NONE, WZYX),
_T(A16_SINT, 16_SINT, NONE, WZYX),
_T(L16_UINT, 16_UINT, NONE, WZYX),
_T(L16_SINT, 16_SINT, NONE, WZYX),
_T(I16_UINT, 16_UINT, NONE, WZYX),
_T(I16_SINT, 16_SINT, NONE, WZYX),
 
VT(R8G8_UNORM, 8_8_UNORM, R8G8_UNORM, WZYX),
VT(R8G8_SNORM, 8_8_SNORM, R8G8_SNORM, WZYX),
VT(R8G8_UINT, 8_8_UINT, NONE, WZYX),
VT(R8G8_SINT, 8_8_SINT, NONE, WZYX),
V_(R8G8_USCALED, 8_8_UINT, NONE, WZYX),
V_(R8G8_SSCALED, 8_8_SINT, NONE, WZYX),
 
_T(L8A8_UINT, 8_8_UINT, NONE, WZYX),
_T(L8A8_SINT, 8_8_SINT, NONE, WZYX),
 
_T(B5G5R5A1_UNORM, 5_5_5_1_UNORM, R5G5B5A1_UNORM, WXYZ),
_T(B5G5R5X1_UNORM, 5_5_5_1_UNORM, R5G5B5A1_UNORM, WXYZ),
_T(B4G4R4A4_UNORM, 4_4_4_4_UNORM, R4G4B4A4_UNORM, WXYZ),
 
/* 24-bit */
V_(R8G8B8_UNORM, 8_8_8_UNORM, NONE, WZYX),
V_(R8G8B8_SNORM, 8_8_8_SNORM, NONE, WZYX),
V_(R8G8B8_UINT, 8_8_8_UINT, NONE, WZYX),
V_(R8G8B8_SINT, 8_8_8_SINT, NONE, WZYX),
V_(R8G8B8_USCALED, 8_8_8_UINT, NONE, WZYX),
V_(R8G8B8_SSCALED, 8_8_8_SINT, NONE, WZYX),
 
/* 32-bit */
VT(R32_UINT, 32_UINT, R32_UINT, WZYX),
VT(R32_SINT, 32_SINT, R32_SINT, WZYX),
V_(R32_USCALED, 32_UINT, NONE, WZYX),
V_(R32_SSCALED, 32_UINT, NONE, WZYX),
VT(R32_FLOAT, 32_FLOAT, NONE, WZYX),
V_(R32_FIXED, 32_FIXED, NONE, WZYX),
 
_T(A32_UINT, 32_UINT, NONE, WZYX),
_T(A32_SINT, 32_SINT, NONE, WZYX),
_T(L32_UINT, 32_UINT, NONE, WZYX),
_T(L32_SINT, 32_SINT, NONE, WZYX),
_T(I32_UINT, 32_UINT, NONE, WZYX),
_T(I32_SINT, 32_SINT, NONE, WZYX),
 
V_(R16G16_UNORM, 16_16_UNORM, NONE, WZYX),
V_(R16G16_SNORM, 16_16_SNORM, NONE, WZYX),
VT(R16G16_UINT, 16_16_UINT, R16G16_UINT, WZYX),
VT(R16G16_SINT, 16_16_SINT, R16G16_SINT, WZYX),
V_(R16G16_USCALED, 16_16_UINT, NONE, WZYX),
V_(R16G16_SSCALED, 16_16_SINT, NONE, WZYX),
VT(R16G16_FLOAT, 16_16_FLOAT, NONE, WZYX),
 
_T(L16A16_UINT, 16_16_UINT, NONE, WZYX),
_T(L16A16_SINT, 16_16_SINT, NONE, WZYX),
 
VT(R8G8B8A8_UNORM, 8_8_8_8_UNORM, R8G8B8A8_UNORM, WZYX),
_T(R8G8B8X8_UNORM, 8_8_8_8_UNORM, R8G8B8A8_UNORM, WZYX),
_T(R8G8B8A8_SRGB, 8_8_8_8_UNORM, R8G8B8A8_UNORM, WZYX),
_T(R8G8B8X8_SRGB, 8_8_8_8_UNORM, R8G8B8A8_UNORM, WZYX),
VT(R8G8B8A8_SNORM, 8_8_8_8_SNORM, R8G8B8A8_SNORM, WZYX),
VT(R8G8B8A8_UINT, 8_8_8_8_UINT, R8G8B8A8_UINT, WZYX),
VT(R8G8B8A8_SINT, 8_8_8_8_SINT, R8G8B8A8_SINT, WZYX),
V_(R8G8B8A8_USCALED, 8_8_8_8_UINT, NONE, WZYX),
V_(R8G8B8A8_SSCALED, 8_8_8_8_SINT, NONE, WZYX),
 
VT(B8G8R8A8_UNORM, 8_8_8_8_UNORM, R8G8B8A8_UNORM, WXYZ),
_T(B8G8R8X8_UNORM, 8_8_8_8_UNORM, R8G8B8A8_UNORM, WXYZ),
VT(B8G8R8A8_SRGB, 8_8_8_8_UNORM, R8G8B8A8_UNORM, WXYZ),
_T(B8G8R8X8_SRGB, 8_8_8_8_UNORM, R8G8B8A8_UNORM, WXYZ),
 
VT(A8B8G8R8_UNORM, 8_8_8_8_UNORM, R8G8B8A8_UNORM, XYZW),
_T(X8B8G8R8_UNORM, 8_8_8_8_UNORM, R8G8B8A8_UNORM, XYZW),
_T(A8B8G8R8_SRGB, 8_8_8_8_UNORM, R8G8B8A8_UNORM, XYZW),
_T(X8B8G8R8_SRGB, 8_8_8_8_UNORM, R8G8B8A8_UNORM, XYZW),
 
VT(A8R8G8B8_UNORM, 8_8_8_8_UNORM, R8G8B8A8_UNORM, ZYXW),
_T(X8R8G8B8_UNORM, 8_8_8_8_UNORM, R8G8B8A8_UNORM, ZYXW),
_T(A8R8G8B8_SRGB, 8_8_8_8_UNORM, R8G8B8A8_UNORM, ZYXW),
_T(X8R8G8B8_SRGB, 8_8_8_8_UNORM, R8G8B8A8_UNORM, ZYXW),
 
VT(R10G10B10A2_UNORM, 10_10_10_2_UNORM, R10G10B10A2_UNORM, WZYX),
VT(B10G10R10A2_UNORM, 10_10_10_2_UNORM, R10G10B10A2_UNORM, WXYZ),
_T(B10G10R10X2_UNORM, 10_10_10_2_UNORM, R10G10B10A2_UNORM, WXYZ),
V_(R10G10B10A2_SNORM, 10_10_10_2_SNORM, NONE, WZYX),
V_(R10G10B10A2_UINT, 10_10_10_2_UINT, NONE, WZYX),
V_(R10G10B10A2_USCALED, 10_10_10_2_UINT, NONE, WZYX),
V_(R10G10B10A2_SSCALED, 10_10_10_2_SINT, NONE, WZYX),
 
_T(R11G11B10_FLOAT, 11_11_10_FLOAT, R11G11B10_FLOAT, WZYX),
_T(R9G9B9E5_FLOAT, 9_9_9_E5_FLOAT, NONE, WZYX),
 
_T(Z24X8_UNORM, X8Z24_UNORM, R8G8B8A8_UNORM, WZYX),
_T(Z24_UNORM_S8_UINT, X8Z24_UNORM, R8G8B8A8_UNORM, WZYX),
/*_T(Z32_FLOAT, Z32_FLOAT, R8G8B8A8_UNORM, WZYX),*/
 
/* 48-bit */
V_(R16G16B16_UNORM, 16_16_16_UNORM, NONE, WZYX),
V_(R16G16B16_SNORM, 16_16_16_SNORM, NONE, WZYX),
V_(R16G16B16_UINT, 16_16_16_UINT, NONE, WZYX),
V_(R16G16B16_SINT, 16_16_16_SINT, NONE, WZYX),
V_(R16G16B16_USCALED, 16_16_16_UINT, NONE, WZYX),
V_(R16G16B16_SSCALED, 16_16_16_SINT, NONE, WZYX),
V_(R16G16B16_FLOAT, 16_16_16_FLOAT, NONE, WZYX),
 
/* 64-bit */
V_(R16G16B16A16_UNORM, 16_16_16_16_UNORM, NONE, WZYX),
V_(R16G16B16A16_SNORM, 16_16_16_16_SNORM, NONE, WZYX),
VT(R16G16B16A16_UINT, 16_16_16_16_UINT, R16G16B16A16_UINT, WZYX),
_T(R16G16B16X16_UINT, 16_16_16_16_UINT, R16G16B16A16_UINT, WZYX),
VT(R16G16B16A16_SINT, 16_16_16_16_SINT, R16G16B16A16_SINT, WZYX),
_T(R16G16B16X16_SINT, 16_16_16_16_SINT, R16G16B16A16_SINT, WZYX),
V_(R16G16B16A16_USCALED, 16_16_16_16_UINT, NONE, WZYX),
V_(R16G16B16A16_SSCALED, 16_16_16_16_SINT, NONE, WZYX),
VT(R16G16B16A16_FLOAT, 16_16_16_16_FLOAT, R16G16B16A16_FLOAT, WZYX),
_T(R16G16B16X16_FLOAT, 16_16_16_16_FLOAT, R16G16B16A16_FLOAT, WZYX),
 
VT(R32G32_UINT, 32_32_UINT, R32G32_UINT, WZYX),
VT(R32G32_SINT, 32_32_SINT, R32G32_SINT, WZYX),
V_(R32G32_USCALED, 32_32_UINT, NONE, WZYX),
V_(R32G32_SSCALED, 32_32_SINT, NONE, WZYX),
VT(R32G32_FLOAT, 32_32_FLOAT, NONE, WZYX),
V_(R32G32_FIXED, 32_32_FIXED, NONE, WZYX),
 
_T(L32A32_UINT, 32_32_UINT, NONE, WZYX),
_T(L32A32_SINT, 32_32_SINT, NONE, WZYX),
 
/* 96-bit */
V_(R32G32B32_UINT, 32_32_32_UINT, NONE, WZYX),
V_(R32G32B32_SINT, 32_32_32_SINT, NONE, WZYX),
V_(R32G32B32_USCALED, 32_32_32_UINT, NONE, WZYX),
V_(R32G32B32_SSCALED, 32_32_32_SINT, NONE, WZYX),
V_(R32G32B32_FLOAT, 32_32_32_FLOAT, NONE, WZYX),
V_(R32G32B32_FIXED, 32_32_32_FIXED, NONE, WZYX),
 
/* 128-bit */
VT(R32G32B32A32_UINT, 32_32_32_32_UINT, R32G32B32A32_UINT, WZYX),
_T(R32G32B32X32_UINT, 32_32_32_32_UINT, R32G32B32A32_UINT, WZYX),
VT(R32G32B32A32_SINT, 32_32_32_32_SINT, R32G32B32A32_SINT, WZYX),
_T(R32G32B32X32_SINT, 32_32_32_32_SINT, R32G32B32A32_SINT, WZYX),
V_(R32G32B32A32_USCALED, 32_32_32_32_UINT, NONE, WZYX),
V_(R32G32B32A32_SSCALED, 32_32_32_32_SINT, NONE, WZYX),
VT(R32G32B32A32_FLOAT, 32_32_32_32_FLOAT, R32G32B32A32_FLOAT, WZYX),
_T(R32G32B32X32_FLOAT, 32_32_32_32_FLOAT, R32G32B32A32_FLOAT, WZYX),
V_(R32G32B32A32_FIXED, 32_32_32_32_FIXED, NONE, WZYX),
};
 
/* convert pipe format to vertex buffer format: */
enum a4xx_vtx_fmt
fd4_pipe2vtx(enum pipe_format format)
{
if (!formats[format].present)
return ~0;
return formats[format].vtx;
}
 
/* convert pipe format to texture sampler format: */
enum a4xx_tex_fmt
fd4_pipe2tex(enum pipe_format format)
{
if (!formats[format].present)
return ~0;
return formats[format].tex;
}
 
/* convert pipe format to MRT / copydest format used for render-target: */
enum a4xx_color_fmt
fd4_pipe2color(enum pipe_format format)
{
if (!formats[format].present)
return ~0;
return formats[format].rb;
}
 
enum a3xx_color_swap
fd4_pipe2swap(enum pipe_format format)
{
if (!formats[format].present)
return WZYX;
return formats[format].swap;
}
 
enum a4xx_tex_fetchsize
fd4_pipe2fetchsize(enum pipe_format format)
{
switch (util_format_get_blocksizebits(format)) {
case 8: return TFETCH4_1_BYTE;
case 16: return TFETCH4_2_BYTE;
case 32: return TFETCH4_4_BYTE;
case 64: return TFETCH4_8_BYTE;
case 128: return TFETCH4_16_BYTE;
default:
debug_printf("Unknown block size for format %s: %d\n",
util_format_name(format),
util_format_get_blocksizebits(format));
return TFETCH4_1_BYTE;
}
}
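/* Example: PIPE_FORMAT_R8G8B8A8_UNORM has a 32-bit block, so it maps to
* TFETCH4_4_BYTE; a 128-bit format such as PIPE_FORMAT_R32G32B32A32_FLOAT
* maps to TFETCH4_16_BYTE.
*/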
 
/* We need to special-case the depth/stencil restore a bit, because we are
* using the texture sampler to blit into the depth/stencil buffer, *not*
* into a color buffer. Otherwise fd4_tex_swiz() will do the wrong thing,
* as it assumes you are sampling into a normal render target..
*
* TODO looks like we can probably share w/ a3xx..
*/
enum pipe_format
fd4_gmem_restore_format(enum pipe_format format)
{
switch (format) {
case PIPE_FORMAT_Z24X8_UNORM:
case PIPE_FORMAT_Z24_UNORM_S8_UINT:
return PIPE_FORMAT_R8G8B8A8_UNORM;
case PIPE_FORMAT_Z16_UNORM:
return PIPE_FORMAT_R8G8_UNORM;
default:
return format;
}
}
 
enum a4xx_depth_format
fd4_pipe2depth(enum pipe_format format)
{
switch (format) {
case PIPE_FORMAT_Z16_UNORM:
return DEPTH4_16;
case PIPE_FORMAT_Z24X8_UNORM:
case PIPE_FORMAT_Z24_UNORM_S8_UINT:
case PIPE_FORMAT_X8Z24_UNORM:
case PIPE_FORMAT_S8_UINT_Z24_UNORM:
return DEPTH4_24_8;
default:
return ~0;
}
}
 
static inline enum a4xx_tex_swiz
tex_swiz(unsigned swiz)
{
switch (swiz) {
default:
case PIPE_SWIZZLE_RED: return A4XX_TEX_X;
case PIPE_SWIZZLE_GREEN: return A4XX_TEX_Y;
case PIPE_SWIZZLE_BLUE: return A4XX_TEX_Z;
case PIPE_SWIZZLE_ALPHA: return A4XX_TEX_W;
case PIPE_SWIZZLE_ZERO: return A4XX_TEX_ZERO;
case PIPE_SWIZZLE_ONE: return A4XX_TEX_ONE;
}
}
 
uint32_t
fd4_tex_swiz(enum pipe_format format, unsigned swizzle_r, unsigned swizzle_g,
unsigned swizzle_b, unsigned swizzle_a)
{
const struct util_format_description *desc =
util_format_description(format);
unsigned char swiz[4] = {
swizzle_r, swizzle_g, swizzle_b, swizzle_a,
}, rswiz[4];
 
util_format_compose_swizzles(desc->swizzle, swiz, rswiz);
 
return A4XX_TEX_CONST_0_SWIZ_X(tex_swiz(rswiz[0])) |
A4XX_TEX_CONST_0_SWIZ_Y(tex_swiz(rswiz[1])) |
A4XX_TEX_CONST_0_SWIZ_Z(tex_swiz(rswiz[2])) |
A4XX_TEX_CONST_0_SWIZ_W(tex_swiz(rswiz[3]));
}
/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/freedreno/a4xx/fd4_format.h
0,0 → 1,47
/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */
 
/*
* Copyright (C) 2014 Rob Clark <robclark@freedesktop.org>
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Authors:
* Rob Clark <robclark@freedesktop.org>
*/
 
#ifndef FD4_UTIL_H_
#define FD4_UTIL_H_
 
#include "freedreno_util.h"
 
#include "a4xx.xml.h"
 
enum a4xx_vtx_fmt fd4_pipe2vtx(enum pipe_format format);
enum a4xx_tex_fmt fd4_pipe2tex(enum pipe_format format);
enum a4xx_color_fmt fd4_pipe2color(enum pipe_format format);
enum pipe_format fd4_gmem_restore_format(enum pipe_format format);
enum a3xx_color_swap fd4_pipe2swap(enum pipe_format format);
enum a4xx_tex_fetchsize fd4_pipe2fetchsize(enum pipe_format format);
enum a4xx_depth_format fd4_pipe2depth(enum pipe_format format);
 
uint32_t fd4_tex_swiz(enum pipe_format format, unsigned swizzle_r,
unsigned swizzle_g, unsigned swizzle_b, unsigned swizzle_a);
 
#endif /* FD4_UTIL_H_ */
/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/freedreno/a4xx/fd4_gmem.c
0,0 → 1,614
/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */
 
/*
* Copyright (C) 2014 Rob Clark <robclark@freedesktop.org>
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Authors:
* Rob Clark <robclark@freedesktop.org>
*/
 
#include "pipe/p_state.h"
#include "util/u_string.h"
#include "util/u_memory.h"
#include "util/u_inlines.h"
#include "util/u_format.h"
 
#include "freedreno_draw.h"
#include "freedreno_state.h"
#include "freedreno_resource.h"
 
#include "fd4_gmem.h"
#include "fd4_context.h"
#include "fd4_draw.h"
#include "fd4_emit.h"
#include "fd4_program.h"
#include "fd4_format.h"
#include "fd4_zsa.h"
 
static const struct ir3_shader_key key = {
// XXX should be set based on the render target format!  We don't
// want half_precision with a float32 render target!
.half_precision = true,
};
 
static void
emit_mrt(struct fd_ringbuffer *ring, unsigned nr_bufs,
struct pipe_surface **bufs, uint32_t *bases, uint32_t bin_w)
{
enum a4xx_tile_mode tile_mode;
unsigned i;
 
if (bin_w) {
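/* a non-zero bin_w means we are rendering into tiled gmem rather
* than linearly into system memory:
*/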
tile_mode = 2;
} else {
tile_mode = TILE4_LINEAR;
}
 
for (i = 0; i < 8; i++) {
enum a4xx_color_fmt format = 0;
enum a3xx_color_swap swap = WZYX;
struct fd_resource *rsc = NULL;
struct fd_resource_slice *slice = NULL;
uint32_t stride = 0;
uint32_t base = 0;
uint32_t offset = 0;
 
if ((i < nr_bufs) && bufs[i]) {
struct pipe_surface *psurf = bufs[i];
 
rsc = fd_resource(psurf->texture);
slice = fd_resource_slice(rsc, psurf->u.tex.level);
format = fd4_pipe2color(psurf->format);
swap = fd4_pipe2swap(psurf->format);
 
debug_assert(psurf->u.tex.first_layer == psurf->u.tex.last_layer);
 
offset = fd_resource_offset(rsc, psurf->u.tex.level,
psurf->u.tex.first_layer);
 
if (bin_w) {
stride = bin_w * rsc->cpp;
 
if (bases) {
base = bases[i];
}
} else {
stride = slice->pitch * rsc->cpp;
}
}
 
OUT_PKT0(ring, REG_A4XX_RB_MRT_BUF_INFO(i), 3);
OUT_RING(ring, A4XX_RB_MRT_BUF_INFO_COLOR_FORMAT(format) |
A4XX_RB_MRT_BUF_INFO_COLOR_TILE_MODE(tile_mode) |
A4XX_RB_MRT_BUF_INFO_COLOR_BUF_PITCH(stride) |
A4XX_RB_MRT_BUF_INFO_COLOR_SWAP(swap));
if (bin_w || (i >= nr_bufs)) {
OUT_RING(ring, base);
OUT_RING(ring, A4XX_RB_MRT_CONTROL3_STRIDE(stride));
} else {
OUT_RELOCW(ring, rsc->bo, offset, 0, 0);
/* RB_MRT[i].CONTROL3.STRIDE not emitted by c2d..
* not sure if we need to skip it for bypass or
* not.
*/
OUT_RING(ring, A4XX_RB_MRT_CONTROL3_STRIDE(0));
}
}
}
 
static uint32_t
depth_base(struct fd_context *ctx)
{
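/* gmem is partitioned with color at offset 0 and depth/stencil after it:
* the depth base is the color size (bin_w * bin_h * cpp) aligned to 0x4000.
*/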
struct fd_gmem_stateobj *gmem = &ctx->gmem;
struct pipe_framebuffer_state *pfb = &ctx->framebuffer;
uint32_t cpp = 4;
if (pfb->cbufs[0]) {
struct fd_resource *rsc =
fd_resource(pfb->cbufs[0]->texture);
cpp = rsc->cpp;
}
return align(gmem->bin_w * gmem->bin_h * cpp, 0x4000);
}
 
/* transfer from gmem to system memory (ie. normal RAM) */
 
static void
emit_gmem2mem_surf(struct fd_context *ctx,
uint32_t base, struct pipe_surface *psurf)
{
struct fd_ringbuffer *ring = ctx->ring;
struct fd_resource *rsc = fd_resource(psurf->texture);
struct fd_resource_slice *slice = &rsc->slices[psurf->u.tex.level];
uint32_t offset = fd_resource_offset(rsc, psurf->u.tex.level,
psurf->u.tex.first_layer);
 
debug_assert(psurf->u.tex.first_layer == psurf->u.tex.last_layer);
 
OUT_PKT0(ring, REG_A4XX_RB_COPY_CONTROL, 4);
OUT_RING(ring, A4XX_RB_COPY_CONTROL_MSAA_RESOLVE(MSAA_ONE) |
A4XX_RB_COPY_CONTROL_MODE(RB_COPY_RESOLVE) |
A4XX_RB_COPY_CONTROL_GMEM_BASE(base));
OUT_RELOCW(ring, rsc->bo, offset, 0, 0); /* RB_COPY_DEST_BASE */
OUT_RING(ring, A4XX_RB_COPY_DEST_PITCH_PITCH(slice->pitch * rsc->cpp));
OUT_RING(ring, A4XX_RB_COPY_DEST_INFO_TILE(TILE4_LINEAR) |
A4XX_RB_COPY_DEST_INFO_FORMAT(fd4_pipe2color(psurf->format)) |
A4XX_RB_COPY_DEST_INFO_COMPONENT_ENABLE(0xf) |
A4XX_RB_COPY_DEST_INFO_ENDIAN(ENDIAN_NONE) |
A4XX_RB_COPY_DEST_INFO_SWAP(fd4_pipe2swap(psurf->format)));
 
fd4_draw(ctx, ring, DI_PT_RECTLIST, IGNORE_VISIBILITY,
DI_SRC_SEL_AUTO_INDEX, 2, 1, INDEX_SIZE_IGN, 0, 0, NULL);
}
 
static void
fd4_emit_tile_gmem2mem(struct fd_context *ctx, struct fd_tile *tile)
{
struct fd4_context *fd4_ctx = fd4_context(ctx);
struct fd_ringbuffer *ring = ctx->ring;
struct pipe_framebuffer_state *pfb = &ctx->framebuffer;
struct fd4_emit emit = {
.vtx = &fd4_ctx->solid_vbuf_state,
.prog = &ctx->solid_prog,
.key = key,
.format = fd4_emit_format(pfb->cbufs[0]),
};
 
OUT_PKT0(ring, REG_A4XX_RB_DEPTH_CONTROL, 1);
OUT_RING(ring, A4XX_RB_DEPTH_CONTROL_ZFUNC(FUNC_NEVER));
 
OUT_PKT0(ring, REG_A4XX_RB_STENCIL_CONTROL, 2);
OUT_RING(ring, A4XX_RB_STENCIL_CONTROL_FUNC(FUNC_NEVER) |
A4XX_RB_STENCIL_CONTROL_FAIL(STENCIL_KEEP) |
A4XX_RB_STENCIL_CONTROL_ZPASS(STENCIL_KEEP) |
A4XX_RB_STENCIL_CONTROL_ZFAIL(STENCIL_KEEP) |
A4XX_RB_STENCIL_CONTROL_FUNC_BF(FUNC_NEVER) |
A4XX_RB_STENCIL_CONTROL_FAIL_BF(STENCIL_KEEP) |
A4XX_RB_STENCIL_CONTROL_ZPASS_BF(STENCIL_KEEP) |
A4XX_RB_STENCIL_CONTROL_ZFAIL_BF(STENCIL_KEEP));
OUT_RING(ring, 0x00000000); /* RB_STENCIL_CONTROL2 */
 
OUT_PKT0(ring, REG_A4XX_RB_STENCILREFMASK, 2);
OUT_RING(ring, 0xff000000 |
A4XX_RB_STENCILREFMASK_STENCILREF(0) |
A4XX_RB_STENCILREFMASK_STENCILMASK(0) |
A4XX_RB_STENCILREFMASK_STENCILWRITEMASK(0xff));
OUT_RING(ring, 0xff000000 |
A4XX_RB_STENCILREFMASK_BF_STENCILREF(0) |
A4XX_RB_STENCILREFMASK_BF_STENCILMASK(0) |
A4XX_RB_STENCILREFMASK_BF_STENCILWRITEMASK(0xff));
 
OUT_PKT0(ring, REG_A4XX_GRAS_SU_MODE_CONTROL, 1);
OUT_RING(ring, A4XX_GRAS_SU_MODE_CONTROL_LINEHALFWIDTH(0));
 
fd_wfi(ctx, ring);
 
OUT_PKT0(ring, REG_A4XX_GRAS_CL_CLIP_CNTL, 1);
OUT_RING(ring, 0x80000); /* GRAS_CL_CLIP_CNTL */
 
OUT_PKT0(ring, REG_A4XX_GRAS_CL_VPORT_XOFFSET_0, 6);
OUT_RING(ring, A4XX_GRAS_CL_VPORT_XOFFSET_0((float)pfb->width/2.0));
OUT_RING(ring, A4XX_GRAS_CL_VPORT_XSCALE_0((float)pfb->width/2.0));
OUT_RING(ring, A4XX_GRAS_CL_VPORT_YOFFSET_0((float)pfb->height/2.0));
OUT_RING(ring, A4XX_GRAS_CL_VPORT_YSCALE_0(-(float)pfb->height/2.0));
OUT_RING(ring, A4XX_GRAS_CL_VPORT_ZOFFSET_0(0.0));
OUT_RING(ring, A4XX_GRAS_CL_VPORT_ZSCALE_0(1.0));
 
OUT_PKT0(ring, REG_A4XX_RB_RENDER_CONTROL, 1);
OUT_RING(ring, A4XX_RB_RENDER_CONTROL_DISABLE_COLOR_PIPE |
0xa); /* XXX */
 
OUT_PKT0(ring, REG_A4XX_GRAS_SC_CONTROL, 1);
OUT_RING(ring, A4XX_GRAS_SC_CONTROL_RENDER_MODE(RB_RESOLVE_PASS) |
A4XX_GRAS_SC_CONTROL_MSAA_DISABLE |
A4XX_GRAS_SC_CONTROL_MSAA_SAMPLES(MSAA_ONE) |
A4XX_GRAS_SC_CONTROL_RASTER_MODE(1));
 
OUT_PKT0(ring, REG_A4XX_PC_PRIM_VTX_CNTL, 1);
OUT_RING(ring, A4XX_PC_PRIM_VTX_CNTL_PROVOKING_VTX_LAST);
 
OUT_PKT0(ring, REG_A4XX_GRAS_ALPHA_CONTROL, 1);
OUT_RING(ring, 0x00000002);
 
OUT_PKT0(ring, REG_A4XX_GRAS_SC_WINDOW_SCISSOR_BR, 2);
OUT_RING(ring, A4XX_GRAS_SC_WINDOW_SCISSOR_BR_X(pfb->width - 1) |
A4XX_GRAS_SC_WINDOW_SCISSOR_BR_Y(pfb->height - 1));
OUT_RING(ring, A4XX_GRAS_SC_WINDOW_SCISSOR_TL_X(0) |
A4XX_GRAS_SC_WINDOW_SCISSOR_TL_Y(0));
 
OUT_PKT0(ring, REG_A4XX_VFD_INDEX_OFFSET, 2);
OUT_RING(ring, 0); /* VFD_INDEX_OFFSET */
OUT_RING(ring, 0); /* ??? UNKNOWN_2209 */
 
fd4_program_emit(ring, &emit);
fd4_emit_vertex_bufs(ring, &emit);
 
if (ctx->resolve & (FD_BUFFER_DEPTH | FD_BUFFER_STENCIL)) {
uint32_t base = depth_base(ctx);
emit_gmem2mem_surf(ctx, base, pfb->zsbuf);
}
 
if (ctx->resolve & FD_BUFFER_COLOR) {
emit_gmem2mem_surf(ctx, 0, pfb->cbufs[0]);
}
 
OUT_PKT0(ring, REG_A4XX_GRAS_SC_CONTROL, 1);
OUT_RING(ring, A4XX_GRAS_SC_CONTROL_RENDER_MODE(RB_RENDERING_PASS) |
A4XX_GRAS_SC_CONTROL_MSAA_DISABLE |
A4XX_GRAS_SC_CONTROL_MSAA_SAMPLES(MSAA_ONE) |
A4XX_GRAS_SC_CONTROL_RASTER_MODE(0));
}
 
/* transfer from system memory to gmem */
 
static void
emit_mem2gmem_surf(struct fd_context *ctx, uint32_t base,
struct pipe_surface *psurf, uint32_t bin_w)
{
struct fd_ringbuffer *ring = ctx->ring;
 
emit_mrt(ring, 1, &psurf, &base, bin_w);
 
fd4_emit_gmem_restore_tex(ring, psurf);
 
fd4_draw(ctx, ring, DI_PT_RECTLIST, IGNORE_VISIBILITY,
DI_SRC_SEL_AUTO_INDEX, 2, 1, INDEX_SIZE_IGN, 0, 0, NULL);
}
 
static void
fd4_emit_tile_mem2gmem(struct fd_context *ctx, struct fd_tile *tile)
{
struct fd4_context *fd4_ctx = fd4_context(ctx);
struct fd_gmem_stateobj *gmem = &ctx->gmem;
struct fd_ringbuffer *ring = ctx->ring;
struct pipe_framebuffer_state *pfb = &ctx->framebuffer;
struct fd4_emit emit = {
.vtx = &fd4_ctx->blit_vbuf_state,
.prog = &ctx->blit_prog[0],
.key = key,
.format = fd4_emit_format(pfb->cbufs[0]),
};
float x0, y0, x1, y1;
unsigned bin_w = tile->bin_w;
unsigned bin_h = tile->bin_h;
unsigned i;
 
/* write texture coordinates to vertexbuf: */
x0 = ((float)tile->xoff) / ((float)pfb->width);
x1 = ((float)tile->xoff + bin_w) / ((float)pfb->width);
y0 = ((float)tile->yoff) / ((float)pfb->height);
y1 = ((float)tile->yoff + bin_h) / ((float)pfb->height);
 
OUT_PKT3(ring, CP_MEM_WRITE, 5);
OUT_RELOCW(ring, fd_resource(fd4_ctx->blit_texcoord_vbuf)->bo, 0, 0, 0);
OUT_RING(ring, fui(x0));
OUT_RING(ring, fui(y0));
OUT_RING(ring, fui(x1));
OUT_RING(ring, fui(y1));
 
for (i = 0; i < 8; i++) {
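/* the restore blit writes all components and uses ONE/ZERO blend
* factors, ie. a straight copy of the sampled value:
*/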
OUT_PKT0(ring, REG_A4XX_RB_MRT_CONTROL(i), 1);
OUT_RING(ring, A4XX_RB_MRT_CONTROL_FASTCLEAR |
A4XX_RB_MRT_CONTROL_B11 |
A4XX_RB_MRT_CONTROL_COMPONENT_ENABLE(0xf));
 
OUT_PKT0(ring, REG_A4XX_RB_MRT_BLEND_CONTROL(i), 1);
OUT_RING(ring, A4XX_RB_MRT_BLEND_CONTROL_RGB_SRC_FACTOR(FACTOR_ONE) |
A4XX_RB_MRT_BLEND_CONTROL_RGB_BLEND_OPCODE(BLEND_DST_PLUS_SRC) |
A4XX_RB_MRT_BLEND_CONTROL_RGB_DEST_FACTOR(FACTOR_ZERO) |
A4XX_RB_MRT_BLEND_CONTROL_ALPHA_SRC_FACTOR(FACTOR_ONE) |
A4XX_RB_MRT_BLEND_CONTROL_ALPHA_BLEND_OPCODE(BLEND_DST_PLUS_SRC) |
A4XX_RB_MRT_BLEND_CONTROL_ALPHA_DEST_FACTOR(FACTOR_ZERO));
}
 
OUT_PKT0(ring, REG_A4XX_RB_RENDER_CONTROL, 1);
OUT_RING(ring, 0x8); /* XXX RB_RENDER_CONTROL */
 
OUT_PKT0(ring, REG_A4XX_RB_DEPTH_CONTROL, 1);
OUT_RING(ring, A4XX_RB_DEPTH_CONTROL_ZFUNC(FUNC_LESS));
 
OUT_PKT0(ring, REG_A4XX_GRAS_CL_CLIP_CNTL, 1);
OUT_RING(ring, 0x280000); /* XXX GRAS_CL_CLIP_CNTL */
 
OUT_PKT0(ring, REG_A4XX_GRAS_SU_MODE_CONTROL, 1);
OUT_RING(ring, A4XX_GRAS_SU_MODE_CONTROL_LINEHALFWIDTH(0) |
A4XX_GRAS_SU_MODE_CONTROL_RENDERING_PASS);
 
OUT_PKT0(ring, REG_A4XX_GRAS_CL_VPORT_XOFFSET_0, 6);
OUT_RING(ring, A4XX_GRAS_CL_VPORT_XOFFSET_0((float)bin_w/2.0));
OUT_RING(ring, A4XX_GRAS_CL_VPORT_XSCALE_0((float)bin_w/2.0));
OUT_RING(ring, A4XX_GRAS_CL_VPORT_YOFFSET_0((float)bin_h/2.0));
OUT_RING(ring, A4XX_GRAS_CL_VPORT_YSCALE_0(-(float)bin_h/2.0));
OUT_RING(ring, A4XX_GRAS_CL_VPORT_ZOFFSET_0(0.0));
OUT_RING(ring, A4XX_GRAS_CL_VPORT_ZSCALE_0(1.0));
 
OUT_PKT0(ring, REG_A4XX_GRAS_SC_WINDOW_SCISSOR_BR, 2);
OUT_RING(ring, A4XX_GRAS_SC_WINDOW_SCISSOR_BR_X(bin_w - 1) |
A4XX_GRAS_SC_WINDOW_SCISSOR_BR_Y(bin_h - 1));
OUT_RING(ring, A4XX_GRAS_SC_WINDOW_SCISSOR_TL_X(0) |
A4XX_GRAS_SC_WINDOW_SCISSOR_TL_Y(0));
 
OUT_PKT0(ring, REG_A4XX_GRAS_SC_SCREEN_SCISSOR_TL, 2);
OUT_RING(ring, A4XX_GRAS_SC_SCREEN_SCISSOR_TL_X(0) |
A4XX_GRAS_SC_SCREEN_SCISSOR_TL_Y(0));
OUT_RING(ring, A4XX_GRAS_SC_SCREEN_SCISSOR_BR_X(bin_w - 1) |
A4XX_GRAS_SC_SCREEN_SCISSOR_BR_Y(bin_h - 1));
 
OUT_PKT0(ring, REG_A4XX_RB_MODE_CONTROL, 1);
OUT_RING(ring, A4XX_RB_MODE_CONTROL_WIDTH(gmem->bin_w) |
A4XX_RB_MODE_CONTROL_HEIGHT(gmem->bin_h));
 
OUT_PKT0(ring, REG_A4XX_RB_STENCIL_CONTROL, 2);
OUT_RING(ring, A4XX_RB_STENCIL_CONTROL_FUNC(FUNC_ALWAYS) |
A4XX_RB_STENCIL_CONTROL_FAIL(STENCIL_KEEP) |
A4XX_RB_STENCIL_CONTROL_ZPASS(STENCIL_KEEP) |
A4XX_RB_STENCIL_CONTROL_ZFAIL(STENCIL_KEEP) |
A4XX_RB_STENCIL_CONTROL_FUNC_BF(FUNC_ALWAYS) |
A4XX_RB_STENCIL_CONTROL_FAIL_BF(STENCIL_KEEP) |
A4XX_RB_STENCIL_CONTROL_ZPASS_BF(STENCIL_KEEP) |
A4XX_RB_STENCIL_CONTROL_ZFAIL_BF(STENCIL_KEEP));
OUT_RING(ring, 0x00000000); /* RB_STENCIL_CONTROL2 */
 
OUT_PKT0(ring, REG_A4XX_GRAS_SC_CONTROL, 1);
OUT_RING(ring, A4XX_GRAS_SC_CONTROL_RENDER_MODE(RB_RENDERING_PASS) |
A4XX_GRAS_SC_CONTROL_MSAA_DISABLE |
A4XX_GRAS_SC_CONTROL_MSAA_SAMPLES(MSAA_ONE) |
A4XX_GRAS_SC_CONTROL_RASTER_MODE(1));
 
OUT_PKT0(ring, REG_A4XX_PC_PRIM_VTX_CNTL, 1);
OUT_RING(ring, A4XX_PC_PRIM_VTX_CNTL_PROVOKING_VTX_LAST |
A4XX_PC_PRIM_VTX_CNTL_VAROUT(1));
 
OUT_PKT0(ring, REG_A4XX_VFD_INDEX_OFFSET, 2);
OUT_RING(ring, 0); /* VFD_INDEX_OFFSET */
OUT_RING(ring, 0); /* ??? UNKNOWN_2209 */
 
fd4_program_emit(ring, &emit);
fd4_emit_vertex_bufs(ring, &emit);
 
/* for gmem pitch/base calculations, we need to use the non-
* truncated tile sizes:
*/
bin_w = gmem->bin_w;
bin_h = gmem->bin_h;
 
if (fd_gmem_needs_restore(ctx, tile, FD_BUFFER_DEPTH | FD_BUFFER_STENCIL))
emit_mem2gmem_surf(ctx, depth_base(ctx), pfb->zsbuf, bin_w);
 
if (fd_gmem_needs_restore(ctx, tile, FD_BUFFER_COLOR))
emit_mem2gmem_surf(ctx, 0, pfb->cbufs[0], bin_w);
 
OUT_PKT0(ring, REG_A4XX_GRAS_SC_CONTROL, 1);
OUT_RING(ring, A4XX_GRAS_SC_CONTROL_RENDER_MODE(RB_RENDERING_PASS) |
A4XX_GRAS_SC_CONTROL_MSAA_SAMPLES(MSAA_ONE) |
A4XX_GRAS_SC_CONTROL_RASTER_MODE(0));
 
OUT_PKT0(ring, REG_A4XX_RB_MODE_CONTROL, 1);
OUT_RING(ring, A4XX_RB_MODE_CONTROL_WIDTH(gmem->bin_w) |
A4XX_RB_MODE_CONTROL_HEIGHT(gmem->bin_h) |
0x00010000); /* XXX */
}
 
static void
patch_draws(struct fd_context *ctx, enum pc_di_vis_cull_mode vismode)
{
unsigned i;
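/* draw packets are emitted with a placeholder visibility-cull mode and
* patched here once we know whether this is a sysmem or gmem pass:
*/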
for (i = 0; i < fd_patch_num_elements(&ctx->draw_patches); i++) {
struct fd_cs_patch *patch = fd_patch_element(&ctx->draw_patches, i);
*patch->cs = patch->val | DRAW4(0, 0, 0, vismode);
}
util_dynarray_resize(&ctx->draw_patches, 0);
}
 
static void
patch_rbrc(struct fd_context *ctx, uint32_t val)
{
struct fd4_context *fd4_ctx = fd4_context(ctx);
unsigned i;
for (i = 0; i < fd_patch_num_elements(&fd4_ctx->rbrc_patches); i++) {
struct fd_cs_patch *patch = fd_patch_element(&fd4_ctx->rbrc_patches, i);
*patch->cs = patch->val | val;
}
util_dynarray_resize(&fd4_ctx->rbrc_patches, 0);
}
 
/* for rendering directly to system memory: */
static void
fd4_emit_sysmem_prep(struct fd_context *ctx)
{
struct pipe_framebuffer_state *pfb = &ctx->framebuffer;
struct fd_ringbuffer *ring = ctx->ring;
 
fd4_emit_restore(ctx);
 
OUT_PKT0(ring, REG_A4XX_RB_FRAME_BUFFER_DIMENSION, 1);
OUT_RING(ring, A4XX_RB_FRAME_BUFFER_DIMENSION_WIDTH(pfb->width) |
A4XX_RB_FRAME_BUFFER_DIMENSION_HEIGHT(pfb->height));
 
emit_mrt(ring, pfb->nr_cbufs, pfb->cbufs, NULL, 0);
 
/* setup scissor/offset for current tile: */
OUT_PKT0(ring, REG_A4XX_RB_BIN_OFFSET, 1);
OUT_RING(ring, A4XX_RB_BIN_OFFSET_X(0) |
A4XX_RB_BIN_OFFSET_Y(0));
 
OUT_PKT0(ring, REG_A4XX_GRAS_SC_SCREEN_SCISSOR_TL, 2);
OUT_RING(ring, A4XX_GRAS_SC_SCREEN_SCISSOR_TL_X(0) |
A4XX_GRAS_SC_SCREEN_SCISSOR_TL_Y(0));
OUT_RING(ring, A4XX_GRAS_SC_SCREEN_SCISSOR_BR_X(pfb->width - 1) |
A4XX_GRAS_SC_SCREEN_SCISSOR_BR_Y(pfb->height - 1));
 
OUT_PKT0(ring, REG_A4XX_RB_MODE_CONTROL, 1);
OUT_RING(ring, A4XX_RB_MODE_CONTROL_WIDTH(0) |
A4XX_RB_MODE_CONTROL_HEIGHT(0) |
0x00c00000); /* XXX */
 
patch_draws(ctx, IGNORE_VISIBILITY);
patch_rbrc(ctx, 0); // XXX
}
 
static void
update_vsc_pipe(struct fd_context *ctx)
{
struct fd4_context *fd4_ctx = fd4_context(ctx);
struct fd_ringbuffer *ring = ctx->ring;
int i;
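/* set up the visibility stream (VSC) pipes used for binning: per-pipe
* x/y/w/h config plus a lazily allocated buffer to receive the
* visibility data:
*/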
 
OUT_PKT0(ring, REG_A4XX_VSC_SIZE_ADDRESS, 1);
OUT_RELOCW(ring, fd4_ctx->vsc_size_mem, 0, 0, 0); /* VSC_SIZE_ADDRESS */
 
OUT_PKT0(ring, REG_A4XX_VSC_PIPE_CONFIG_REG(0), 8);
for (i = 0; i < 8; i++) {
struct fd_vsc_pipe *pipe = &ctx->pipe[i];
OUT_RING(ring, A4XX_VSC_PIPE_CONFIG_REG_X(pipe->x) |
A4XX_VSC_PIPE_CONFIG_REG_Y(pipe->y) |
A4XX_VSC_PIPE_CONFIG_REG_W(pipe->w) |
A4XX_VSC_PIPE_CONFIG_REG_H(pipe->h));
}
 
OUT_PKT0(ring, REG_A4XX_VSC_PIPE_DATA_ADDRESS_REG(0), 8);
for (i = 0; i < 8; i++) {
struct fd_vsc_pipe *pipe = &ctx->pipe[i];
if (!pipe->bo) {
pipe->bo = fd_bo_new(ctx->dev, 0x40000,
DRM_FREEDRENO_GEM_TYPE_KMEM);
}
OUT_RELOCW(ring, pipe->bo, 0, 0, 0); /* VSC_PIPE_DATA_ADDRESS[i] */
}
 
OUT_PKT0(ring, REG_A4XX_VSC_PIPE_DATA_LENGTH_REG(0), 8);
for (i = 0; i < 8; i++) {
struct fd_vsc_pipe *pipe = &ctx->pipe[i];
OUT_RING(ring, fd_bo_size(pipe->bo) - 32); /* VSC_PIPE_DATA_LENGTH[i] */
}
}
 
/* before first tile */
static void
fd4_emit_tile_init(struct fd_context *ctx)
{
struct fd_ringbuffer *ring = ctx->ring;
struct fd_gmem_stateobj *gmem = &ctx->gmem;
uint32_t rb_render_control;
 
fd4_emit_restore(ctx);
 
OUT_PKT0(ring, REG_A4XX_VSC_BIN_SIZE, 1);
OUT_RING(ring, A4XX_VSC_BIN_SIZE_WIDTH(gmem->bin_w) |
A4XX_VSC_BIN_SIZE_HEIGHT(gmem->bin_h));
 
OUT_PKT0(ring, REG_A4XX_RB_MODE_CONTROL, 1);
OUT_RING(ring, A4XX_RB_MODE_CONTROL_WIDTH(gmem->bin_w) |
A4XX_RB_MODE_CONTROL_HEIGHT(gmem->bin_h) |
0x00010000); /* XXX */
 
update_vsc_pipe(ctx);
patch_draws(ctx, IGNORE_VISIBILITY);
 
rb_render_control = 0; // XXX or BINNING_PASS.. but maybe we can emit only from gmem
patch_rbrc(ctx, rb_render_control);
}
 
/* before mem2gmem */
static void
fd4_emit_tile_prep(struct fd_context *ctx, struct fd_tile *tile)
{
struct fd_ringbuffer *ring = ctx->ring;
struct pipe_framebuffer_state *pfb = &ctx->framebuffer;
struct fd_gmem_stateobj *gmem = &ctx->gmem;
uint32_t reg;
 
OUT_PKT0(ring, REG_A4XX_RB_DEPTH_INFO, 3);
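/* depth/stencil lives in gmem at depth_base(); its pitch is in terms
* of the bin width:
*/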
reg = A4XX_RB_DEPTH_INFO_DEPTH_BASE(depth_base(ctx));
if (pfb->zsbuf) {
reg |= A4XX_RB_DEPTH_INFO_DEPTH_FORMAT(fd4_pipe2depth(pfb->zsbuf->format));
}
OUT_RING(ring, reg);
if (pfb->zsbuf) {
uint32_t cpp = util_format_get_blocksize(pfb->zsbuf->format);
OUT_RING(ring, A4XX_RB_DEPTH_PITCH(cpp * gmem->bin_w));
OUT_RING(ring, A4XX_RB_DEPTH_PITCH2(cpp * gmem->bin_w));
} else {
OUT_RING(ring, 0x00000000);
OUT_RING(ring, 0x00000000);
}
 
OUT_PKT0(ring, REG_A4XX_GRAS_DEPTH_CONTROL, 1);
if (pfb->zsbuf) {
OUT_RING(ring, A4XX_GRAS_DEPTH_CONTROL_FORMAT(
fd4_pipe2depth(pfb->zsbuf->format)));
} else {
OUT_RING(ring, A4XX_GRAS_DEPTH_CONTROL_FORMAT(DEPTH4_NONE));
}
 
if (ctx->needs_rb_fbd) {
fd_wfi(ctx, ring);
OUT_PKT0(ring, REG_A4XX_RB_FRAME_BUFFER_DIMENSION, 1);
OUT_RING(ring, A4XX_RB_FRAME_BUFFER_DIMENSION_WIDTH(pfb->width) |
A4XX_RB_FRAME_BUFFER_DIMENSION_HEIGHT(pfb->height));
ctx->needs_rb_fbd = false;
}
}
 
/* before IB to rendering cmds: */
static void
fd4_emit_tile_renderprep(struct fd_context *ctx, struct fd_tile *tile)
{
struct fd_ringbuffer *ring = ctx->ring;
struct fd_gmem_stateobj *gmem = &ctx->gmem;
struct pipe_framebuffer_state *pfb = &ctx->framebuffer;
 
uint32_t x1 = tile->xoff;
uint32_t y1 = tile->yoff;
uint32_t x2 = tile->xoff + tile->bin_w - 1;
uint32_t y2 = tile->yoff + tile->bin_h - 1;
 
OUT_PKT3(ring, CP_SET_BIN, 3);
OUT_RING(ring, 0x00000000);
OUT_RING(ring, CP_SET_BIN_1_X1(x1) | CP_SET_BIN_1_Y1(y1));
OUT_RING(ring, CP_SET_BIN_2_X2(x2) | CP_SET_BIN_2_Y2(y2));
 
emit_mrt(ring, pfb->nr_cbufs, pfb->cbufs, NULL, gmem->bin_w);
 
/* setup scissor/offset for current tile: */
OUT_PKT0(ring, REG_A4XX_RB_BIN_OFFSET, 1);
OUT_RING(ring, A4XX_RB_BIN_OFFSET_X(tile->xoff) |
A4XX_RB_BIN_OFFSET_Y(tile->yoff));
 
OUT_PKT0(ring, REG_A4XX_GRAS_SC_SCREEN_SCISSOR_TL, 2);
OUT_RING(ring, A4XX_GRAS_SC_SCREEN_SCISSOR_TL_X(x1) |
A4XX_GRAS_SC_SCREEN_SCISSOR_TL_Y(y1));
OUT_RING(ring, A4XX_GRAS_SC_SCREEN_SCISSOR_BR_X(x2) |
A4XX_GRAS_SC_SCREEN_SCISSOR_BR_Y(y2));
}
 
void
fd4_gmem_init(struct pipe_context *pctx)
{
struct fd_context *ctx = fd_context(pctx);
 
ctx->emit_sysmem_prep = fd4_emit_sysmem_prep;
ctx->emit_tile_init = fd4_emit_tile_init;
ctx->emit_tile_prep = fd4_emit_tile_prep;
ctx->emit_tile_mem2gmem = fd4_emit_tile_mem2gmem;
ctx->emit_tile_renderprep = fd4_emit_tile_renderprep;
ctx->emit_tile_gmem2mem = fd4_emit_tile_gmem2mem;
}
/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/freedreno/a4xx/fd4_gmem.h
0,0 → 1,36
/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */
 
/*
* Copyright (C) 2014 Rob Clark <robclark@freedesktop.org>
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Authors:
* Rob Clark <robclark@freedesktop.org>
*/
 
#ifndef FD4_GMEM_H_
#define FD4_GMEM_H_
 
#include "pipe/p_context.h"
 
void fd4_gmem_init(struct pipe_context *pctx);
 
#endif /* FD4_GMEM_H_ */
/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/freedreno/a4xx/fd4_program.c
0,0 → 1,548
/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */
 
/*
* Copyright (C) 2014 Rob Clark <robclark@freedesktop.org>
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Authors:
* Rob Clark <robclark@freedesktop.org>
*/
 
#include "pipe/p_state.h"
#include "util/u_string.h"
#include "util/u_memory.h"
#include "util/u_inlines.h"
#include "util/u_format.h"
#include "tgsi/tgsi_dump.h"
#include "tgsi/tgsi_parse.h"
 
#include "freedreno_program.h"
 
#include "fd4_program.h"
#include "fd4_emit.h"
#include "fd4_texture.h"
#include "fd4_format.h"
 
static void
delete_shader_stateobj(struct fd4_shader_stateobj *so)
{
ir3_shader_destroy(so->shader);
free(so);
}
 
static struct fd4_shader_stateobj *
create_shader_stateobj(struct pipe_context *pctx, const struct pipe_shader_state *cso,
enum shader_t type)
{
struct fd4_shader_stateobj *so = CALLOC_STRUCT(fd4_shader_stateobj);
so->shader = ir3_shader_create(pctx, cso->tokens, type);
return so;
}
 
static void *
fd4_fp_state_create(struct pipe_context *pctx,
const struct pipe_shader_state *cso)
{
return create_shader_stateobj(pctx, cso, SHADER_FRAGMENT);
}
 
static void
fd4_fp_state_delete(struct pipe_context *pctx, void *hwcso)
{
struct fd4_shader_stateobj *so = hwcso;
delete_shader_stateobj(so);
}
 
static void *
fd4_vp_state_create(struct pipe_context *pctx,
const struct pipe_shader_state *cso)
{
return create_shader_stateobj(pctx, cso, SHADER_VERTEX);
}
 
static void
fd4_vp_state_delete(struct pipe_context *pctx, void *hwcso)
{
struct fd4_shader_stateobj *so = hwcso;
delete_shader_stateobj(so);
}
 
static void
emit_shader(struct fd_ringbuffer *ring, const struct ir3_shader_variant *so)
{
const struct ir3_info *si = &so->info;
enum adreno_state_block sb;
enum adreno_state_src src;
uint32_t i, sz, *bin;
 
if (so->type == SHADER_VERTEX) {
sb = SB_VERT_SHADER;
} else {
sb = SB_FRAG_SHADER;
}
 
if (fd_mesa_debug & FD_DBG_DIRECT) {
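/* emit the shader body inline in the cmdstream: */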
sz = si->sizedwords;
src = SS_DIRECT;
bin = fd_bo_map(so->bo);
} else {
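/* otherwise load the shader indirectly from its backing bo: */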
sz = 0;
src = 2; // enums different on a4xx..
bin = NULL;
}
 
OUT_PKT3(ring, CP_LOAD_STATE, 2 + sz);
OUT_RING(ring, CP_LOAD_STATE_0_DST_OFF(0) |
CP_LOAD_STATE_0_STATE_SRC(src) |
CP_LOAD_STATE_0_STATE_BLOCK(sb) |
CP_LOAD_STATE_0_NUM_UNIT(so->instrlen));
if (bin) {
OUT_RING(ring, CP_LOAD_STATE_1_EXT_SRC_ADDR(0) |
CP_LOAD_STATE_1_STATE_TYPE(ST_SHADER));
} else {
OUT_RELOC(ring, so->bo, 0,
CP_LOAD_STATE_1_STATE_TYPE(ST_SHADER), 0);
}
for (i = 0; i < sz; i++) {
OUT_RING(ring, bin[i]);
}
}
 
struct stage {
const struct ir3_shader_variant *v;
const struct ir3_info *i;
/* const sizes are in units of 4 * vec4 */
uint8_t constoff;
uint8_t constlen;
/* instr sizes are in units of 16 instructions */
uint8_t instroff;
uint8_t instrlen;
};
 
enum {
VS = 0,
FS = 1,
HS = 2,
DS = 3,
GS = 4,
MAX_STAGES
};
 
static void
setup_stages(struct fd4_emit *emit, struct stage *s)
{
unsigned i;
 
s[VS].v = fd4_emit_get_vp(emit);
 
if (emit->key.binning_pass) {
/* use dummy stateobj to simplify binning vs non-binning: */
static const struct ir3_shader_variant binning_fp = {};
s[FS].v = &binning_fp;
} else {
s[FS].v = fd4_emit_get_fp(emit);
}
 
s[HS].v = s[DS].v = s[GS].v = NULL; /* for now */
 
for (i = 0; i < MAX_STAGES; i++) {
if (s[i].v) {
s[i].i = &s[i].v->info;
/* constlen is in units of 4 * vec4: */
s[i].constlen = align(s[i].v->constlen, 4) / 4;
/* instrlen is already in units of 16 instr.. although
* probably we should ditch that and not make the compiler
* care about instruction group size of a3xx vs a4xx
*/
s[i].instrlen = s[i].v->instrlen;
} else {
s[i].i = NULL;
s[i].constlen = 0;
s[i].instrlen = 0;
}
}
 
/* NOTE: at least for gles2, the blob partitions the const space with VS
* at the bottom and FS taking the entire remaining space.  We probably
* don't need to do it the same way, but for now mimic what the blob
* does to make it easier to diff against register values from the blob.
*
* NOTE: if VS.instrlen + FS.instrlen > 64, then one or both shaders
* are run from external memory.
*/
if ((s[VS].instrlen + s[FS].instrlen) > 64) {
/* prioritize FS for internal memory: */
if (s[FS].instrlen < 64) {
/* if FS can fit, kick VS out to external memory: */
s[VS].instrlen = 0;
} else if (s[VS].instrlen < 64) {
/* otherwise if VS can fit, kick out FS: */
s[FS].instrlen = 0;
} else {
/* neither can fit, run both from external memory: */
s[VS].instrlen = 0;
s[FS].instrlen = 0;
}
}
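/* fixed partitioning for now: VS consts at the bottom of the const space
* with FS taking the rest; VS instructions at the bottom of instruction
* memory with FS packed against the top:
*/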
s[VS].constlen = 66;
s[FS].constlen = 128 - s[VS].constlen;
s[VS].instroff = 0;
s[VS].constoff = 0;
s[FS].instroff = 64 - s[FS].instrlen;
s[FS].constoff = s[VS].constlen;
s[HS].instroff = s[DS].instroff = s[GS].instroff = s[FS].instroff;
s[HS].constoff = s[DS].constoff = s[GS].constoff = s[FS].constoff;
}
 
void
fd4_program_emit(struct fd_ringbuffer *ring, struct fd4_emit *emit)
{
struct stage s[MAX_STAGES];
uint32_t pos_regid, posz_regid, psize_regid, color_regid;
uint32_t face_regid, coord_regid, zwcoord_regid;
int constmode;
int i, j, k;
 
setup_stages(emit, s);
 
/* blob seems to always use constmode currently: */
constmode = 1;
 
pos_regid = ir3_find_output_regid(s[VS].v,
ir3_semantic_name(TGSI_SEMANTIC_POSITION, 0));
posz_regid = ir3_find_output_regid(s[FS].v,
ir3_semantic_name(TGSI_SEMANTIC_POSITION, 0));
psize_regid = ir3_find_output_regid(s[VS].v,
ir3_semantic_name(TGSI_SEMANTIC_PSIZE, 0));
color_regid = ir3_find_output_regid(s[FS].v,
ir3_semantic_name(TGSI_SEMANTIC_COLOR, 0));
 
if (util_format_is_alpha(emit->pformat))
color_regid += 3;
 
/* TODO get these dynamically: */
face_regid = s[FS].v->frag_face ? regid(0,0) : regid(63,0);
coord_regid = s[FS].v->frag_coord ? regid(0,0) : regid(63,0);
zwcoord_regid = s[FS].v->frag_coord ? regid(0,2) : regid(63,0);
 
/* we could probably divide this up into things that need to be
* emitted if frag-prog is dirty vs if vert-prog is dirty..
*/
 
OUT_PKT0(ring, REG_A4XX_HLSQ_UPDATE_CONTROL, 1);
OUT_RING(ring, 0x00000003);
 
OUT_PKT0(ring, REG_A4XX_HLSQ_CONTROL_0_REG, 5);
OUT_RING(ring, A4XX_HLSQ_CONTROL_0_REG_FSTHREADSIZE(FOUR_QUADS) |
A4XX_HLSQ_CONTROL_0_REG_CONSTMODE(constmode) |
A4XX_HLSQ_CONTROL_0_REG_FSSUPERTHREADENABLE |
/* NOTE: I guess SHADERRESTART and CONSTFULLUPDATE maybe
* flush some caches? I think we only need to set those
* bits if we have updated const or shader..
*/
A4XX_HLSQ_CONTROL_0_REG_SPSHADERRESTART |
A4XX_HLSQ_CONTROL_0_REG_SPCONSTFULLUPDATE);
OUT_RING(ring, A4XX_HLSQ_CONTROL_1_REG_VSTHREADSIZE(TWO_QUADS) |
A4XX_HLSQ_CONTROL_1_REG_VSSUPERTHREADENABLE |
A4XX_HLSQ_CONTROL_1_REG_COORDREGID(coord_regid) |
A4XX_HLSQ_CONTROL_1_REG_ZWCOORDREGID(zwcoord_regid));
OUT_RING(ring, A4XX_HLSQ_CONTROL_2_REG_PRIMALLOCTHRESHOLD(63) |
0x3f3f000 | /* XXX */
A4XX_HLSQ_CONTROL_2_REG_FACEREGID(face_regid));
OUT_RING(ring, A4XX_HLSQ_CONTROL_3_REG_REGID(s[FS].v->pos_regid) |
0xfcfcfc00);
OUT_RING(ring, 0x00fcfcfc); /* XXX HLSQ_CONTROL_4 */
 
OUT_PKT0(ring, REG_A4XX_HLSQ_VS_CONTROL_REG, 5);
OUT_RING(ring, A4XX_HLSQ_VS_CONTROL_REG_CONSTLENGTH(s[VS].constlen) |
A4XX_HLSQ_VS_CONTROL_REG_CONSTOBJECTOFFSET(s[VS].constoff) |
A4XX_HLSQ_VS_CONTROL_REG_INSTRLENGTH(s[VS].instrlen) |
A4XX_HLSQ_VS_CONTROL_REG_SHADEROBJOFFSET(s[VS].instroff));
OUT_RING(ring, A4XX_HLSQ_FS_CONTROL_REG_CONSTLENGTH(s[FS].constlen) |
A4XX_HLSQ_FS_CONTROL_REG_CONSTOBJECTOFFSET(s[FS].constoff) |
A4XX_HLSQ_FS_CONTROL_REG_INSTRLENGTH(s[FS].instrlen) |
A4XX_HLSQ_FS_CONTROL_REG_SHADEROBJOFFSET(s[FS].instroff));
OUT_RING(ring, A4XX_HLSQ_HS_CONTROL_REG_CONSTLENGTH(s[HS].constlen) |
A4XX_HLSQ_HS_CONTROL_REG_CONSTOBJECTOFFSET(s[HS].constoff) |
A4XX_HLSQ_HS_CONTROL_REG_INSTRLENGTH(s[HS].instrlen) |
A4XX_HLSQ_HS_CONTROL_REG_SHADEROBJOFFSET(s[HS].instroff));
OUT_RING(ring, A4XX_HLSQ_DS_CONTROL_REG_CONSTLENGTH(s[DS].constlen) |
A4XX_HLSQ_DS_CONTROL_REG_CONSTOBJECTOFFSET(s[DS].constoff) |
A4XX_HLSQ_DS_CONTROL_REG_INSTRLENGTH(s[DS].instrlen) |
A4XX_HLSQ_DS_CONTROL_REG_SHADEROBJOFFSET(s[DS].instroff));
OUT_RING(ring, A4XX_HLSQ_GS_CONTROL_REG_CONSTLENGTH(s[GS].constlen) |
A4XX_HLSQ_GS_CONTROL_REG_CONSTOBJECTOFFSET(s[GS].constoff) |
A4XX_HLSQ_GS_CONTROL_REG_INSTRLENGTH(s[GS].instrlen) |
A4XX_HLSQ_GS_CONTROL_REG_SHADEROBJOFFSET(s[GS].instroff));
 
OUT_PKT0(ring, REG_A4XX_SP_SP_CTRL_REG, 1);
OUT_RING(ring, 0x140010 | /* XXX */
COND(emit->key.binning_pass, A4XX_SP_SP_CTRL_REG_BINNING_PASS));
 
OUT_PKT0(ring, REG_A4XX_SP_INSTR_CACHE_CTRL, 1);
OUT_RING(ring, 0x7f | /* XXX */
COND(s[VS].instrlen, A4XX_SP_INSTR_CACHE_CTRL_VS_BUFFER) |
COND(s[FS].instrlen, A4XX_SP_INSTR_CACHE_CTRL_FS_BUFFER) |
COND(s[VS].instrlen && s[FS].instrlen,
A4XX_SP_INSTR_CACHE_CTRL_INSTR_BUFFER));
 
OUT_PKT0(ring, REG_A4XX_SP_VS_LENGTH_REG, 1);
OUT_RING(ring, s[VS].v->instrlen); /* SP_VS_LENGTH_REG */
 
OUT_PKT0(ring, REG_A4XX_SP_VS_CTRL_REG0, 3);
OUT_RING(ring, A4XX_SP_VS_CTRL_REG0_THREADMODE(MULTI) |
A4XX_SP_VS_CTRL_REG0_HALFREGFOOTPRINT(s[VS].i->max_half_reg + 1) |
A4XX_SP_VS_CTRL_REG0_FULLREGFOOTPRINT(s[VS].i->max_reg + 1) |
A4XX_SP_VS_CTRL_REG0_INOUTREGOVERLAP(0) |
A4XX_SP_VS_CTRL_REG0_THREADSIZE(TWO_QUADS) |
A4XX_SP_VS_CTRL_REG0_SUPERTHREADMODE |
COND(s[VS].v->has_samp, A4XX_SP_VS_CTRL_REG0_PIXLODENABLE));
OUT_RING(ring, A4XX_SP_VS_CTRL_REG1_CONSTLENGTH(s[VS].constlen) |
A4XX_SP_VS_CTRL_REG1_INITIALOUTSTANDING(s[VS].v->total_in));
OUT_RING(ring, A4XX_SP_VS_PARAM_REG_POSREGID(pos_regid) |
A4XX_SP_VS_PARAM_REG_PSIZEREGID(psize_regid) |
A4XX_SP_VS_PARAM_REG_TOTALVSOUTVAR(align(s[FS].v->total_in, 4) / 4));
 
for (i = 0, j = -1; (i < 16) && (j < (int)s[FS].v->inputs_count); i++) {
uint32_t reg = 0;
 
OUT_PKT0(ring, REG_A4XX_SP_VS_OUT_REG(i), 1);
 
j = ir3_next_varying(s[FS].v, j);
if (j < s[FS].v->inputs_count) {
k = ir3_find_output(s[VS].v, s[FS].v->inputs[j].semantic);
reg |= A4XX_SP_VS_OUT_REG_A_REGID(s[VS].v->outputs[k].regid);
reg |= A4XX_SP_VS_OUT_REG_A_COMPMASK(s[FS].v->inputs[j].compmask);
}
 
j = ir3_next_varying(s[FS].v, j);
if (j < s[FS].v->inputs_count) {
k = ir3_find_output(s[VS].v, s[FS].v->inputs[j].semantic);
reg |= A4XX_SP_VS_OUT_REG_B_REGID(s[VS].v->outputs[k].regid);
reg |= A4XX_SP_VS_OUT_REG_B_COMPMASK(s[FS].v->inputs[j].compmask);
}
 
OUT_RING(ring, reg);
}
 
for (i = 0, j = -1; (i < 8) && (j < (int)s[FS].v->inputs_count); i++) {
uint32_t reg = 0;
 
OUT_PKT0(ring, REG_A4XX_SP_VS_VPC_DST_REG(i), 1);
 
j = ir3_next_varying(s[FS].v, j);
if (j < s[FS].v->inputs_count)
reg |= A4XX_SP_VS_VPC_DST_REG_OUTLOC0(s[FS].v->inputs[j].inloc);
j = ir3_next_varying(s[FS].v, j);
if (j < s[FS].v->inputs_count)
reg |= A4XX_SP_VS_VPC_DST_REG_OUTLOC1(s[FS].v->inputs[j].inloc);
j = ir3_next_varying(s[FS].v, j);
if (j < s[FS].v->inputs_count)
reg |= A4XX_SP_VS_VPC_DST_REG_OUTLOC2(s[FS].v->inputs[j].inloc);
j = ir3_next_varying(s[FS].v, j);
if (j < s[FS].v->inputs_count)
reg |= A4XX_SP_VS_VPC_DST_REG_OUTLOC3(s[FS].v->inputs[j].inloc);
 
OUT_RING(ring, reg);
}
 
OUT_PKT0(ring, REG_A4XX_SP_VS_OBJ_OFFSET_REG, 2);
OUT_RING(ring, A4XX_SP_VS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET(s[VS].constoff) |
A4XX_SP_VS_OBJ_OFFSET_REG_SHADEROBJOFFSET(s[VS].instroff));
OUT_RELOC(ring, s[VS].v->bo, 0, 0, 0); /* SP_VS_OBJ_START_REG */
 
OUT_PKT0(ring, REG_A4XX_SP_FS_LENGTH_REG, 1);
OUT_RING(ring, s[FS].v->instrlen); /* SP_FS_LENGTH_REG */
 
OUT_PKT0(ring, REG_A4XX_SP_FS_CTRL_REG0, 2);
OUT_RING(ring, A4XX_SP_FS_CTRL_REG0_THREADMODE(MULTI) |
COND(s[FS].v->total_in > 0, A4XX_SP_FS_CTRL_REG0_VARYING) |
A4XX_SP_FS_CTRL_REG0_HALFREGFOOTPRINT(s[FS].i->max_half_reg + 1) |
A4XX_SP_FS_CTRL_REG0_FULLREGFOOTPRINT(s[FS].i->max_reg + 1) |
A4XX_SP_FS_CTRL_REG0_INOUTREGOVERLAP(1) |
A4XX_SP_FS_CTRL_REG0_THREADSIZE(FOUR_QUADS) |
A4XX_SP_FS_CTRL_REG0_SUPERTHREADMODE |
COND(s[FS].v->has_samp, A4XX_SP_FS_CTRL_REG0_PIXLODENABLE));
OUT_RING(ring, A4XX_SP_FS_CTRL_REG1_CONSTLENGTH(s[FS].constlen) |
0x80000000 | /* XXX */
COND(s[FS].v->frag_face, A4XX_SP_FS_CTRL_REG1_FACENESS) |
COND(s[FS].v->total_in > 0, A4XX_SP_FS_CTRL_REG1_VARYING) |
COND(s[FS].v->frag_coord, A4XX_SP_FS_CTRL_REG1_FRAGCOORD));
 
OUT_PKT0(ring, REG_A4XX_SP_FS_OBJ_OFFSET_REG, 2);
OUT_RING(ring, A4XX_SP_FS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET(s[FS].constoff) |
A4XX_SP_FS_OBJ_OFFSET_REG_SHADEROBJOFFSET(s[FS].instroff));
if (emit->key.binning_pass)
OUT_RING(ring, 0x00000000);
else
OUT_RELOC(ring, s[FS].v->bo, 0, 0, 0); /* SP_FS_OBJ_START_REG */
 
OUT_PKT0(ring, REG_A4XX_SP_HS_OBJ_OFFSET_REG, 1);
OUT_RING(ring, A4XX_SP_HS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET(s[HS].constoff) |
A4XX_SP_HS_OBJ_OFFSET_REG_SHADEROBJOFFSET(s[HS].instroff));
 
OUT_PKT0(ring, REG_A4XX_SP_DS_OBJ_OFFSET_REG, 1);
OUT_RING(ring, A4XX_SP_DS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET(s[DS].constoff) |
A4XX_SP_DS_OBJ_OFFSET_REG_SHADEROBJOFFSET(s[DS].instroff));
 
OUT_PKT0(ring, REG_A4XX_SP_GS_OBJ_OFFSET_REG, 1);
OUT_RING(ring, A4XX_SP_GS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET(s[GS].constoff) |
A4XX_SP_GS_OBJ_OFFSET_REG_SHADEROBJOFFSET(s[GS].instroff));
 
OUT_PKT0(ring, REG_A4XX_RB_RENDER_CONTROL2, 1);
OUT_RING(ring, A4XX_RB_RENDER_CONTROL2_MSAA_SAMPLES(0) |
COND(s[FS].v->total_in > 0, A4XX_RB_RENDER_CONTROL2_VARYING) |
COND(s[FS].v->frag_face, A4XX_RB_RENDER_CONTROL2_FACENESS) |
COND(s[FS].v->frag_coord, A4XX_RB_RENDER_CONTROL2_XCOORD |
A4XX_RB_RENDER_CONTROL2_YCOORD |
// TODO enabling gl_FragCoord.z is causing lockups on 0ad (but seems
// to work everywhere else).
// A4XX_RB_RENDER_CONTROL2_ZCOORD |
A4XX_RB_RENDER_CONTROL2_WCOORD));
 
OUT_PKT0(ring, REG_A4XX_RB_FS_OUTPUT_REG, 1);
OUT_RING(ring, A4XX_RB_FS_OUTPUT_REG_MRT(1) |
COND(s[FS].v->writes_pos, A4XX_RB_FS_OUTPUT_REG_FRAG_WRITES_Z));
 
OUT_PKT0(ring, REG_A4XX_SP_FS_OUTPUT_REG, 1);
if (s[FS].v->writes_pos) {
OUT_RING(ring, 0x00000001 |
A4XX_SP_FS_OUTPUT_REG_DEPTH_ENABLE |
A4XX_SP_FS_OUTPUT_REG_DEPTH_REGID(posz_regid));
} else {
OUT_RING(ring, 0x00000001);
}
 
OUT_PKT0(ring, REG_A4XX_SP_FS_MRT_REG(0), 8);
OUT_RING(ring, A4XX_SP_FS_MRT_REG_REGID(color_regid) |
A4XX_SP_FS_MRT_REG_MRTFORMAT(emit->format) |
COND(emit->key.half_precision, A4XX_SP_FS_MRT_REG_HALF_PRECISION));
OUT_RING(ring, A4XX_SP_FS_MRT_REG_REGID(0));
OUT_RING(ring, A4XX_SP_FS_MRT_REG_REGID(0));
OUT_RING(ring, A4XX_SP_FS_MRT_REG_REGID(0));
OUT_RING(ring, A4XX_SP_FS_MRT_REG_REGID(0));
OUT_RING(ring, A4XX_SP_FS_MRT_REG_REGID(0));
OUT_RING(ring, A4XX_SP_FS_MRT_REG_REGID(0));
OUT_RING(ring, A4XX_SP_FS_MRT_REG_REGID(0));
 
if (emit->key.binning_pass) {
OUT_PKT0(ring, REG_A4XX_VPC_ATTR, 2);
OUT_RING(ring, A4XX_VPC_ATTR_THRDASSIGN(1) |
0x40000000 | /* XXX */
COND(s[VS].v->writes_psize, A4XX_VPC_ATTR_PSIZE));
OUT_RING(ring, 0x00000000);
} else {
uint32_t vinterp[8], flatshade[2];
 
memset(vinterp, 0, sizeof(vinterp));
memset(flatshade, 0, sizeof(flatshade));
 
/* looks like we need to do int varyings in the frag
* shader on a4xx (no flatshad reg? or a420.0 bug?):
*
* (sy)(ss)nop
* (sy)ldlv.u32 r0.x,l[r0.x], 1
* ldlv.u32 r0.y,l[r0.x+1], 1
* (ss)bary.f (ei)r63.x, 0, r0.x
* (ss)(rpt1)cov.s32f16 hr0.x, (r)r0.x
* (rpt5)nop
* sam (f16)(xyzw)hr0.x, hr0.x, s#0, t#0
*
* Possibly on later a4xx variants we'll be able to use
* something like the code below instead of the workaround
* in the shader:
*/
#if 0
/* figure out VARYING_INTERP / FLAT_SHAD register values: */
for (j = -1; (j = ir3_next_varying(s[FS].v, j)) < (int)s[FS].v->inputs_count; ) {
uint32_t interp = s[FS].v->inputs[j].interpolate;
if ((interp == TGSI_INTERPOLATE_CONSTANT) ||
((interp == TGSI_INTERPOLATE_COLOR) && emit->rasterflat)) {
/* TODO might be cleaner to just +8 in SP_VS_VPC_DST_REG
* instead.. rather than -8 everywhere else..
*/
uint32_t loc = s[FS].v->inputs[j].inloc - 8;
 
/* currently assuming varyings aligned to 4 (not
* packed):
*/
debug_assert((loc % 4) == 0);
 
for (i = 0; i < 4; i++, loc++) {
vinterp[loc / 16] |= 1 << ((loc % 16) * 2);
flatshade[loc / 32] |= 1 << (loc % 32);
}
}
}
#endif
 
OUT_PKT0(ring, REG_A4XX_VPC_ATTR, 2);
OUT_RING(ring, A4XX_VPC_ATTR_TOTALATTR(s[FS].v->total_in) |
A4XX_VPC_ATTR_THRDASSIGN(1) |
COND(s[FS].v->total_in > 0, A4XX_VPC_ATTR_ENABLE) |
0x40000000 | /* XXX */
COND(s[VS].v->writes_psize, A4XX_VPC_ATTR_PSIZE));
OUT_RING(ring, A4XX_VPC_PACK_NUMFPNONPOSVAR(s[FS].v->total_in) |
A4XX_VPC_PACK_NUMNONPOSVSVAR(s[FS].v->total_in));
 
OUT_PKT0(ring, REG_A4XX_VPC_VARYING_INTERP_MODE(0), 8);
for (i = 0; i < 8; i++)
OUT_RING(ring, vinterp[i]); /* VPC_VARYING_INTERP[i].MODE */
 
OUT_PKT0(ring, REG_A4XX_VPC_VARYING_PS_REPL_MODE(0), 8);
for (i = 0; i < 8; i++)
OUT_RING(ring, s[FS].v->shader->vpsrepl[i]); /* VPC_VARYING_PS_REPL[i] */
}
 
if (s[VS].instrlen)
emit_shader(ring, s[VS].v);
 
if (!emit->key.binning_pass)
if (s[FS].instrlen)
emit_shader(ring, s[FS].v);
}
 
/* hack.. until we figure out how to deal w/ vpsrepl properly.. */
static void
fix_blit_fp(struct pipe_context *pctx)
{
struct fd_context *ctx = fd_context(pctx);
struct fd4_shader_stateobj *so = ctx->blit_prog[0].fp;
 
so->shader->vpsrepl[0] = 0x99999999;
so->shader->vpsrepl[1] = 0x99999999;
so->shader->vpsrepl[2] = 0x99999999;
so->shader->vpsrepl[3] = 0x99999999;
}
 
void
fd4_prog_init(struct pipe_context *pctx)
{
pctx->create_fs_state = fd4_fp_state_create;
pctx->delete_fs_state = fd4_fp_state_delete;
 
pctx->create_vs_state = fd4_vp_state_create;
pctx->delete_vs_state = fd4_vp_state_delete;
 
fd_prog_init(pctx);
 
fix_blit_fp(pctx);
}
/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/freedreno/a4xx/fd4_program.h
0,0 → 1,46
/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */
 
/*
* Copyright (C) 2014 Rob Clark <robclark@freedesktop.org>
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Authors:
* Rob Clark <robclark@freedesktop.org>
*/
 
#ifndef FD4_PROGRAM_H_
#define FD4_PROGRAM_H_
 
#include "pipe/p_context.h"
#include "freedreno_context.h"
#include "ir3_shader.h"
 
struct fd4_shader_stateobj {
struct ir3_shader *shader;
};
 
struct fd4_emit;
 
void fd4_program_emit(struct fd_ringbuffer *ring, struct fd4_emit *emit);
 
void fd4_prog_init(struct pipe_context *pctx);
 
#endif /* FD4_PROGRAM_H_ */
/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/freedreno/a4xx/fd4_query.c
0,0 → 1,39
/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */
 
/*
* Copyright (C) 2014 Rob Clark <robclark@freedesktop.org>
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Authors:
* Rob Clark <robclark@freedesktop.org>
*/
 
#include "freedreno_query_hw.h"
#include "freedreno_context.h"
#include "freedreno_util.h"
 
#include "fd4_query.h"
#include "fd4_format.h"
 
void fd4_query_context_init(struct pipe_context *pctx)
{
/* TODO */
}
/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/freedreno/a4xx/fd4_query.h
0,0 → 1,36
/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */
 
/*
* Copyright (C) 2014 Rob Clark <robclark@freedesktop.org>
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Authors:
* Rob Clark <robclark@freedesktop.org>
*/
 
#ifndef FD4_QUERY_H_
#define FD4_QUERY_H_
 
#include "pipe/p_context.h"
 
void fd4_query_context_init(struct pipe_context *pctx);
 
#endif /* FD4_QUERY_H_ */
/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/freedreno/a4xx/fd4_rasterizer.c
0,0 → 1,94
/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */
 
/*
* Copyright (C) 2014 Rob Clark <robclark@freedesktop.org>
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Authors:
* Rob Clark <robclark@freedesktop.org>
*/
 
 
#include "pipe/p_state.h"
#include "util/u_string.h"
#include "util/u_memory.h"
 
#include "fd4_rasterizer.h"
#include "fd4_context.h"
#include "fd4_format.h"
 
void *
fd4_rasterizer_state_create(struct pipe_context *pctx,
const struct pipe_rasterizer_state *cso)
{
struct fd4_rasterizer_stateobj *so;
float psize_min, psize_max;
 
so = CALLOC_STRUCT(fd4_rasterizer_stateobj);
if (!so)
return NULL;
 
so->base = *cso;
 
if (cso->point_size_per_vertex) {
psize_min = util_get_min_point_size(cso);
psize_max = 8192;
} else {
/* Force the point size to be as if the vertex output was disabled. */
psize_min = cso->point_size;
psize_max = cso->point_size;
}
 
/*
if (cso->line_stipple_enable) {
??? TODO line stipple
}
TODO cso->half_pixel_center
if (cso->multisample)
TODO
*/
so->gras_cl_clip_cntl = 0x80000; /* ??? */
so->gras_su_point_minmax =
A4XX_GRAS_SU_POINT_MINMAX_MIN(psize_min/2) |
A4XX_GRAS_SU_POINT_MINMAX_MAX(psize_max/2);
so->gras_su_point_size = A4XX_GRAS_SU_POINT_SIZE(cso->point_size/2);
so->gras_su_poly_offset_scale =
A4XX_GRAS_SU_POLY_OFFSET_SCALE(cso->offset_scale);
so->gras_su_poly_offset_offset =
A4XX_GRAS_SU_POLY_OFFSET_OFFSET(cso->offset_units);
 
so->gras_su_mode_control =
A4XX_GRAS_SU_MODE_CONTROL_LINEHALFWIDTH(cso->line_width/2.0);
 
if (cso->cull_face & PIPE_FACE_FRONT)
so->gras_su_mode_control |= A4XX_GRAS_SU_MODE_CONTROL_CULL_FRONT;
if (cso->cull_face & PIPE_FACE_BACK)
so->gras_su_mode_control |= A4XX_GRAS_SU_MODE_CONTROL_CULL_BACK;
if (!cso->front_ccw)
so->gras_su_mode_control |= A4XX_GRAS_SU_MODE_CONTROL_FRONT_CW;
if (!cso->flatshade_first)
so->pc_prim_vtx_cntl |= A4XX_PC_PRIM_VTX_CNTL_PROVOKING_VTX_LAST;
 
if (cso->offset_tri)
so->gras_su_mode_control |= A4XX_GRAS_SU_MODE_CONTROL_POLY_OFFSET;
 
return so;
}
/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/freedreno/a4xx/fd4_rasterizer.h
0,0 → 1,56
/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */
 
/*
* Copyright (C) 2014 Rob Clark <robclark@freedesktop.org>
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Authors:
* Rob Clark <robclark@freedesktop.org>
*/
 
#ifndef FD4_RASTERIZER_H_
#define FD4_RASTERIZER_H_
 
#include "pipe/p_state.h"
#include "pipe/p_context.h"
 
struct fd4_rasterizer_stateobj {
struct pipe_rasterizer_state base;
uint32_t gras_su_point_minmax;
uint32_t gras_su_point_size;
uint32_t gras_su_poly_offset_scale;
uint32_t gras_su_poly_offset_offset;
 
uint32_t gras_su_mode_control;
uint32_t gras_cl_clip_cntl;
uint32_t pc_prim_vtx_cntl;
};
 
static INLINE struct fd4_rasterizer_stateobj *
fd4_rasterizer_stateobj(struct pipe_rasterizer_state *rast)
{
return (struct fd4_rasterizer_stateobj *)rast;
}
 
void * fd4_rasterizer_state_create(struct pipe_context *pctx,
const struct pipe_rasterizer_state *cso);
 
#endif /* FD4_RASTERIZER_H_ */
/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/freedreno/a4xx/fd4_screen.c
0,0 → 1,106
/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */
 
/*
* Copyright (C) 2014 Rob Clark <robclark@freedesktop.org>
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Authors:
* Rob Clark <robclark@freedesktop.org>
*/
 
#include "pipe/p_screen.h"
#include "util/u_format.h"
 
#include "fd4_screen.h"
#include "fd4_context.h"
#include "fd4_format.h"
 
static boolean
fd4_screen_is_format_supported(struct pipe_screen *pscreen,
enum pipe_format format,
enum pipe_texture_target target,
unsigned sample_count,
unsigned usage)
{
unsigned retval = 0;
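/* accumulate the usages that the format tables can satisfy; the format
* is only reported as supported if every requested usage is covered:
*/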
 
if ((target >= PIPE_MAX_TEXTURE_TYPES) ||
(sample_count > 1) || /* TODO add MSAA */
!util_format_is_supported(format, usage)) {
DBG("not supported: format=%s, target=%d, sample_count=%d, usage=%x",
util_format_name(format), target, sample_count, usage);
return FALSE;
}
 
if ((usage & PIPE_BIND_VERTEX_BUFFER) &&
(fd4_pipe2vtx(format) != ~0)) {
retval |= PIPE_BIND_VERTEX_BUFFER;
}
 
if ((usage & PIPE_BIND_SAMPLER_VIEW) &&
(fd4_pipe2tex(format) != ~0)) {
retval |= PIPE_BIND_SAMPLER_VIEW;
}
 
if ((usage & (PIPE_BIND_RENDER_TARGET |
PIPE_BIND_DISPLAY_TARGET |
PIPE_BIND_SCANOUT |
PIPE_BIND_SHARED)) &&
(fd4_pipe2color(format) != ~0) &&
(fd4_pipe2tex(format) != ~0)) {
retval |= usage & (PIPE_BIND_RENDER_TARGET |
PIPE_BIND_DISPLAY_TARGET |
PIPE_BIND_SCANOUT |
PIPE_BIND_SHARED);
}
 
if ((usage & PIPE_BIND_DEPTH_STENCIL) &&
(fd4_pipe2depth(format) != ~0) &&
(fd4_pipe2tex(format) != ~0)) {
retval |= PIPE_BIND_DEPTH_STENCIL;
}
 
if ((usage & PIPE_BIND_INDEX_BUFFER) &&
(fd_pipe2index(format) != ~0)) {
retval |= PIPE_BIND_INDEX_BUFFER;
}
 
if (usage & PIPE_BIND_TRANSFER_READ)
retval |= PIPE_BIND_TRANSFER_READ;
if (usage & PIPE_BIND_TRANSFER_WRITE)
retval |= PIPE_BIND_TRANSFER_WRITE;
 
if (retval != usage) {
DBG("not supported: format=%s, target=%d, sample_count=%d, "
"usage=%x, retval=%x", util_format_name(format),
target, sample_count, usage, retval);
}
 
return retval == usage;
}
 
void
fd4_screen_init(struct pipe_screen *pscreen)
{
fd_screen(pscreen)->max_rts = 1;
pscreen->context_create = fd4_context_create;
pscreen->is_format_supported = fd4_screen_is_format_supported;
}
/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/freedreno/a4xx/fd4_screen.h
0,0 → 1,36
/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */
 
/*
* Copyright (C) 2014 Rob Clark <robclark@freedesktop.org>
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Authors:
* Rob Clark <robclark@freedesktop.org>
*/
 
#ifndef FD4_SCREEN_H_
#define FD4_SCREEN_H_
 
#include "pipe/p_screen.h"
 
void fd4_screen_init(struct pipe_screen *pscreen);
 
#endif /* FD4_SCREEN_H_ */
/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/freedreno/a4xx/fd4_texture.c
0,0 → 1,247
/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */
 
/*
* Copyright (C) 2014 Rob Clark <robclark@freedesktop.org>
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Authors:
* Rob Clark <robclark@freedesktop.org>
*/
 
#include "pipe/p_state.h"
#include "util/u_string.h"
#include "util/u_memory.h"
#include "util/u_inlines.h"
#include "util/u_format.h"
 
#include "fd4_texture.h"
#include "fd4_format.h"
 
/* TODO do we need to emulate clamp-to-edge like a3xx? */
static enum a4xx_tex_clamp
tex_clamp(unsigned wrap)
{
/* hardware probably supports more, but we can't coax all the
* wrap/clamp modes out of the GLESv2 blob driver.
*
* TODO once we have basics working, go back and just try
* different values and see what happens
*/
switch (wrap) {
case PIPE_TEX_WRAP_REPEAT:
return A4XX_TEX_REPEAT;
case PIPE_TEX_WRAP_CLAMP:
case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
return A4XX_TEX_CLAMP_TO_EDGE;
case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
// TODO
// return A4XX_TEX_CLAMP_TO_BORDER;
case PIPE_TEX_WRAP_MIRROR_CLAMP:
case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
// TODO
// return A4XX_TEX_MIRROR_CLAMP;
case PIPE_TEX_WRAP_MIRROR_REPEAT:
return A4XX_TEX_MIRROR_REPEAT;
default:
DBG("invalid wrap: %u", wrap);
return 0;
}
}
 
static enum a4xx_tex_filter
tex_filter(unsigned filter, bool aniso)
{
switch (filter) {
case PIPE_TEX_FILTER_NEAREST:
return A4XX_TEX_NEAREST;
case PIPE_TEX_FILTER_LINEAR:
return aniso ? A4XX_TEX_ANISO : A4XX_TEX_LINEAR;
default:
DBG("invalid filter: %u", filter);
return 0;
}
}
 
static void *
fd4_sampler_state_create(struct pipe_context *pctx,
const struct pipe_sampler_state *cso)
{
struct fd4_sampler_stateobj *so = CALLOC_STRUCT(fd4_sampler_stateobj);
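/* util_last_bit(MIN2(max_anisotropy >> 1, 8)) maps 1x,2x,4x,8x,16x(+)
 * to 0..4, i.e. a log2-style encoding of the requested anisotropy,
 * clamped at 16x:
 */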
unsigned aniso = util_last_bit(MIN2(cso->max_anisotropy >> 1, 8));
bool miplinear = false;
 
if (!so)
return NULL;
 
if (cso->min_mip_filter == PIPE_TEX_MIPFILTER_LINEAR)
miplinear = true;
 
so->base = *cso;
 
so->texsamp0 =
COND(miplinear, A4XX_TEX_SAMP_0_MIPFILTER_LINEAR_NEAR) |
A4XX_TEX_SAMP_0_XY_MAG(tex_filter(cso->mag_img_filter, aniso)) |
A4XX_TEX_SAMP_0_XY_MIN(tex_filter(cso->min_img_filter, aniso)) |
A4XX_TEX_SAMP_0_ANISO(aniso) |
A4XX_TEX_SAMP_0_WRAP_S(tex_clamp(cso->wrap_s)) |
A4XX_TEX_SAMP_0_WRAP_T(tex_clamp(cso->wrap_t)) |
A4XX_TEX_SAMP_0_WRAP_R(tex_clamp(cso->wrap_r));
 
so->texsamp1 =
// COND(miplinear, A4XX_TEX_SAMP_1_MIPFILTER_LINEAR_FAR) |
COND(!cso->normalized_coords, A4XX_TEX_SAMP_1_UNNORM_COORDS);
 
if (cso->min_mip_filter != PIPE_TEX_MIPFILTER_NONE) {
so->texsamp1 |=
A4XX_TEX_SAMP_1_MIN_LOD(cso->min_lod) |
A4XX_TEX_SAMP_1_MAX_LOD(cso->max_lod);
}
 
if (cso->compare_mode)
so->texsamp1 |= A4XX_TEX_SAMP_1_COMPARE_FUNC(cso->compare_func); /* maps 1:1 */
 
return so;
}
 
static enum a4xx_tex_type
tex_type(unsigned target)
{
switch (target) {
default:
assert(0);
case PIPE_BUFFER:
case PIPE_TEXTURE_1D:
case PIPE_TEXTURE_1D_ARRAY:
return A4XX_TEX_1D;
case PIPE_TEXTURE_RECT:
case PIPE_TEXTURE_2D:
case PIPE_TEXTURE_2D_ARRAY:
return A4XX_TEX_2D;
case PIPE_TEXTURE_3D:
return A4XX_TEX_3D;
case PIPE_TEXTURE_CUBE:
case PIPE_TEXTURE_CUBE_ARRAY:
return A4XX_TEX_CUBE;
}
}
 
static struct pipe_sampler_view *
fd4_sampler_view_create(struct pipe_context *pctx, struct pipe_resource *prsc,
const struct pipe_sampler_view *cso)
{
struct fd4_pipe_sampler_view *so = CALLOC_STRUCT(fd4_pipe_sampler_view);
struct fd_resource *rsc = fd_resource(prsc);
unsigned lvl = cso->u.tex.first_level;
unsigned miplevels = cso->u.tex.last_level - lvl;
 
if (!so)
return NULL;
 
so->base = *cso;
pipe_reference(NULL, &prsc->reference);
so->base.texture = prsc;
so->base.reference.count = 1;
so->base.context = pctx;
 
so->texconst0 =
A4XX_TEX_CONST_0_TYPE(tex_type(prsc->target)) |
A4XX_TEX_CONST_0_FMT(fd4_pipe2tex(cso->format)) |
A4XX_TEX_CONST_0_MIPLVLS(miplevels) |
fd4_tex_swiz(cso->format, cso->swizzle_r, cso->swizzle_g,
cso->swizzle_b, cso->swizzle_a);
 
if (util_format_is_srgb(cso->format))
so->texconst0 |= A4XX_TEX_CONST_0_SRGB;
 
so->texconst1 =
A4XX_TEX_CONST_1_WIDTH(u_minify(prsc->width0, lvl)) |
A4XX_TEX_CONST_1_HEIGHT(u_minify(prsc->height0, lvl));
so->texconst2 =
A4XX_TEX_CONST_2_FETCHSIZE(fd4_pipe2fetchsize(cso->format)) |
A4XX_TEX_CONST_2_PITCH(rsc->slices[lvl].pitch * rsc->cpp);
 
switch (prsc->target) {
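/* texconst3 carries the "third dimension": array length for array
 * targets, minified depth for 3D, and a single layer for cubes, each
 * paired with the per-layer/slice size (presumably so the hw can step
 * between layers):
 */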
case PIPE_TEXTURE_1D_ARRAY:
case PIPE_TEXTURE_2D_ARRAY:
so->texconst3 =
A4XX_TEX_CONST_3_DEPTH(prsc->array_size) |
A4XX_TEX_CONST_3_LAYERSZ(rsc->layer_size);
break;
case PIPE_TEXTURE_CUBE:
case PIPE_TEXTURE_CUBE_ARRAY: /* ?? not sure about _CUBE_ARRAY */
so->texconst3 =
A4XX_TEX_CONST_3_DEPTH(1) |
A4XX_TEX_CONST_3_LAYERSZ(rsc->layer_size);
break;
case PIPE_TEXTURE_3D:
so->texconst3 =
A4XX_TEX_CONST_3_DEPTH(u_minify(prsc->depth0, lvl)) |
A4XX_TEX_CONST_3_LAYERSZ(rsc->slices[0].size0);
break;
default:
so->texconst3 = 0x00000000;
break;
}
 
return &so->base;
}
 
static void
fd4_set_sampler_views(struct pipe_context *pctx, unsigned shader,
unsigned start, unsigned nr, struct pipe_sampler_view **views)
{
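/* wrap the core fd_set_sampler_views() and additionally record, per
 * shader stage, a bitmask of bound textures with pure-integer formats
 * (assumption: consumed elsewhere in the driver, e.g. for shader
 * variant selection; not part of this listing).
 */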
struct fd_context *ctx = fd_context(pctx);
struct fd4_context *fd4_ctx = fd4_context(ctx);
struct fd_texture_stateobj *tex;
uint16_t integer_s = 0, *ptr;
int i;
 
fd_set_sampler_views(pctx, shader, start, nr, views);
 
switch (shader) {
case PIPE_SHADER_FRAGMENT:
tex = &ctx->fragtex;
ptr = &fd4_ctx->finteger_s;
break;
case PIPE_SHADER_VERTEX:
tex = &ctx->verttex;
ptr = &fd4_ctx->vinteger_s;
break;
default:
return;
}
 
for (i = 0; i < tex->num_textures; i++)
if (util_format_is_pure_integer(tex->textures[i]->format))
integer_s |= 1 << i;
 
*ptr = integer_s;
}
 
void
fd4_texture_init(struct pipe_context *pctx)
{
pctx->create_sampler_state = fd4_sampler_state_create;
pctx->bind_sampler_states = fd_sampler_states_bind;
pctx->create_sampler_view = fd4_sampler_view_create;
pctx->set_sampler_views = fd4_set_sampler_views;
}
/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/freedreno/a4xx/fd4_texture.h
0,0 → 1,67
/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */
 
/*
* Copyright (C) 2014 Rob Clark <robclark@freedesktop.org>
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Authors:
* Rob Clark <robclark@freedesktop.org>
*/
 
#ifndef FD4_TEXTURE_H_
#define FD4_TEXTURE_H_
 
#include "pipe/p_context.h"
 
#include "freedreno_texture.h"
#include "freedreno_resource.h"
 
#include "fd4_context.h"
#include "fd4_format.h"
 
struct fd4_sampler_stateobj {
struct pipe_sampler_state base;
uint32_t texsamp0, texsamp1;
};
 
static INLINE struct fd4_sampler_stateobj *
fd4_sampler_stateobj(struct pipe_sampler_state *samp)
{
return (struct fd4_sampler_stateobj *)samp;
}
 
struct fd4_pipe_sampler_view {
struct pipe_sampler_view base;
uint32_t texconst0, texconst1, texconst2, texconst3, texconst4;
};
 
static INLINE struct fd4_pipe_sampler_view *
fd4_pipe_sampler_view(struct pipe_sampler_view *pview)
{
return (struct fd4_pipe_sampler_view *)pview;
}
 
unsigned fd4_get_const_idx(struct fd_context *ctx,
struct fd_texture_stateobj *tex, unsigned samp_id);
 
void fd4_texture_init(struct pipe_context *pctx);
 
#endif /* FD4_TEXTURE_H_ */
/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/freedreno/a4xx/fd4_zsa.c
0,0 → 1,109
/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */
 
/*
* Copyright (C) 2014 Rob Clark <robclark@freedesktop.org>
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Authors:
* Rob Clark <robclark@freedesktop.org>
*/
 
 
#include "pipe/p_state.h"
#include "util/u_string.h"
#include "util/u_memory.h"
 
#include "fd4_zsa.h"
#include "fd4_context.h"
#include "fd4_format.h"
 
void *
fd4_zsa_state_create(struct pipe_context *pctx,
const struct pipe_depth_stencil_alpha_state *cso)
{
struct fd4_zsa_stateobj *so;
 
so = CALLOC_STRUCT(fd4_zsa_stateobj);
if (!so)
return NULL;
 
so->base = *cso;
 
so->rb_depth_control |=
A4XX_RB_DEPTH_CONTROL_ZFUNC(cso->depth.func); /* maps 1:1 */
 
if (cso->depth.enabled)
so->rb_depth_control |=
A4XX_RB_DEPTH_CONTROL_Z_ENABLE |
A4XX_RB_DEPTH_CONTROL_Z_TEST_ENABLE;
 
if (cso->depth.writemask)
so->rb_depth_control |= A4XX_RB_DEPTH_CONTROL_Z_WRITE_ENABLE;
 
if (cso->stencil[0].enabled) {
const struct pipe_stencil_state *s = &cso->stencil[0];
 
so->rb_stencil_control |=
A4XX_RB_STENCIL_CONTROL_STENCIL_READ |
A4XX_RB_STENCIL_CONTROL_STENCIL_ENABLE |
A4XX_RB_STENCIL_CONTROL_FUNC(s->func) | /* maps 1:1 */
A4XX_RB_STENCIL_CONTROL_FAIL(fd_stencil_op(s->fail_op)) |
A4XX_RB_STENCIL_CONTROL_ZPASS(fd_stencil_op(s->zpass_op)) |
A4XX_RB_STENCIL_CONTROL_ZFAIL(fd_stencil_op(s->zfail_op));
so->rb_stencil_control2 |=
A4XX_RB_STENCIL_CONTROL2_STENCIL_BUFFER;
so->rb_stencilrefmask |=
0xff000000 | /* ??? */
A4XX_RB_STENCILREFMASK_STENCILWRITEMASK(s->writemask) |
A4XX_RB_STENCILREFMASK_STENCILMASK(s->valuemask);
 
if (cso->stencil[1].enabled) {
const struct pipe_stencil_state *bs = &cso->stencil[1];
 
so->rb_stencil_control |=
A4XX_RB_STENCIL_CONTROL_STENCIL_ENABLE_BF |
A4XX_RB_STENCIL_CONTROL_FUNC_BF(bs->func) | /* maps 1:1 */
A4XX_RB_STENCIL_CONTROL_FAIL_BF(fd_stencil_op(bs->fail_op)) |
A4XX_RB_STENCIL_CONTROL_ZPASS_BF(fd_stencil_op(bs->zpass_op)) |
A4XX_RB_STENCIL_CONTROL_ZFAIL_BF(fd_stencil_op(bs->zfail_op));
so->rb_stencilrefmask_bf |=
0xff000000 | /* ??? */
A4XX_RB_STENCILREFMASK_BF_STENCILWRITEMASK(bs->writemask) |
A4XX_RB_STENCILREFMASK_BF_STENCILMASK(bs->valuemask);
}
}
 
if (cso->alpha.enabled) {
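/* scale the normalized [0,1] alpha ref value to the 8-bit hw field;
 * note that enabling alpha test also forces early-z off below.
 */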
uint32_t ref = cso->alpha.ref_value * 255.0;
so->gras_alpha_control =
A4XX_GRAS_ALPHA_CONTROL_ALPHA_TEST_ENABLE;
so->rb_alpha_control =
A4XX_RB_ALPHA_CONTROL_ALPHA_TEST |
A4XX_RB_ALPHA_CONTROL_ALPHA_REF(ref) |
A4XX_RB_ALPHA_CONTROL_ALPHA_TEST_FUNC(cso->alpha.func);
so->rb_depth_control |=
A4XX_RB_DEPTH_CONTROL_EARLY_Z_DISABLE;
}
 
so->rb_render_control = 0x8; /* XXX */
 
return so;
}
/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/freedreno/a4xx/fd4_zsa.h
0,0 → 1,59
/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */
 
/*
* Copyright (C) 2014 Rob Clark <robclark@freedesktop.org>
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Authors:
* Rob Clark <robclark@freedesktop.org>
*/
 
#ifndef FD4_ZSA_H_
#define FD4_ZSA_H_
 
 
#include "pipe/p_state.h"
#include "pipe/p_context.h"
 
#include "freedreno_util.h"
 
struct fd4_zsa_stateobj {
struct pipe_depth_stencil_alpha_state base;
uint32_t gras_alpha_control;
uint32_t rb_alpha_control;
uint32_t rb_render_control;
uint32_t rb_depth_control;
uint32_t rb_stencil_control;
uint32_t rb_stencil_control2;
uint32_t rb_stencilrefmask;
uint32_t rb_stencilrefmask_bf;
};
 
static INLINE struct fd4_zsa_stateobj *
fd4_zsa_stateobj(struct pipe_depth_stencil_alpha_state *zsa)
{
return (struct fd4_zsa_stateobj *)zsa;
}
 
void * fd4_zsa_state_create(struct pipe_context *pctx,
const struct pipe_depth_stencil_alpha_state *cso);
 
#endif /* FD4_ZSA_H_ */
/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/freedreno/adreno_common.xml.h
0,0 → 1,446
#ifndef ADRENO_COMMON_XML
#define ADRENO_COMMON_XML
 
/* Autogenerated file, DO NOT EDIT manually!
 
This file was generated by the rules-ng-ng headergen tool in this git repository:
http://github.com/freedreno/envytools/
git clone https://github.com/freedreno/envytools.git
 
The rules-ng-ng source files this header was generated from are:
- /home/robclark/src/freedreno/envytools/rnndb/adreno.xml ( 364 bytes, from 2013-11-30 14:47:15)
- /home/robclark/src/freedreno/envytools/rnndb/freedreno_copyright.xml ( 1453 bytes, from 2013-03-31 16:51:27)
- /home/robclark/src/freedreno/envytools/rnndb/adreno/a2xx.xml ( 32901 bytes, from 2014-06-02 15:21:30)
- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_common.xml ( 10551 bytes, from 2014-11-13 22:44:30)
- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_pm4.xml ( 14895 bytes, from 2015-04-19 15:23:28)
- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml ( 66709 bytes, from 2015-04-12 18:16:35)
- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml ( 59314 bytes, from 2015-04-19 16:21:40)
 
Copyright (C) 2013-2014 by the following authors:
- Rob Clark <robdclark@gmail.com> (robclark)
 
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
 
The above copyright notice and this permission notice (including the
next paragraph) shall be included in all copies or substantial
portions of the Software.
 
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
 
 
enum adreno_pa_su_sc_draw {
PC_DRAW_POINTS = 0,
PC_DRAW_LINES = 1,
PC_DRAW_TRIANGLES = 2,
};
 
enum adreno_compare_func {
FUNC_NEVER = 0,
FUNC_LESS = 1,
FUNC_EQUAL = 2,
FUNC_LEQUAL = 3,
FUNC_GREATER = 4,
FUNC_NOTEQUAL = 5,
FUNC_GEQUAL = 6,
FUNC_ALWAYS = 7,
};
 
enum adreno_stencil_op {
STENCIL_KEEP = 0,
STENCIL_ZERO = 1,
STENCIL_REPLACE = 2,
STENCIL_INCR_CLAMP = 3,
STENCIL_DECR_CLAMP = 4,
STENCIL_INVERT = 5,
STENCIL_INCR_WRAP = 6,
STENCIL_DECR_WRAP = 7,
};
 
enum adreno_rb_blend_factor {
FACTOR_ZERO = 0,
FACTOR_ONE = 1,
FACTOR_SRC_COLOR = 4,
FACTOR_ONE_MINUS_SRC_COLOR = 5,
FACTOR_SRC_ALPHA = 6,
FACTOR_ONE_MINUS_SRC_ALPHA = 7,
FACTOR_DST_COLOR = 8,
FACTOR_ONE_MINUS_DST_COLOR = 9,
FACTOR_DST_ALPHA = 10,
FACTOR_ONE_MINUS_DST_ALPHA = 11,
FACTOR_CONSTANT_COLOR = 12,
FACTOR_ONE_MINUS_CONSTANT_COLOR = 13,
FACTOR_CONSTANT_ALPHA = 14,
FACTOR_ONE_MINUS_CONSTANT_ALPHA = 15,
FACTOR_SRC_ALPHA_SATURATE = 16,
};
 
enum adreno_rb_surface_endian {
ENDIAN_NONE = 0,
ENDIAN_8IN16 = 1,
ENDIAN_8IN32 = 2,
ENDIAN_16IN32 = 3,
ENDIAN_8IN64 = 4,
ENDIAN_8IN128 = 5,
};
 
enum adreno_rb_dither_mode {
DITHER_DISABLE = 0,
DITHER_ALWAYS = 1,
DITHER_IF_ALPHA_OFF = 2,
};
 
enum adreno_rb_depth_format {
DEPTHX_16 = 0,
DEPTHX_24_8 = 1,
DEPTHX_32 = 2,
};
 
enum adreno_rb_copy_control_mode {
RB_COPY_RESOLVE = 1,
RB_COPY_CLEAR = 2,
RB_COPY_DEPTH_STENCIL = 5,
};
 
enum a3xx_render_mode {
RB_RENDERING_PASS = 0,
RB_TILING_PASS = 1,
RB_RESOLVE_PASS = 2,
RB_COMPUTE_PASS = 3,
};
 
enum a3xx_msaa_samples {
MSAA_ONE = 0,
MSAA_TWO = 1,
MSAA_FOUR = 2,
};
 
enum a3xx_threadmode {
MULTI = 0,
SINGLE = 1,
};
 
enum a3xx_instrbuffermode {
CACHE = 0,
BUFFER = 1,
};
 
enum a3xx_threadsize {
TWO_QUADS = 0,
FOUR_QUADS = 1,
};
 
enum a3xx_color_swap {
WZYX = 0,
WXYZ = 1,
ZYXW = 2,
XYZW = 3,
};
 
#define REG_AXXX_CP_RB_BASE 0x000001c0
 
#define REG_AXXX_CP_RB_CNTL 0x000001c1
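/* note: multi-bit fields in this (autogenerated) header come as a
 * __MASK/__SHIFT pair plus an inline packer that shifts the value into
 * place and masks it; single-bit fields are plain #defines. the same
 * pattern repeats for every register below.
 */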
#define AXXX_CP_RB_CNTL_BUFSZ__MASK 0x0000003f
#define AXXX_CP_RB_CNTL_BUFSZ__SHIFT 0
static inline uint32_t AXXX_CP_RB_CNTL_BUFSZ(uint32_t val)
{
return ((val) << AXXX_CP_RB_CNTL_BUFSZ__SHIFT) & AXXX_CP_RB_CNTL_BUFSZ__MASK;
}
#define AXXX_CP_RB_CNTL_BLKSZ__MASK 0x00003f00
#define AXXX_CP_RB_CNTL_BLKSZ__SHIFT 8
static inline uint32_t AXXX_CP_RB_CNTL_BLKSZ(uint32_t val)
{
return ((val) << AXXX_CP_RB_CNTL_BLKSZ__SHIFT) & AXXX_CP_RB_CNTL_BLKSZ__MASK;
}
#define AXXX_CP_RB_CNTL_BUF_SWAP__MASK 0x00030000
#define AXXX_CP_RB_CNTL_BUF_SWAP__SHIFT 16
static inline uint32_t AXXX_CP_RB_CNTL_BUF_SWAP(uint32_t val)
{
return ((val) << AXXX_CP_RB_CNTL_BUF_SWAP__SHIFT) & AXXX_CP_RB_CNTL_BUF_SWAP__MASK;
}
#define AXXX_CP_RB_CNTL_POLL_EN 0x00100000
#define AXXX_CP_RB_CNTL_NO_UPDATE 0x08000000
#define AXXX_CP_RB_CNTL_RPTR_WR_EN 0x80000000
 
#define REG_AXXX_CP_RB_RPTR_ADDR 0x000001c3
#define AXXX_CP_RB_RPTR_ADDR_SWAP__MASK 0x00000003
#define AXXX_CP_RB_RPTR_ADDR_SWAP__SHIFT 0
static inline uint32_t AXXX_CP_RB_RPTR_ADDR_SWAP(uint32_t val)
{
return ((val) << AXXX_CP_RB_RPTR_ADDR_SWAP__SHIFT) & AXXX_CP_RB_RPTR_ADDR_SWAP__MASK;
}
#define AXXX_CP_RB_RPTR_ADDR_ADDR__MASK 0xfffffffc
#define AXXX_CP_RB_RPTR_ADDR_ADDR__SHIFT 2
static inline uint32_t AXXX_CP_RB_RPTR_ADDR_ADDR(uint32_t val)
{
return ((val >> 2) << AXXX_CP_RB_RPTR_ADDR_ADDR__SHIFT) & AXXX_CP_RB_RPTR_ADDR_ADDR__MASK;
}
 
#define REG_AXXX_CP_RB_RPTR 0x000001c4
 
#define REG_AXXX_CP_RB_WPTR 0x000001c5
 
#define REG_AXXX_CP_RB_WPTR_DELAY 0x000001c6
 
#define REG_AXXX_CP_RB_RPTR_WR 0x000001c7
 
#define REG_AXXX_CP_RB_WPTR_BASE 0x000001c8
 
#define REG_AXXX_CP_QUEUE_THRESHOLDS 0x000001d5
#define AXXX_CP_QUEUE_THRESHOLDS_CSQ_IB1_START__MASK 0x0000000f
#define AXXX_CP_QUEUE_THRESHOLDS_CSQ_IB1_START__SHIFT 0
static inline uint32_t AXXX_CP_QUEUE_THRESHOLDS_CSQ_IB1_START(uint32_t val)
{
return ((val) << AXXX_CP_QUEUE_THRESHOLDS_CSQ_IB1_START__SHIFT) & AXXX_CP_QUEUE_THRESHOLDS_CSQ_IB1_START__MASK;
}
#define AXXX_CP_QUEUE_THRESHOLDS_CSQ_IB2_START__MASK 0x00000f00
#define AXXX_CP_QUEUE_THRESHOLDS_CSQ_IB2_START__SHIFT 8
static inline uint32_t AXXX_CP_QUEUE_THRESHOLDS_CSQ_IB2_START(uint32_t val)
{
return ((val) << AXXX_CP_QUEUE_THRESHOLDS_CSQ_IB2_START__SHIFT) & AXXX_CP_QUEUE_THRESHOLDS_CSQ_IB2_START__MASK;
}
#define AXXX_CP_QUEUE_THRESHOLDS_CSQ_ST_START__MASK 0x000f0000
#define AXXX_CP_QUEUE_THRESHOLDS_CSQ_ST_START__SHIFT 16
static inline uint32_t AXXX_CP_QUEUE_THRESHOLDS_CSQ_ST_START(uint32_t val)
{
return ((val) << AXXX_CP_QUEUE_THRESHOLDS_CSQ_ST_START__SHIFT) & AXXX_CP_QUEUE_THRESHOLDS_CSQ_ST_START__MASK;
}
 
#define REG_AXXX_CP_MEQ_THRESHOLDS 0x000001d6
#define AXXX_CP_MEQ_THRESHOLDS_MEQ_END__MASK 0x001f0000
#define AXXX_CP_MEQ_THRESHOLDS_MEQ_END__SHIFT 16
static inline uint32_t AXXX_CP_MEQ_THRESHOLDS_MEQ_END(uint32_t val)
{
return ((val) << AXXX_CP_MEQ_THRESHOLDS_MEQ_END__SHIFT) & AXXX_CP_MEQ_THRESHOLDS_MEQ_END__MASK;
}
#define AXXX_CP_MEQ_THRESHOLDS_ROQ_END__MASK 0x1f000000
#define AXXX_CP_MEQ_THRESHOLDS_ROQ_END__SHIFT 24
static inline uint32_t AXXX_CP_MEQ_THRESHOLDS_ROQ_END(uint32_t val)
{
return ((val) << AXXX_CP_MEQ_THRESHOLDS_ROQ_END__SHIFT) & AXXX_CP_MEQ_THRESHOLDS_ROQ_END__MASK;
}
 
#define REG_AXXX_CP_CSQ_AVAIL 0x000001d7
#define AXXX_CP_CSQ_AVAIL_RING__MASK 0x0000007f
#define AXXX_CP_CSQ_AVAIL_RING__SHIFT 0
static inline uint32_t AXXX_CP_CSQ_AVAIL_RING(uint32_t val)
{
return ((val) << AXXX_CP_CSQ_AVAIL_RING__SHIFT) & AXXX_CP_CSQ_AVAIL_RING__MASK;
}
#define AXXX_CP_CSQ_AVAIL_IB1__MASK 0x00007f00
#define AXXX_CP_CSQ_AVAIL_IB1__SHIFT 8
static inline uint32_t AXXX_CP_CSQ_AVAIL_IB1(uint32_t val)
{
return ((val) << AXXX_CP_CSQ_AVAIL_IB1__SHIFT) & AXXX_CP_CSQ_AVAIL_IB1__MASK;
}
#define AXXX_CP_CSQ_AVAIL_IB2__MASK 0x007f0000
#define AXXX_CP_CSQ_AVAIL_IB2__SHIFT 16
static inline uint32_t AXXX_CP_CSQ_AVAIL_IB2(uint32_t val)
{
return ((val) << AXXX_CP_CSQ_AVAIL_IB2__SHIFT) & AXXX_CP_CSQ_AVAIL_IB2__MASK;
}
 
#define REG_AXXX_CP_STQ_AVAIL 0x000001d8
#define AXXX_CP_STQ_AVAIL_ST__MASK 0x0000007f
#define AXXX_CP_STQ_AVAIL_ST__SHIFT 0
static inline uint32_t AXXX_CP_STQ_AVAIL_ST(uint32_t val)
{
return ((val) << AXXX_CP_STQ_AVAIL_ST__SHIFT) & AXXX_CP_STQ_AVAIL_ST__MASK;
}
 
#define REG_AXXX_CP_MEQ_AVAIL 0x000001d9
#define AXXX_CP_MEQ_AVAIL_MEQ__MASK 0x0000001f
#define AXXX_CP_MEQ_AVAIL_MEQ__SHIFT 0
static inline uint32_t AXXX_CP_MEQ_AVAIL_MEQ(uint32_t val)
{
return ((val) << AXXX_CP_MEQ_AVAIL_MEQ__SHIFT) & AXXX_CP_MEQ_AVAIL_MEQ__MASK;
}
 
#define REG_AXXX_SCRATCH_UMSK 0x000001dc
#define AXXX_SCRATCH_UMSK_UMSK__MASK 0x000000ff
#define AXXX_SCRATCH_UMSK_UMSK__SHIFT 0
static inline uint32_t AXXX_SCRATCH_UMSK_UMSK(uint32_t val)
{
return ((val) << AXXX_SCRATCH_UMSK_UMSK__SHIFT) & AXXX_SCRATCH_UMSK_UMSK__MASK;
}
#define AXXX_SCRATCH_UMSK_SWAP__MASK 0x00030000
#define AXXX_SCRATCH_UMSK_SWAP__SHIFT 16
static inline uint32_t AXXX_SCRATCH_UMSK_SWAP(uint32_t val)
{
return ((val) << AXXX_SCRATCH_UMSK_SWAP__SHIFT) & AXXX_SCRATCH_UMSK_SWAP__MASK;
}
 
#define REG_AXXX_SCRATCH_ADDR 0x000001dd
 
#define REG_AXXX_CP_ME_RDADDR 0x000001ea
 
#define REG_AXXX_CP_STATE_DEBUG_INDEX 0x000001ec
 
#define REG_AXXX_CP_STATE_DEBUG_DATA 0x000001ed
 
#define REG_AXXX_CP_INT_CNTL 0x000001f2
 
#define REG_AXXX_CP_INT_STATUS 0x000001f3
 
#define REG_AXXX_CP_INT_ACK 0x000001f4
 
#define REG_AXXX_CP_ME_CNTL 0x000001f6
#define AXXX_CP_ME_CNTL_BUSY 0x20000000
#define AXXX_CP_ME_CNTL_HALT 0x10000000
 
#define REG_AXXX_CP_ME_STATUS 0x000001f7
 
#define REG_AXXX_CP_ME_RAM_WADDR 0x000001f8
 
#define REG_AXXX_CP_ME_RAM_RADDR 0x000001f9
 
#define REG_AXXX_CP_ME_RAM_DATA 0x000001fa
 
#define REG_AXXX_CP_DEBUG 0x000001fc
#define AXXX_CP_DEBUG_PREDICATE_DISABLE 0x00800000
#define AXXX_CP_DEBUG_PROG_END_PTR_ENABLE 0x01000000
#define AXXX_CP_DEBUG_MIU_128BIT_WRITE_ENABLE 0x02000000
#define AXXX_CP_DEBUG_PREFETCH_PASS_NOPS 0x04000000
#define AXXX_CP_DEBUG_DYNAMIC_CLK_DISABLE 0x08000000
#define AXXX_CP_DEBUG_PREFETCH_MATCH_DISABLE 0x10000000
#define AXXX_CP_DEBUG_SIMPLE_ME_FLOW_CONTROL 0x40000000
#define AXXX_CP_DEBUG_MIU_WRITE_PACK_DISABLE 0x80000000
 
#define REG_AXXX_CP_CSQ_RB_STAT 0x000001fd
#define AXXX_CP_CSQ_RB_STAT_RPTR__MASK 0x0000007f
#define AXXX_CP_CSQ_RB_STAT_RPTR__SHIFT 0
static inline uint32_t AXXX_CP_CSQ_RB_STAT_RPTR(uint32_t val)
{
return ((val) << AXXX_CP_CSQ_RB_STAT_RPTR__SHIFT) & AXXX_CP_CSQ_RB_STAT_RPTR__MASK;
}
#define AXXX_CP_CSQ_RB_STAT_WPTR__MASK 0x007f0000
#define AXXX_CP_CSQ_RB_STAT_WPTR__SHIFT 16
static inline uint32_t AXXX_CP_CSQ_RB_STAT_WPTR(uint32_t val)
{
return ((val) << AXXX_CP_CSQ_RB_STAT_WPTR__SHIFT) & AXXX_CP_CSQ_RB_STAT_WPTR__MASK;
}
 
#define REG_AXXX_CP_CSQ_IB1_STAT 0x000001fe
#define AXXX_CP_CSQ_IB1_STAT_RPTR__MASK 0x0000007f
#define AXXX_CP_CSQ_IB1_STAT_RPTR__SHIFT 0
static inline uint32_t AXXX_CP_CSQ_IB1_STAT_RPTR(uint32_t val)
{
return ((val) << AXXX_CP_CSQ_IB1_STAT_RPTR__SHIFT) & AXXX_CP_CSQ_IB1_STAT_RPTR__MASK;
}
#define AXXX_CP_CSQ_IB1_STAT_WPTR__MASK 0x007f0000
#define AXXX_CP_CSQ_IB1_STAT_WPTR__SHIFT 16
static inline uint32_t AXXX_CP_CSQ_IB1_STAT_WPTR(uint32_t val)
{
return ((val) << AXXX_CP_CSQ_IB1_STAT_WPTR__SHIFT) & AXXX_CP_CSQ_IB1_STAT_WPTR__MASK;
}
 
#define REG_AXXX_CP_CSQ_IB2_STAT 0x000001ff
#define AXXX_CP_CSQ_IB2_STAT_RPTR__MASK 0x0000007f
#define AXXX_CP_CSQ_IB2_STAT_RPTR__SHIFT 0
static inline uint32_t AXXX_CP_CSQ_IB2_STAT_RPTR(uint32_t val)
{
return ((val) << AXXX_CP_CSQ_IB2_STAT_RPTR__SHIFT) & AXXX_CP_CSQ_IB2_STAT_RPTR__MASK;
}
#define AXXX_CP_CSQ_IB2_STAT_WPTR__MASK 0x007f0000
#define AXXX_CP_CSQ_IB2_STAT_WPTR__SHIFT 16
static inline uint32_t AXXX_CP_CSQ_IB2_STAT_WPTR(uint32_t val)
{
return ((val) << AXXX_CP_CSQ_IB2_STAT_WPTR__SHIFT) & AXXX_CP_CSQ_IB2_STAT_WPTR__MASK;
}
 
#define REG_AXXX_CP_NON_PREFETCH_CNTRS 0x00000440
 
#define REG_AXXX_CP_STQ_ST_STAT 0x00000443
 
#define REG_AXXX_CP_ST_BASE 0x0000044d
 
#define REG_AXXX_CP_ST_BUFSZ 0x0000044e
 
#define REG_AXXX_CP_MEQ_STAT 0x0000044f
 
#define REG_AXXX_CP_MIU_TAG_STAT 0x00000452
 
#define REG_AXXX_CP_BIN_MASK_LO 0x00000454
 
#define REG_AXXX_CP_BIN_MASK_HI 0x00000455
 
#define REG_AXXX_CP_BIN_SELECT_LO 0x00000456
 
#define REG_AXXX_CP_BIN_SELECT_HI 0x00000457
 
#define REG_AXXX_CP_IB1_BASE 0x00000458
 
#define REG_AXXX_CP_IB1_BUFSZ 0x00000459
 
#define REG_AXXX_CP_IB2_BASE 0x0000045a
 
#define REG_AXXX_CP_IB2_BUFSZ 0x0000045b
 
#define REG_AXXX_CP_STAT 0x0000047f
 
#define REG_AXXX_CP_SCRATCH_REG0 0x00000578
 
#define REG_AXXX_CP_SCRATCH_REG1 0x00000579
 
#define REG_AXXX_CP_SCRATCH_REG2 0x0000057a
 
#define REG_AXXX_CP_SCRATCH_REG3 0x0000057b
 
#define REG_AXXX_CP_SCRATCH_REG4 0x0000057c
 
#define REG_AXXX_CP_SCRATCH_REG5 0x0000057d
 
#define REG_AXXX_CP_SCRATCH_REG6 0x0000057e
 
#define REG_AXXX_CP_SCRATCH_REG7 0x0000057f
 
#define REG_AXXX_CP_ME_VS_EVENT_SRC 0x00000600
 
#define REG_AXXX_CP_ME_VS_EVENT_ADDR 0x00000601
 
#define REG_AXXX_CP_ME_VS_EVENT_DATA 0x00000602
 
#define REG_AXXX_CP_ME_VS_EVENT_ADDR_SWM 0x00000603
 
#define REG_AXXX_CP_ME_VS_EVENT_DATA_SWM 0x00000604
 
#define REG_AXXX_CP_ME_PS_EVENT_SRC 0x00000605
 
#define REG_AXXX_CP_ME_PS_EVENT_ADDR 0x00000606
 
#define REG_AXXX_CP_ME_PS_EVENT_DATA 0x00000607
 
#define REG_AXXX_CP_ME_PS_EVENT_ADDR_SWM 0x00000608
 
#define REG_AXXX_CP_ME_PS_EVENT_DATA_SWM 0x00000609
 
#define REG_AXXX_CP_ME_CF_EVENT_SRC 0x0000060a
 
#define REG_AXXX_CP_ME_CF_EVENT_ADDR 0x0000060b
 
#define REG_AXXX_CP_ME_CF_EVENT_DATA 0x0000060c
 
#define REG_AXXX_CP_ME_NRT_ADDR 0x0000060d
 
#define REG_AXXX_CP_ME_NRT_DATA 0x0000060e
 
#define REG_AXXX_CP_ME_VS_FETCH_DONE_SRC 0x00000612
 
#define REG_AXXX_CP_ME_VS_FETCH_DONE_ADDR 0x00000613
 
#define REG_AXXX_CP_ME_VS_FETCH_DONE_DATA 0x00000614
 
 
#endif /* ADRENO_COMMON_XML */
/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/freedreno/adreno_pm4.xml.h
0,0 → 1,502
#ifndef ADRENO_PM4_XML
#define ADRENO_PM4_XML
 
/* Autogenerated file, DO NOT EDIT manually!
 
This file was generated by the rules-ng-ng headergen tool in this git repository:
http://github.com/freedreno/envytools/
git clone https://github.com/freedreno/envytools.git
 
The rules-ng-ng source files this header was generated from are:
- /home/robclark/src/freedreno/envytools/rnndb/adreno.xml ( 364 bytes, from 2013-11-30 14:47:15)
- /home/robclark/src/freedreno/envytools/rnndb/freedreno_copyright.xml ( 1453 bytes, from 2013-03-31 16:51:27)
- /home/robclark/src/freedreno/envytools/rnndb/adreno/a2xx.xml ( 32901 bytes, from 2014-06-02 15:21:30)
- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_common.xml ( 10551 bytes, from 2014-11-13 22:44:30)
- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_pm4.xml ( 14895 bytes, from 2015-04-19 15:23:28)
- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml ( 66709 bytes, from 2015-04-12 18:16:35)
- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml ( 59314 bytes, from 2015-04-19 16:21:40)
 
Copyright (C) 2013-2015 by the following authors:
- Rob Clark <robdclark@gmail.com> (robclark)
 
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
 
The above copyright notice and this permission notice (including the
next paragraph) shall be included in all copies or substantial
portions of the Software.
 
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
 
 
enum vgt_event_type {
VS_DEALLOC = 0,
PS_DEALLOC = 1,
VS_DONE_TS = 2,
PS_DONE_TS = 3,
CACHE_FLUSH_TS = 4,
CONTEXT_DONE = 5,
CACHE_FLUSH = 6,
HLSQ_FLUSH = 7,
VIZQUERY_START = 7,
VIZQUERY_END = 8,
SC_WAIT_WC = 9,
RST_PIX_CNT = 13,
RST_VTX_CNT = 14,
TILE_FLUSH = 15,
CACHE_FLUSH_AND_INV_TS_EVENT = 20,
ZPASS_DONE = 21,
CACHE_FLUSH_AND_INV_EVENT = 22,
PERFCOUNTER_START = 23,
PERFCOUNTER_STOP = 24,
VS_FETCH_DONE = 27,
FACENESS_FLUSH = 28,
};
 
enum pc_di_primtype {
DI_PT_NONE = 0,
DI_PT_POINTLIST_A2XX = 1,
DI_PT_LINELIST = 2,
DI_PT_LINESTRIP = 3,
DI_PT_TRILIST = 4,
DI_PT_TRIFAN = 5,
DI_PT_TRISTRIP = 6,
DI_PT_LINELOOP = 7,
DI_PT_RECTLIST = 8,
DI_PT_POINTLIST_A3XX = 9,
DI_PT_LINE_ADJ = 10,
DI_PT_LINESTRIP_ADJ = 11,
DI_PT_TRI_ADJ = 12,
DI_PT_TRISTRIP_ADJ = 13,
DI_PT_PATCHES = 34,
};
 
enum pc_di_src_sel {
DI_SRC_SEL_DMA = 0,
DI_SRC_SEL_IMMEDIATE = 1,
DI_SRC_SEL_AUTO_INDEX = 2,
DI_SRC_SEL_RESERVED = 3,
};
 
enum pc_di_index_size {
INDEX_SIZE_IGN = 0,
INDEX_SIZE_16_BIT = 0,
INDEX_SIZE_32_BIT = 1,
INDEX_SIZE_8_BIT = 2,
INDEX_SIZE_INVALID = 0,
};
 
enum pc_di_vis_cull_mode {
IGNORE_VISIBILITY = 0,
USE_VISIBILITY = 1,
};
 
enum adreno_pm4_packet_type {
CP_TYPE0_PKT = 0,
CP_TYPE1_PKT = 0x40000000,
CP_TYPE2_PKT = 0x80000000,
CP_TYPE3_PKT = 0xc0000000,
};
 
enum adreno_pm4_type3_packets {
CP_ME_INIT = 72,
CP_NOP = 16,
CP_INDIRECT_BUFFER = 63,
CP_INDIRECT_BUFFER_PFD = 55,
CP_WAIT_FOR_IDLE = 38,
CP_WAIT_REG_MEM = 60,
CP_WAIT_REG_EQ = 82,
CP_WAIT_REG_GTE = 83,
CP_WAIT_UNTIL_READ = 92,
CP_WAIT_IB_PFD_COMPLETE = 93,
CP_REG_RMW = 33,
CP_SET_BIN_DATA = 47,
CP_REG_TO_MEM = 62,
CP_MEM_WRITE = 61,
CP_MEM_WRITE_CNTR = 79,
CP_COND_EXEC = 68,
CP_COND_WRITE = 69,
CP_EVENT_WRITE = 70,
CP_EVENT_WRITE_SHD = 88,
CP_EVENT_WRITE_CFL = 89,
CP_EVENT_WRITE_ZPD = 91,
CP_RUN_OPENCL = 49,
CP_DRAW_INDX = 34,
CP_DRAW_INDX_2 = 54,
CP_DRAW_INDX_BIN = 52,
CP_DRAW_INDX_2_BIN = 53,
CP_VIZ_QUERY = 35,
CP_SET_STATE = 37,
CP_SET_CONSTANT = 45,
CP_IM_LOAD = 39,
CP_IM_LOAD_IMMEDIATE = 43,
CP_LOAD_CONSTANT_CONTEXT = 46,
CP_INVALIDATE_STATE = 59,
CP_SET_SHADER_BASES = 74,
CP_SET_BIN_MASK = 80,
CP_SET_BIN_SELECT = 81,
CP_CONTEXT_UPDATE = 94,
CP_INTERRUPT = 64,
CP_IM_STORE = 44,
CP_SET_DRAW_INIT_FLAGS = 75,
CP_SET_PROTECTED_MODE = 95,
CP_BOOTSTRAP_UCODE = 111,
CP_LOAD_STATE = 48,
CP_COND_INDIRECT_BUFFER_PFE = 58,
CP_COND_INDIRECT_BUFFER_PFD = 50,
CP_INDIRECT_BUFFER_PFE = 63,
CP_SET_BIN = 76,
CP_TEST_TWO_MEMS = 113,
CP_REG_WR_NO_CTXT = 120,
CP_RECORD_PFP_TIMESTAMP = 17,
CP_WAIT_FOR_ME = 19,
CP_SET_DRAW_STATE = 67,
CP_DRAW_INDX_OFFSET = 56,
CP_DRAW_INDIRECT = 40,
CP_DRAW_INDX_INDIRECT = 41,
CP_DRAW_AUTO = 36,
CP_UNKNOWN_19 = 25,
CP_UNKNOWN_1A = 26,
CP_UNKNOWN_4E = 78,
CP_WIDE_REG_WRITE = 116,
IN_IB_PREFETCH_END = 23,
IN_SUBBLK_PREFETCH = 31,
IN_INSTR_PREFETCH = 32,
IN_INSTR_MATCH = 71,
IN_CONST_PREFETCH = 73,
IN_INCR_UPDT_STATE = 85,
IN_INCR_UPDT_CONST = 86,
IN_INCR_UPDT_INSTR = 87,
};
 
enum adreno_state_block {
SB_VERT_TEX = 0,
SB_VERT_MIPADDR = 1,
SB_FRAG_TEX = 2,
SB_FRAG_MIPADDR = 3,
SB_VERT_SHADER = 4,
SB_GEOM_SHADER = 5,
SB_FRAG_SHADER = 6,
};
 
enum adreno_state_type {
ST_SHADER = 0,
ST_CONSTANTS = 1,
};
 
enum adreno_state_src {
SS_DIRECT = 0,
SS_INDIRECT = 4,
};
 
enum a4xx_index_size {
INDEX4_SIZE_8_BIT = 0,
INDEX4_SIZE_16_BIT = 1,
INDEX4_SIZE_32_BIT = 2,
};
 
#define REG_CP_LOAD_STATE_0 0x00000000
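/* note: the REG_CP_* offsets in this header (0x0, 0x1, ...) index dwords
 * within the payload of the corresponding PM4 type-3 packet (CP_LOAD_STATE,
 * CP_DRAW_INDX, ...), not MMIO register space.
 */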
#define CP_LOAD_STATE_0_DST_OFF__MASK 0x0000ffff
#define CP_LOAD_STATE_0_DST_OFF__SHIFT 0
static inline uint32_t CP_LOAD_STATE_0_DST_OFF(uint32_t val)
{
return ((val) << CP_LOAD_STATE_0_DST_OFF__SHIFT) & CP_LOAD_STATE_0_DST_OFF__MASK;
}
#define CP_LOAD_STATE_0_STATE_SRC__MASK 0x00070000
#define CP_LOAD_STATE_0_STATE_SRC__SHIFT 16
static inline uint32_t CP_LOAD_STATE_0_STATE_SRC(enum adreno_state_src val)
{
return ((val) << CP_LOAD_STATE_0_STATE_SRC__SHIFT) & CP_LOAD_STATE_0_STATE_SRC__MASK;
}
#define CP_LOAD_STATE_0_STATE_BLOCK__MASK 0x00380000
#define CP_LOAD_STATE_0_STATE_BLOCK__SHIFT 19
static inline uint32_t CP_LOAD_STATE_0_STATE_BLOCK(enum adreno_state_block val)
{
return ((val) << CP_LOAD_STATE_0_STATE_BLOCK__SHIFT) & CP_LOAD_STATE_0_STATE_BLOCK__MASK;
}
#define CP_LOAD_STATE_0_NUM_UNIT__MASK 0x7fc00000
#define CP_LOAD_STATE_0_NUM_UNIT__SHIFT 22
static inline uint32_t CP_LOAD_STATE_0_NUM_UNIT(uint32_t val)
{
return ((val) << CP_LOAD_STATE_0_NUM_UNIT__SHIFT) & CP_LOAD_STATE_0_NUM_UNIT__MASK;
}
 
#define REG_CP_LOAD_STATE_1 0x00000001
#define CP_LOAD_STATE_1_STATE_TYPE__MASK 0x00000003
#define CP_LOAD_STATE_1_STATE_TYPE__SHIFT 0
static inline uint32_t CP_LOAD_STATE_1_STATE_TYPE(enum adreno_state_type val)
{
return ((val) << CP_LOAD_STATE_1_STATE_TYPE__SHIFT) & CP_LOAD_STATE_1_STATE_TYPE__MASK;
}
#define CP_LOAD_STATE_1_EXT_SRC_ADDR__MASK 0xfffffffc
#define CP_LOAD_STATE_1_EXT_SRC_ADDR__SHIFT 2
static inline uint32_t CP_LOAD_STATE_1_EXT_SRC_ADDR(uint32_t val)
{
return ((val >> 2) << CP_LOAD_STATE_1_EXT_SRC_ADDR__SHIFT) & CP_LOAD_STATE_1_EXT_SRC_ADDR__MASK;
}
 
#define REG_CP_DRAW_INDX_0 0x00000000
#define CP_DRAW_INDX_0_VIZ_QUERY__MASK 0xffffffff
#define CP_DRAW_INDX_0_VIZ_QUERY__SHIFT 0
static inline uint32_t CP_DRAW_INDX_0_VIZ_QUERY(uint32_t val)
{
return ((val) << CP_DRAW_INDX_0_VIZ_QUERY__SHIFT) & CP_DRAW_INDX_0_VIZ_QUERY__MASK;
}
 
#define REG_CP_DRAW_INDX_1 0x00000001
#define CP_DRAW_INDX_1_PRIM_TYPE__MASK 0x0000003f
#define CP_DRAW_INDX_1_PRIM_TYPE__SHIFT 0
static inline uint32_t CP_DRAW_INDX_1_PRIM_TYPE(enum pc_di_primtype val)
{
return ((val) << CP_DRAW_INDX_1_PRIM_TYPE__SHIFT) & CP_DRAW_INDX_1_PRIM_TYPE__MASK;
}
#define CP_DRAW_INDX_1_SOURCE_SELECT__MASK 0x000000c0
#define CP_DRAW_INDX_1_SOURCE_SELECT__SHIFT 6
static inline uint32_t CP_DRAW_INDX_1_SOURCE_SELECT(enum pc_di_src_sel val)
{
return ((val) << CP_DRAW_INDX_1_SOURCE_SELECT__SHIFT) & CP_DRAW_INDX_1_SOURCE_SELECT__MASK;
}
#define CP_DRAW_INDX_1_VIS_CULL__MASK 0x00000600
#define CP_DRAW_INDX_1_VIS_CULL__SHIFT 9
static inline uint32_t CP_DRAW_INDX_1_VIS_CULL(enum pc_di_vis_cull_mode val)
{
return ((val) << CP_DRAW_INDX_1_VIS_CULL__SHIFT) & CP_DRAW_INDX_1_VIS_CULL__MASK;
}
#define CP_DRAW_INDX_1_INDEX_SIZE__MASK 0x00000800
#define CP_DRAW_INDX_1_INDEX_SIZE__SHIFT 11
static inline uint32_t CP_DRAW_INDX_1_INDEX_SIZE(enum pc_di_index_size val)
{
return ((val) << CP_DRAW_INDX_1_INDEX_SIZE__SHIFT) & CP_DRAW_INDX_1_INDEX_SIZE__MASK;
}
#define CP_DRAW_INDX_1_NOT_EOP 0x00001000
#define CP_DRAW_INDX_1_SMALL_INDEX 0x00002000
#define CP_DRAW_INDX_1_PRE_DRAW_INITIATOR_ENABLE 0x00004000
#define CP_DRAW_INDX_1_NUM_INSTANCES__MASK 0xff000000
#define CP_DRAW_INDX_1_NUM_INSTANCES__SHIFT 24
static inline uint32_t CP_DRAW_INDX_1_NUM_INSTANCES(uint32_t val)
{
return ((val) << CP_DRAW_INDX_1_NUM_INSTANCES__SHIFT) & CP_DRAW_INDX_1_NUM_INSTANCES__MASK;
}
 
#define REG_CP_DRAW_INDX_2 0x00000002
#define CP_DRAW_INDX_2_NUM_INDICES__MASK 0xffffffff
#define CP_DRAW_INDX_2_NUM_INDICES__SHIFT 0
static inline uint32_t CP_DRAW_INDX_2_NUM_INDICES(uint32_t val)
{
return ((val) << CP_DRAW_INDX_2_NUM_INDICES__SHIFT) & CP_DRAW_INDX_2_NUM_INDICES__MASK;
}
 
#define REG_CP_DRAW_INDX_3 0x00000003
#define CP_DRAW_INDX_3_INDX_BASE__MASK 0xffffffff
#define CP_DRAW_INDX_3_INDX_BASE__SHIFT 0
static inline uint32_t CP_DRAW_INDX_3_INDX_BASE(uint32_t val)
{
return ((val) << CP_DRAW_INDX_3_INDX_BASE__SHIFT) & CP_DRAW_INDX_3_INDX_BASE__MASK;
}
 
#define REG_CP_DRAW_INDX_4 0x00000004
#define CP_DRAW_INDX_4_INDX_SIZE__MASK 0xffffffff
#define CP_DRAW_INDX_4_INDX_SIZE__SHIFT 0
static inline uint32_t CP_DRAW_INDX_4_INDX_SIZE(uint32_t val)
{
return ((val) << CP_DRAW_INDX_4_INDX_SIZE__SHIFT) & CP_DRAW_INDX_4_INDX_SIZE__MASK;
}
 
#define REG_CP_DRAW_INDX_2_0 0x00000000
#define CP_DRAW_INDX_2_0_VIZ_QUERY__MASK 0xffffffff
#define CP_DRAW_INDX_2_0_VIZ_QUERY__SHIFT 0
static inline uint32_t CP_DRAW_INDX_2_0_VIZ_QUERY(uint32_t val)
{
return ((val) << CP_DRAW_INDX_2_0_VIZ_QUERY__SHIFT) & CP_DRAW_INDX_2_0_VIZ_QUERY__MASK;
}
 
#define REG_CP_DRAW_INDX_2_1 0x00000001
#define CP_DRAW_INDX_2_1_PRIM_TYPE__MASK 0x0000003f
#define CP_DRAW_INDX_2_1_PRIM_TYPE__SHIFT 0
static inline uint32_t CP_DRAW_INDX_2_1_PRIM_TYPE(enum pc_di_primtype val)
{
return ((val) << CP_DRAW_INDX_2_1_PRIM_TYPE__SHIFT) & CP_DRAW_INDX_2_1_PRIM_TYPE__MASK;
}
#define CP_DRAW_INDX_2_1_SOURCE_SELECT__MASK 0x000000c0
#define CP_DRAW_INDX_2_1_SOURCE_SELECT__SHIFT 6
static inline uint32_t CP_DRAW_INDX_2_1_SOURCE_SELECT(enum pc_di_src_sel val)
{
return ((val) << CP_DRAW_INDX_2_1_SOURCE_SELECT__SHIFT) & CP_DRAW_INDX_2_1_SOURCE_SELECT__MASK;
}
#define CP_DRAW_INDX_2_1_VIS_CULL__MASK 0x00000600
#define CP_DRAW_INDX_2_1_VIS_CULL__SHIFT 9
static inline uint32_t CP_DRAW_INDX_2_1_VIS_CULL(enum pc_di_vis_cull_mode val)
{
return ((val) << CP_DRAW_INDX_2_1_VIS_CULL__SHIFT) & CP_DRAW_INDX_2_1_VIS_CULL__MASK;
}
#define CP_DRAW_INDX_2_1_INDEX_SIZE__MASK 0x00000800
#define CP_DRAW_INDX_2_1_INDEX_SIZE__SHIFT 11
static inline uint32_t CP_DRAW_INDX_2_1_INDEX_SIZE(enum pc_di_index_size val)
{
return ((val) << CP_DRAW_INDX_2_1_INDEX_SIZE__SHIFT) & CP_DRAW_INDX_2_1_INDEX_SIZE__MASK;
}
#define CP_DRAW_INDX_2_1_NOT_EOP 0x00001000
#define CP_DRAW_INDX_2_1_SMALL_INDEX 0x00002000
#define CP_DRAW_INDX_2_1_PRE_DRAW_INITIATOR_ENABLE 0x00004000
#define CP_DRAW_INDX_2_1_NUM_INSTANCES__MASK 0xff000000
#define CP_DRAW_INDX_2_1_NUM_INSTANCES__SHIFT 24
static inline uint32_t CP_DRAW_INDX_2_1_NUM_INSTANCES(uint32_t val)
{
return ((val) << CP_DRAW_INDX_2_1_NUM_INSTANCES__SHIFT) & CP_DRAW_INDX_2_1_NUM_INSTANCES__MASK;
}
 
#define REG_CP_DRAW_INDX_2_2 0x00000002
#define CP_DRAW_INDX_2_2_NUM_INDICES__MASK 0xffffffff
#define CP_DRAW_INDX_2_2_NUM_INDICES__SHIFT 0
static inline uint32_t CP_DRAW_INDX_2_2_NUM_INDICES(uint32_t val)
{
return ((val) << CP_DRAW_INDX_2_2_NUM_INDICES__SHIFT) & CP_DRAW_INDX_2_2_NUM_INDICES__MASK;
}
 
#define REG_CP_DRAW_INDX_OFFSET_0 0x00000000
#define CP_DRAW_INDX_OFFSET_0_PRIM_TYPE__MASK 0x0000003f
#define CP_DRAW_INDX_OFFSET_0_PRIM_TYPE__SHIFT 0
static inline uint32_t CP_DRAW_INDX_OFFSET_0_PRIM_TYPE(enum pc_di_primtype val)
{
return ((val) << CP_DRAW_INDX_OFFSET_0_PRIM_TYPE__SHIFT) & CP_DRAW_INDX_OFFSET_0_PRIM_TYPE__MASK;
}
#define CP_DRAW_INDX_OFFSET_0_SOURCE_SELECT__MASK 0x000000c0
#define CP_DRAW_INDX_OFFSET_0_SOURCE_SELECT__SHIFT 6
static inline uint32_t CP_DRAW_INDX_OFFSET_0_SOURCE_SELECT(enum pc_di_src_sel val)
{
return ((val) << CP_DRAW_INDX_OFFSET_0_SOURCE_SELECT__SHIFT) & CP_DRAW_INDX_OFFSET_0_SOURCE_SELECT__MASK;
}
#define CP_DRAW_INDX_OFFSET_0_TESSELLATE 0x00000100
#define CP_DRAW_INDX_OFFSET_0_INDEX_SIZE__MASK 0x00000c00
#define CP_DRAW_INDX_OFFSET_0_INDEX_SIZE__SHIFT 10
static inline uint32_t CP_DRAW_INDX_OFFSET_0_INDEX_SIZE(enum a4xx_index_size val)
{
return ((val) << CP_DRAW_INDX_OFFSET_0_INDEX_SIZE__SHIFT) & CP_DRAW_INDX_OFFSET_0_INDEX_SIZE__MASK;
}
#define CP_DRAW_INDX_OFFSET_0_TESS_MODE__MASK 0x01f00000
#define CP_DRAW_INDX_OFFSET_0_TESS_MODE__SHIFT 20
static inline uint32_t CP_DRAW_INDX_OFFSET_0_TESS_MODE(uint32_t val)
{
return ((val) << CP_DRAW_INDX_OFFSET_0_TESS_MODE__SHIFT) & CP_DRAW_INDX_OFFSET_0_TESS_MODE__MASK;
}
 
#define REG_CP_DRAW_INDX_OFFSET_1 0x00000001
#define CP_DRAW_INDX_OFFSET_1_NUM_INSTANCES__MASK 0xffffffff
#define CP_DRAW_INDX_OFFSET_1_NUM_INSTANCES__SHIFT 0
static inline uint32_t CP_DRAW_INDX_OFFSET_1_NUM_INSTANCES(uint32_t val)
{
return ((val) << CP_DRAW_INDX_OFFSET_1_NUM_INSTANCES__SHIFT) & CP_DRAW_INDX_OFFSET_1_NUM_INSTANCES__MASK;
}
 
#define REG_CP_DRAW_INDX_OFFSET_2 0x00000002
#define CP_DRAW_INDX_OFFSET_2_NUM_INDICES__MASK 0xffffffff
#define CP_DRAW_INDX_OFFSET_2_NUM_INDICES__SHIFT 0
static inline uint32_t CP_DRAW_INDX_OFFSET_2_NUM_INDICES(uint32_t val)
{
return ((val) << CP_DRAW_INDX_OFFSET_2_NUM_INDICES__SHIFT) & CP_DRAW_INDX_OFFSET_2_NUM_INDICES__MASK;
}
 
#define REG_CP_DRAW_INDX_OFFSET_3 0x00000003
 
#define REG_CP_DRAW_INDX_OFFSET_4 0x00000004
#define CP_DRAW_INDX_OFFSET_4_INDX_BASE__MASK 0xffffffff
#define CP_DRAW_INDX_OFFSET_4_INDX_BASE__SHIFT 0
static inline uint32_t CP_DRAW_INDX_OFFSET_4_INDX_BASE(uint32_t val)
{
return ((val) << CP_DRAW_INDX_OFFSET_4_INDX_BASE__SHIFT) & CP_DRAW_INDX_OFFSET_4_INDX_BASE__MASK;
}
 
#define REG_CP_DRAW_INDX_OFFSET_5 0x00000005
#define CP_DRAW_INDX_OFFSET_5_INDX_SIZE__MASK 0xffffffff
#define CP_DRAW_INDX_OFFSET_5_INDX_SIZE__SHIFT 0
static inline uint32_t CP_DRAW_INDX_OFFSET_5_INDX_SIZE(uint32_t val)
{
return ((val) << CP_DRAW_INDX_OFFSET_5_INDX_SIZE__SHIFT) & CP_DRAW_INDX_OFFSET_5_INDX_SIZE__MASK;
}
 
#define REG_CP_SET_DRAW_STATE_0 0x00000000
#define CP_SET_DRAW_STATE_0_COUNT__MASK 0x0000ffff
#define CP_SET_DRAW_STATE_0_COUNT__SHIFT 0
static inline uint32_t CP_SET_DRAW_STATE_0_COUNT(uint32_t val)
{
return ((val) << CP_SET_DRAW_STATE_0_COUNT__SHIFT) & CP_SET_DRAW_STATE_0_COUNT__MASK;
}
#define CP_SET_DRAW_STATE_0_DIRTY 0x00010000
#define CP_SET_DRAW_STATE_0_DISABLE 0x00020000
#define CP_SET_DRAW_STATE_0_DISABLE_ALL_GROUPS 0x00040000
#define CP_SET_DRAW_STATE_0_LOAD_IMMED 0x00080000
#define CP_SET_DRAW_STATE_0_GROUP_ID__MASK 0x1f000000
#define CP_SET_DRAW_STATE_0_GROUP_ID__SHIFT 24
static inline uint32_t CP_SET_DRAW_STATE_0_GROUP_ID(uint32_t val)
{
return ((val) << CP_SET_DRAW_STATE_0_GROUP_ID__SHIFT) & CP_SET_DRAW_STATE_0_GROUP_ID__MASK;
}
 
#define REG_CP_SET_DRAW_STATE_1 0x00000001
#define CP_SET_DRAW_STATE_1_ADDR__MASK 0xffffffff
#define CP_SET_DRAW_STATE_1_ADDR__SHIFT 0
static inline uint32_t CP_SET_DRAW_STATE_1_ADDR(uint32_t val)
{
return ((val) << CP_SET_DRAW_STATE_1_ADDR__SHIFT) & CP_SET_DRAW_STATE_1_ADDR__MASK;
}
 
#define REG_CP_SET_BIN_0 0x00000000
 
#define REG_CP_SET_BIN_1 0x00000001
#define CP_SET_BIN_1_X1__MASK 0x0000ffff
#define CP_SET_BIN_1_X1__SHIFT 0
static inline uint32_t CP_SET_BIN_1_X1(uint32_t val)
{
return ((val) << CP_SET_BIN_1_X1__SHIFT) & CP_SET_BIN_1_X1__MASK;
}
#define CP_SET_BIN_1_Y1__MASK 0xffff0000
#define CP_SET_BIN_1_Y1__SHIFT 16
static inline uint32_t CP_SET_BIN_1_Y1(uint32_t val)
{
return ((val) << CP_SET_BIN_1_Y1__SHIFT) & CP_SET_BIN_1_Y1__MASK;
}
 
#define REG_CP_SET_BIN_2 0x00000002
#define CP_SET_BIN_2_X2__MASK 0x0000ffff
#define CP_SET_BIN_2_X2__SHIFT 0
static inline uint32_t CP_SET_BIN_2_X2(uint32_t val)
{
return ((val) << CP_SET_BIN_2_X2__SHIFT) & CP_SET_BIN_2_X2__MASK;
}
#define CP_SET_BIN_2_Y2__MASK 0xffff0000
#define CP_SET_BIN_2_Y2__SHIFT 16
static inline uint32_t CP_SET_BIN_2_Y2(uint32_t val)
{
return ((val) << CP_SET_BIN_2_Y2__SHIFT) & CP_SET_BIN_2_Y2__MASK;
}
 
#define REG_CP_SET_BIN_DATA_0 0x00000000
#define CP_SET_BIN_DATA_0_BIN_DATA_ADDR__MASK 0xffffffff
#define CP_SET_BIN_DATA_0_BIN_DATA_ADDR__SHIFT 0
static inline uint32_t CP_SET_BIN_DATA_0_BIN_DATA_ADDR(uint32_t val)
{
return ((val) << CP_SET_BIN_DATA_0_BIN_DATA_ADDR__SHIFT) & CP_SET_BIN_DATA_0_BIN_DATA_ADDR__MASK;
}
 
#define REG_CP_SET_BIN_DATA_1 0x00000001
#define CP_SET_BIN_DATA_1_BIN_SIZE_ADDRESS__MASK 0xffffffff
#define CP_SET_BIN_DATA_1_BIN_SIZE_ADDRESS__SHIFT 0
static inline uint32_t CP_SET_BIN_DATA_1_BIN_SIZE_ADDRESS(uint32_t val)
{
return ((val) << CP_SET_BIN_DATA_1_BIN_SIZE_ADDRESS__SHIFT) & CP_SET_BIN_DATA_1_BIN_SIZE_ADDRESS__MASK;
}
 
 
#endif /* ADRENO_PM4_XML */
/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/freedreno/disasm.h
0,0 → 1,43
/*
* Copyright © 2012 Rob Clark <robclark@freedesktop.org>
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
 
#ifndef DISASM_H_
#define DISASM_H_
 
enum shader_t {
SHADER_VERTEX,
SHADER_FRAGMENT,
SHADER_COMPUTE,
};
 
/* bitmask of debug flags */
enum debug_t {
PRINT_RAW = 0x1, /* dump raw hexdump */
PRINT_VERBOSE = 0x2,
};
 
int disasm_a2xx(uint32_t *dwords, int sizedwords, int level, enum shader_t type);
int disasm_a3xx(uint32_t *dwords, int sizedwords, int level, enum shader_t type);
void disasm_set_debug(enum debug_t debug);
 
#endif /* DISASM_H_ */
/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/freedreno/freedreno_context.c
0,0 → 1,252
/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */
 
/*
* Copyright (C) 2012 Rob Clark <robclark@freedesktop.org>
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Authors:
* Rob Clark <robclark@freedesktop.org>
*/
 
#include "freedreno_context.h"
#include "freedreno_draw.h"
#include "freedreno_fence.h"
#include "freedreno_program.h"
#include "freedreno_resource.h"
#include "freedreno_texture.h"
#include "freedreno_state.h"
#include "freedreno_gmem.h"
#include "freedreno_query.h"
#include "freedreno_query_hw.h"
#include "freedreno_util.h"
 
static struct fd_ringbuffer *next_rb(struct fd_context *ctx)
{
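/* the context round-robins over a small pool of ringbuffers (ctx->rings);
 * before reusing one, wait on its last timestamp so the GPU is done with
 * it, then reset it for the next batch of commands.
 */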
struct fd_ringbuffer *ring;
uint32_t ts;
 
/* grab next ringbuffer: */
ring = ctx->rings[(ctx->rings_idx++) % ARRAY_SIZE(ctx->rings)];
 
/* wait for new rb to be idle: */
ts = fd_ringbuffer_timestamp(ring);
if (ts) {
DBG("wait: %u", ts);
fd_pipe_wait(ctx->screen->pipe, ts);
}
 
fd_ringbuffer_reset(ring);
 
return ring;
}
 
static void
fd_context_next_rb(struct pipe_context *pctx)
{
struct fd_context *ctx = fd_context(pctx);
struct fd_ringbuffer *ring;
 
fd_ringmarker_del(ctx->draw_start);
fd_ringmarker_del(ctx->draw_end);
 
ring = next_rb(ctx);
 
ctx->draw_start = fd_ringmarker_new(ring);
ctx->draw_end = fd_ringmarker_new(ring);
 
fd_ringbuffer_set_parent(ring, NULL);
ctx->ring = ring;
 
fd_ringmarker_del(ctx->binning_start);
fd_ringmarker_del(ctx->binning_end);
 
ring = next_rb(ctx);
 
ctx->binning_start = fd_ringmarker_new(ring);
ctx->binning_end = fd_ringmarker_new(ring);
 
fd_ringbuffer_set_parent(ring, ctx->ring);
ctx->binning_ring = ring;
}
 
/* emit accumulated render cmds, needed for example if render target has
* changed, or for flush()
*/
void
fd_context_render(struct pipe_context *pctx)
{
struct fd_context *ctx = fd_context(pctx);
struct pipe_framebuffer_state *pfb = &ctx->framebuffer;
struct fd_resource *rsc, *rsc_tmp;
int i;
 
DBG("needs_flush: %d", ctx->needs_flush);
 
if (!ctx->needs_flush)
return;
 
fd_gmem_render_tiles(ctx);
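/* at this point the accumulated draw commands have been replayed per tile
 * and flushed out to memory, so the dirty/pending tracking below can be
 * reset for the next batch.
 */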
 
DBG("%p/%p/%p", ctx->ring->start, ctx->ring->cur, ctx->ring->end);
 
/* if size in dwords is more than half the buffer size, then wait and
* wrap around:
*/
if ((ctx->ring->cur - ctx->ring->start) > ctx->ring->size/8)
fd_context_next_rb(pctx);
 
ctx->needs_flush = false;
ctx->cleared = ctx->partial_cleared = ctx->restore = ctx->resolve = 0;
ctx->gmem_reason = 0;
ctx->num_draws = 0;
 
for (i = 0; i < pfb->nr_cbufs; i++)
if (pfb->cbufs[i])
fd_resource(pfb->cbufs[i]->texture)->dirty = false;
if (pfb->zsbuf) {
rsc = fd_resource(pfb->zsbuf->texture);
rsc->dirty = false;
if (rsc->stencil)
rsc->stencil->dirty = false;
}
 
/* go through all the used resources and clear their reading flag */
LIST_FOR_EACH_ENTRY_SAFE(rsc, rsc_tmp, &ctx->used_resources, list) {
assert(rsc->reading);
rsc->reading = false;
list_delinit(&rsc->list);
}
 
assert(LIST_IS_EMPTY(&ctx->used_resources));
}
 
static void
fd_context_flush(struct pipe_context *pctx, struct pipe_fence_handle **fence,
unsigned flags)
{
fd_context_render(pctx);
 
if (fence)
*fence = fd_fence_create(pctx);
}
 
void
fd_context_destroy(struct pipe_context *pctx)
{
struct fd_context *ctx = fd_context(pctx);
unsigned i;
 
DBG("");
 
fd_prog_fini(pctx);
fd_hw_query_fini(pctx);
 
util_dynarray_fini(&ctx->draw_patches);
 
if (ctx->blitter)
util_blitter_destroy(ctx->blitter);
 
if (ctx->primconvert)
util_primconvert_destroy(ctx->primconvert);
 
util_slab_destroy(&ctx->transfer_pool);
 
fd_ringmarker_del(ctx->draw_start);
fd_ringmarker_del(ctx->draw_end);
fd_ringmarker_del(ctx->binning_start);
fd_ringmarker_del(ctx->binning_end);
 
for (i = 0; i < ARRAY_SIZE(ctx->rings); i++)
fd_ringbuffer_del(ctx->rings[i]);
 
for (i = 0; i < ARRAY_SIZE(ctx->pipe); i++) {
struct fd_vsc_pipe *pipe = &ctx->pipe[i];
if (!pipe->bo)
break;
fd_bo_del(pipe->bo);
}
 
fd_device_del(ctx->dev);
 
FREE(ctx);
}
 
struct pipe_context *
fd_context_init(struct fd_context *ctx, struct pipe_screen *pscreen,
const uint8_t *primtypes, void *priv)
{
struct fd_screen *screen = fd_screen(pscreen);
struct pipe_context *pctx;
int i;
 
ctx->screen = screen;
 
ctx->primtypes = primtypes;
ctx->primtype_mask = 0;
for (i = 0; i < PIPE_PRIM_MAX; i++)
if (primtypes[i])
ctx->primtype_mask |= (1 << i);
 
/* need some sane default in case state tracker doesn't
* set some state:
*/
ctx->sample_mask = 0xffff;
 
pctx = &ctx->base;
pctx->screen = pscreen;
pctx->priv = priv;
pctx->flush = fd_context_flush;
 
for (i = 0; i < ARRAY_SIZE(ctx->rings); i++) {
ctx->rings[i] = fd_ringbuffer_new(screen->pipe, 0x100000);
if (!ctx->rings[i])
goto fail;
}
 
fd_context_next_rb(pctx);
fd_reset_wfi(ctx);
 
util_dynarray_init(&ctx->draw_patches);
 
util_slab_create(&ctx->transfer_pool, sizeof(struct fd_transfer),
16, UTIL_SLAB_SINGLETHREADED);
 
fd_draw_init(pctx);
fd_resource_context_init(pctx);
fd_query_context_init(pctx);
fd_texture_init(pctx);
fd_state_init(pctx);
fd_hw_query_init(pctx);
 
ctx->blitter = util_blitter_create(pctx);
if (!ctx->blitter)
goto fail;
 
ctx->primconvert = util_primconvert_create(pctx, ctx->primtype_mask);
if (!ctx->primconvert)
goto fail;
 
return pctx;
 
fail:
pctx->destroy(pctx);
return NULL;
}
/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/freedreno/freedreno_context.h
0,0 → 1,416
/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */
 
/*
* Copyright (C) 2012 Rob Clark <robclark@freedesktop.org>
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Authors:
* Rob Clark <robclark@freedesktop.org>
*/
 
#ifndef FREEDRENO_CONTEXT_H_
#define FREEDRENO_CONTEXT_H_
 
#include "pipe/p_context.h"
#include "indices/u_primconvert.h"
#include "util/u_blitter.h"
#include "util/list.h"
#include "util/u_slab.h"
#include "util/u_string.h"
 
#include "freedreno_screen.h"
#include "freedreno_gmem.h"
#include "freedreno_util.h"
 
struct fd_vertex_stateobj;
 
struct fd_texture_stateobj {
struct pipe_sampler_view *textures[PIPE_MAX_SAMPLERS];
unsigned num_textures;
struct pipe_sampler_state *samplers[PIPE_MAX_SAMPLERS];
unsigned num_samplers;
unsigned dirty_samplers;
};
 
struct fd_program_stateobj {
void *vp, *fp;
enum {
FD_SHADER_DIRTY_VP = (1 << 0),
FD_SHADER_DIRTY_FP = (1 << 1),
} dirty;
uint8_t num_exports;
/* Indexed by semantic name or TGSI_SEMANTIC_COUNT + semantic index
* for TGSI_SEMANTIC_GENERIC. Special vs exports (position and point-
* size) are not included in this
*/
uint8_t export_linkage[63];
};
 
struct fd_constbuf_stateobj {
struct pipe_constant_buffer cb[PIPE_MAX_CONSTANT_BUFFERS];
uint32_t enabled_mask;
uint32_t dirty_mask;
};
 
struct fd_vertexbuf_stateobj {
struct pipe_vertex_buffer vb[PIPE_MAX_ATTRIBS];
unsigned count;
uint32_t enabled_mask;
uint32_t dirty_mask;
};
 
struct fd_vertex_stateobj {
struct pipe_vertex_element pipe[PIPE_MAX_ATTRIBS];
unsigned num_elements;
};
 
/* group together the vertex and vertexbuf state.. for ease of passing
* around, and because various internal operations (gmem<->mem, etc)
* need their own vertex state:
*/
struct fd_vertex_state {
struct fd_vertex_stateobj *vtx;
struct fd_vertexbuf_stateobj vertexbuf;
};
 
/* Bitmask of stages in rendering during which a particular query is
* active. Queries will be automatically started/stopped (generating
* additional fd_hw_sample_period's) on entrance/exit from stages that
* are applicable to the query.
*
* NOTE: set the stage to NULL at end of IB to ensure no query is still
* active. Things aren't going to work out the way you want if a query
* is active across IB's (or between tile IB and draw IB)
*/
enum fd_render_stage {
FD_STAGE_NULL = 0x00,
FD_STAGE_DRAW = 0x01,
FD_STAGE_CLEAR = 0x02,
/* TODO before queries which include MEM2GMEM or GMEM2MEM will
* work we will need to call fd_hw_query_prepare() from somewhere
* appropriate so that queries in the tiling IB get backed with
* memory to write results to.
*/
FD_STAGE_MEM2GMEM = 0x04,
FD_STAGE_GMEM2MEM = 0x08,
/* used for driver internal draws (ie. util_blitter_blit()): */
FD_STAGE_BLIT = 0x10,
};
 
#define MAX_HW_SAMPLE_PROVIDERS 4
struct fd_hw_sample_provider;
struct fd_hw_sample;
 
struct fd_context {
struct pipe_context base;
 
struct fd_device *dev;
struct fd_screen *screen;
 
struct blitter_context *blitter;
struct primconvert_context *primconvert;
 
/* slab for pipe_transfer allocations: */
struct util_slab_mempool transfer_pool;
 
/* slabs for fd_hw_sample and fd_hw_sample_period allocations: */
struct util_slab_mempool sample_pool;
struct util_slab_mempool sample_period_pool;
 
/* next sample offset.. incremented for each sample in the batch/
* submit, reset to zero on next submit.
*/
uint32_t next_sample_offset;
 
/* sample-providers for hw queries: */
const struct fd_hw_sample_provider *sample_providers[MAX_HW_SAMPLE_PROVIDERS];
 
/* cached samples (in case multiple queries need to reference
* the same sample snapshot)
*/
struct fd_hw_sample *sample_cache[MAX_HW_SAMPLE_PROVIDERS];
 
/* tracking for current stage, to know when to start/stop
* any active queries:
*/
enum fd_render_stage stage;
 
/* list of active queries: */
struct list_head active_queries;
 
/* list of queries that are not active, but were active in the
* current submit:
*/
struct list_head current_queries;
 
/* current query result bo and tile stride: */
struct fd_bo *query_bo;
uint32_t query_tile_stride;
 
/* list of resources used by currently-unsubmitted renders */
struct list_head used_resources;
 
/* table with PIPE_PRIM_MAX entries mapping PIPE_PRIM_x to
* DI_PT_x value to use for draw initiator. There are some
* slight differences between generation:
*/
const uint8_t *primtypes;
uint32_t primtype_mask;
 
/* shaders used by clear, and gmem->mem blits: */
struct fd_program_stateobj solid_prog; // TODO move to screen?
 
/* shaders used by mem->gmem blits: */
struct fd_program_stateobj blit_prog[8]; // TODO move to screen?
struct fd_program_stateobj blit_z, blit_zs;
 
/* do we need to mem2gmem before rendering. We don't, if for example,
* there was a glClear() that invalidated the entire previous buffer
* contents. Keep track of which buffer(s) are cleared, or need
* restore. Masks of PIPE_CLEAR_*
*
* The 'cleared' bits will be set for buffers which are *entirely*
* cleared, and 'partial_cleared' bits will be set if you must
* check cleared_scissor.
*/
enum {
/* align bitmask values w/ PIPE_CLEAR_*.. since that is convenient.. */
FD_BUFFER_COLOR = PIPE_CLEAR_COLOR,
FD_BUFFER_DEPTH = PIPE_CLEAR_DEPTH,
FD_BUFFER_STENCIL = PIPE_CLEAR_STENCIL,
FD_BUFFER_ALL = FD_BUFFER_COLOR | FD_BUFFER_DEPTH | FD_BUFFER_STENCIL,
} cleared, partial_cleared, restore, resolve;
 
bool needs_flush;
 
/* To decide whether to render to system memory, keep track of the
* number of draws, and whether any of them require multisample,
* depth_test (or depth write), stencil_test, blending, and
* color_logic_op (since those functions are disabled when by-
* passing GMEM).
*/
enum {
FD_GMEM_CLEARS_DEPTH_STENCIL = 0x01,
FD_GMEM_DEPTH_ENABLED = 0x02,
FD_GMEM_STENCIL_ENABLED = 0x04,
 
FD_GMEM_MSAA_ENABLED = 0x08,
FD_GMEM_BLEND_ENABLED = 0x10,
FD_GMEM_LOGICOP_ENABLED = 0x20,
} gmem_reason;
unsigned num_draws; /* number of draws in current batch */
 
/* Stats/counters:
*/
struct {
uint64_t prims_emitted;
uint64_t draw_calls;
uint64_t batch_total, batch_sysmem, batch_gmem, batch_restore;
} stats;
 
/* We can't really sanely deal with the wraparound point in the
* ringbuffer, and because of the way tiling works we can't flush at
* arbitrary points (without a big performance hit). When we get
* too close to the end of the current ringbuffer, cycle to the next
* one (and wait for pending rendering from next rb to complete).
* We want the # of ringbuffers to be high enough that we don't
* normally have to wait before resetting to the start of the next
* rb.
*/
struct fd_ringbuffer *rings[8];
unsigned rings_idx;
 
/* NOTE: currently using a single ringbuffer for both draw and
* tiling commands, we need to make sure we leave enough
* room at the end to append the tiling commands when we flush.
* 0x7000 dwords should be a couple times more than we ever need
* so should be a nice conservative threshold.
*/
#define FD_TILING_COMMANDS_DWORDS 0x7000
 
/* normal draw/clear cmds: */
struct fd_ringbuffer *ring;
struct fd_ringmarker *draw_start, *draw_end;
 
/* binning pass draw/clear cmds: */
struct fd_ringbuffer *binning_ring;
struct fd_ringmarker *binning_start, *binning_end;
 
/* Keep track if WAIT_FOR_IDLE is needed for registers we need
* to update via RMW:
*/
bool needs_wfi;
 
/* Do we need to re-emit RB_FRAME_BUFFER_DIMENSION? At least on a3xx
* it is not a banked context register, so it needs a WFI to update.
* Keep track of whether it has actually changed, to avoid unneeded WFI.
*/
bool needs_rb_fbd;
 
/* Keep track of DRAW initiators that need to be patched up depending
* on whether we using binning or not:
*/
struct util_dynarray draw_patches;
 
struct pipe_scissor_state scissor;
 
/* we don't have a disable/enable bit for scissor, so instead we keep
* a disabled-scissor state which matches the entire bound framebuffer
* and use that when scissor is not enabled.
*/
struct pipe_scissor_state disabled_scissor;
 
/* Track the maximal bounds of the scissor of all the draws within a
* batch. Used at the tile rendering step (fd_gmem_render_tiles(),
* mem2gmem/gmem2mem) to avoid needlessly moving data in/out of gmem.
*/
struct pipe_scissor_state max_scissor;
 
/* Track the cleared scissor for color/depth/stencil, so we know
* which, if any, tiles need to be restored (mem2gmem). Only valid
* if the corresponding bit in ctx->cleared is set.
*/
struct {
struct pipe_scissor_state color, depth, stencil;
} cleared_scissor;
 
/* Current gmem/tiling configuration.. gets updated on render_tiles()
* if out of date with current maximal-scissor/cpp:
*/
struct fd_gmem_stateobj gmem;
struct fd_vsc_pipe pipe[8];
struct fd_tile tile[256];
 
/* which state objects need to be re-emit'd: */
enum {
FD_DIRTY_BLEND = (1 << 0),
FD_DIRTY_RASTERIZER = (1 << 1),
FD_DIRTY_ZSA = (1 << 2),
FD_DIRTY_FRAGTEX = (1 << 3),
FD_DIRTY_VERTTEX = (1 << 4),
FD_DIRTY_TEXSTATE = (1 << 5),
FD_DIRTY_PROG = (1 << 6),
FD_DIRTY_BLEND_COLOR = (1 << 7),
FD_DIRTY_STENCIL_REF = (1 << 8),
FD_DIRTY_SAMPLE_MASK = (1 << 9),
FD_DIRTY_FRAMEBUFFER = (1 << 10),
FD_DIRTY_STIPPLE = (1 << 11),
FD_DIRTY_VIEWPORT = (1 << 12),
FD_DIRTY_CONSTBUF = (1 << 13),
FD_DIRTY_VTXSTATE = (1 << 14),
FD_DIRTY_VTXBUF = (1 << 15),
FD_DIRTY_INDEXBUF = (1 << 16),
FD_DIRTY_SCISSOR = (1 << 17),
} dirty;
 
struct pipe_blend_state *blend;
struct pipe_rasterizer_state *rasterizer;
struct pipe_depth_stencil_alpha_state *zsa;
 
struct fd_texture_stateobj verttex, fragtex;
 
struct fd_program_stateobj prog;
 
struct fd_vertex_state vtx;
 
struct pipe_blend_color blend_color;
struct pipe_stencil_ref stencil_ref;
unsigned sample_mask;
struct pipe_framebuffer_state framebuffer;
struct pipe_poly_stipple stipple;
struct pipe_viewport_state viewport;
struct fd_constbuf_stateobj constbuf[PIPE_SHADER_TYPES];
struct pipe_index_buffer indexbuf;
 
/* GMEM/tile handling fxns: */
void (*emit_tile_init)(struct fd_context *ctx);
void (*emit_tile_prep)(struct fd_context *ctx, struct fd_tile *tile);
void (*emit_tile_mem2gmem)(struct fd_context *ctx, struct fd_tile *tile);
void (*emit_tile_renderprep)(struct fd_context *ctx, struct fd_tile *tile);
void (*emit_tile_gmem2mem)(struct fd_context *ctx, struct fd_tile *tile);
 
/* optional, for GMEM bypass: */
void (*emit_sysmem_prep)(struct fd_context *ctx);
 
/* draw: */
void (*draw_vbo)(struct fd_context *pctx, const struct pipe_draw_info *info);
void (*clear)(struct fd_context *ctx, unsigned buffers,
const union pipe_color_union *color, double depth, unsigned stencil);
};
 
static INLINE struct fd_context *
fd_context(struct pipe_context *pctx)
{
return (struct fd_context *)pctx;
}
 
static INLINE struct pipe_scissor_state *
fd_context_get_scissor(struct fd_context *ctx)
{
if (ctx->rasterizer && ctx->rasterizer->scissor)
return &ctx->scissor;
return &ctx->disabled_scissor;
}
 
static INLINE bool
fd_supported_prim(struct fd_context *ctx, unsigned prim)
{
return (1 << prim) & ctx->primtype_mask;
}
 
static INLINE void
fd_reset_wfi(struct fd_context *ctx)
{
ctx->needs_wfi = true;
}
 
/* emit a WAIT_FOR_IDLE only if needed, ie. if there has not already
* been one since last draw:
*/
static inline void
fd_wfi(struct fd_context *ctx, struct fd_ringbuffer *ring)
{
if (ctx->needs_wfi) {
OUT_WFI(ring);
ctx->needs_wfi = false;
}
}
 
/* emit a CP_EVENT_WRITE:
*/
static inline void
fd_event_write(struct fd_context *ctx, struct fd_ringbuffer *ring,
enum vgt_event_type evt)
{
OUT_PKT3(ring, CP_EVENT_WRITE, 1);
OUT_RING(ring, evt);
fd_reset_wfi(ctx);
}
 
struct pipe_context * fd_context_init(struct fd_context *ctx,
struct pipe_screen *pscreen, const uint8_t *primtypes,
void *priv);
 
void fd_context_render(struct pipe_context *pctx);
 
void fd_context_destroy(struct pipe_context *pctx);
 
#endif /* FREEDRENO_CONTEXT_H_ */
/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/freedreno/freedreno_draw.c
0,0 → 1,277
/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */
 
/*
* Copyright (C) 2012 Rob Clark <robclark@freedesktop.org>
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Authors:
* Rob Clark <robclark@freedesktop.org>
*/
 
#include "pipe/p_state.h"
#include "util/u_string.h"
#include "util/u_memory.h"
#include "util/u_prim.h"
#include "util/u_format.h"
 
#include "freedreno_draw.h"
#include "freedreno_context.h"
#include "freedreno_state.h"
#include "freedreno_resource.h"
#include "freedreno_query_hw.h"
#include "freedreno_util.h"
 
static void
resource_reading(struct fd_context *ctx, struct pipe_resource *prsc)
{
struct fd_resource *rsc;
 
if (!prsc)
return;
 
rsc = fd_resource(prsc);
rsc->reading = true;
list_delinit(&rsc->list);
list_addtail(&rsc->list, &ctx->used_resources);
}
 
static void
fd_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info)
{
struct fd_context *ctx = fd_context(pctx);
struct pipe_framebuffer_state *pfb = &ctx->framebuffer;
struct pipe_scissor_state *scissor = fd_context_get_scissor(ctx);
unsigned i, buffers = 0;
 
/* if we supported transform feedback, we'd have to disable this: */
if (((scissor->maxx - scissor->minx) *
(scissor->maxy - scissor->miny)) == 0) {
return;
}
 
/* emulate unsupported primitives: */
if (!fd_supported_prim(ctx, info->mode)) {
util_primconvert_save_index_buffer(ctx->primconvert, &ctx->indexbuf);
util_primconvert_save_rasterizer_state(ctx->primconvert, ctx->rasterizer);
util_primconvert_draw_vbo(ctx->primconvert, info);
return;
}
 
ctx->needs_flush = true;
 
/*
* Figure out the buffers/features we need:
*/
 
if (fd_depth_enabled(ctx)) {
buffers |= FD_BUFFER_DEPTH;
fd_resource(pfb->zsbuf->texture)->dirty = true;
ctx->gmem_reason |= FD_GMEM_DEPTH_ENABLED;
}
 
if (fd_stencil_enabled(ctx)) {
struct fd_resource *rsc = fd_resource(pfb->zsbuf->texture);
buffers |= FD_BUFFER_STENCIL;
if (rsc->stencil)
rsc->stencil->dirty = true;
else
rsc->dirty = true;
ctx->gmem_reason |= FD_GMEM_STENCIL_ENABLED;
}
 
if (fd_logicop_enabled(ctx))
ctx->gmem_reason |= FD_GMEM_LOGICOP_ENABLED;
 
for (i = 0; i < pfb->nr_cbufs; i++) {
struct pipe_resource *surf;
 
if (!pfb->cbufs[i])
continue;
 
surf = pfb->cbufs[i]->texture;
 
fd_resource(surf)->dirty = true;
buffers |= PIPE_CLEAR_COLOR0 << i;
 
if (surf->nr_samples > 1)
ctx->gmem_reason |= FD_GMEM_MSAA_ENABLED;
 
if (fd_blend_enabled(ctx, i))
ctx->gmem_reason |= FD_GMEM_BLEND_ENABLED;
}
 
/* Skip over buffer 0, which is sent along with the command stream */
for (i = 1; i < PIPE_MAX_CONSTANT_BUFFERS; i++) {
resource_reading(ctx, ctx->constbuf[PIPE_SHADER_VERTEX].cb[i].buffer);
resource_reading(ctx, ctx->constbuf[PIPE_SHADER_FRAGMENT].cb[i].buffer);
}
 
/* Mark VBOs as being read */
for (i = 0; i < ctx->vtx.vertexbuf.count; i++) {
assert(!ctx->vtx.vertexbuf.vb[i].user_buffer);
resource_reading(ctx, ctx->vtx.vertexbuf.vb[i].buffer);
}
 
/* Mark index buffer as being read */
resource_reading(ctx, ctx->indexbuf.buffer);
 
/* Mark textures as being read */
for (i = 0; i < ctx->verttex.num_textures; i++)
if (ctx->verttex.textures[i])
resource_reading(ctx, ctx->verttex.textures[i]->texture);
for (i = 0; i < ctx->fragtex.num_textures; i++)
if (ctx->fragtex.textures[i])
resource_reading(ctx, ctx->fragtex.textures[i]->texture);
 
ctx->num_draws++;
 
ctx->stats.draw_calls++;
ctx->stats.prims_emitted +=
u_reduced_prims_for_vertices(info->mode, info->count);
 
/* any buffers that haven't been cleared yet, we need to restore: */
ctx->restore |= buffers & (FD_BUFFER_ALL & ~ctx->cleared);
/* and any buffers used, need to be resolved: */
ctx->resolve |= buffers;
 
DBG("%x num_draws=%u (%s/%s)", buffers, ctx->num_draws,
util_format_short_name(pipe_surface_format(pfb->cbufs[0])),
util_format_short_name(pipe_surface_format(pfb->zsbuf)));
 
fd_hw_query_set_stage(ctx, ctx->ring, FD_STAGE_DRAW);
ctx->draw_vbo(ctx, info);
 
/* if an app (or, well, piglit test) does many thousands of draws
* without flush (or anything which implicitly flushes, like
* changing render targets), we can exceed the ringbuffer size.
* Since we don't currently have a sane way to wrap around, and
* we use the same buffer for both draw and tiling commands, for
* now we need to do this hack and trigger flush if we are running
* low on remaining space for cmds:
*/
if (((ctx->ring->cur - ctx->ring->start) >
(ctx->ring->size/4 - FD_TILING_COMMANDS_DWORDS)) ||
(fd_mesa_debug & FD_DBG_FLUSH))
fd_context_render(pctx);
}
 
/* TODO figure out how to make better use of existing state mechanism
* for clear (and possibly gmem->mem / mem->gmem) so we can (a) keep
* track of what state really actually changes, and (b) reduce the code
* in the a2xx/a3xx parts.
*/
 
static void
fd_clear(struct pipe_context *pctx, unsigned buffers,
const union pipe_color_union *color, double depth, unsigned stencil)
{
struct fd_context *ctx = fd_context(pctx);
struct pipe_framebuffer_state *pfb = &ctx->framebuffer;
struct pipe_scissor_state *scissor = fd_context_get_scissor(ctx);
unsigned cleared_buffers;
int i;
 
/* for bookkeeping about which buffers have been cleared (and thus
* can fully or partially skip mem2gmem) we need to ignore buffers
* that have already had a draw, in case apps do silly things like
* clear after draw (ie. if you only clear the color buffer, but
* something like alpha-test causes side effects from the draw in
* the depth buffer, etc)
*/
cleared_buffers = buffers & (FD_BUFFER_ALL & ~ctx->restore);
 
/* do we have full-screen scissor? */
if (!memcmp(scissor, &ctx->disabled_scissor, sizeof(*scissor))) {
ctx->cleared |= cleared_buffers;
} else {
ctx->partial_cleared |= cleared_buffers;
if (cleared_buffers & PIPE_CLEAR_COLOR)
ctx->cleared_scissor.color = *scissor;
if (cleared_buffers & PIPE_CLEAR_DEPTH)
ctx->cleared_scissor.depth = *scissor;
if (cleared_buffers & PIPE_CLEAR_STENCIL)
ctx->cleared_scissor.stencil = *scissor;
}
ctx->resolve |= buffers;
ctx->needs_flush = true;
 
if (buffers & PIPE_CLEAR_COLOR)
for (i = 0; i < pfb->nr_cbufs; i++)
if (buffers & (PIPE_CLEAR_COLOR0 << i))
fd_resource(pfb->cbufs[i]->texture)->dirty = true;
 
if (buffers & (PIPE_CLEAR_DEPTH | PIPE_CLEAR_STENCIL)) {
struct fd_resource *rsc = fd_resource(pfb->zsbuf->texture);
if (rsc->stencil && buffers & PIPE_CLEAR_STENCIL)
rsc->stencil->dirty = true;
if (!rsc->stencil || buffers & PIPE_CLEAR_DEPTH)
rsc->dirty = true;
 
ctx->gmem_reason |= FD_GMEM_CLEARS_DEPTH_STENCIL;
}
 
DBG("%x depth=%f, stencil=%u (%s/%s)", buffers, depth, stencil,
util_format_short_name(pipe_surface_format(pfb->cbufs[0])),
util_format_short_name(pipe_surface_format(pfb->zsbuf)));
 
fd_hw_query_set_stage(ctx, ctx->ring, FD_STAGE_CLEAR);
 
ctx->clear(ctx, buffers, color, depth, stencil);
 
ctx->dirty |= FD_DIRTY_ZSA |
FD_DIRTY_VIEWPORT |
FD_DIRTY_RASTERIZER |
FD_DIRTY_SAMPLE_MASK |
FD_DIRTY_PROG |
FD_DIRTY_CONSTBUF |
FD_DIRTY_BLEND;
 
if (fd_mesa_debug & FD_DBG_DCLEAR)
ctx->dirty = 0xffffffff;
}
 
static void
fd_clear_render_target(struct pipe_context *pctx, struct pipe_surface *ps,
const union pipe_color_union *color,
unsigned x, unsigned y, unsigned w, unsigned h)
{
DBG("TODO: x=%u, y=%u, w=%u, h=%u", x, y, w, h);
}
 
static void
fd_clear_depth_stencil(struct pipe_context *pctx, struct pipe_surface *ps,
unsigned buffers, double depth, unsigned stencil,
unsigned x, unsigned y, unsigned w, unsigned h)
{
DBG("TODO: buffers=%u, depth=%f, stencil=%u, x=%u, y=%u, w=%u, h=%u",
buffers, depth, stencil, x, y, w, h);
}
 
void
fd_draw_init(struct pipe_context *pctx)
{
list_inithead(&fd_context(pctx)->used_resources);
 
pctx->draw_vbo = fd_draw_vbo;
pctx->clear = fd_clear;
pctx->clear_render_target = fd_clear_render_target;
pctx->clear_depth_stencil = fd_clear_depth_stencil;
}
/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/freedreno/freedreno_draw.h
0,0 → 1,147
/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */
 
/*
* Copyright (C) 2012 Rob Clark <robclark@freedesktop.org>
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Authors:
* Rob Clark <robclark@freedesktop.org>
*/
 
#ifndef FREEDRENO_DRAW_H_
#define FREEDRENO_DRAW_H_
 
#include "pipe/p_state.h"
#include "pipe/p_context.h"
 
#include "freedreno_context.h"
#include "freedreno_resource.h"
#include "freedreno_screen.h"
#include "freedreno_util.h"
 
struct fd_ringbuffer;
 
void fd_draw_init(struct pipe_context *pctx);
 
static inline void
fd_draw(struct fd_context *ctx, struct fd_ringbuffer *ring,
enum pc_di_primtype primtype,
enum pc_di_vis_cull_mode vismode,
enum pc_di_src_sel src_sel, uint32_t count,
uint8_t instances,
enum pc_di_index_size idx_type,
uint32_t idx_size, uint32_t idx_offset,
struct fd_bo *idx_bo)
{
/* for debug after a lock up, write a unique counter value
* to scratch7 for each draw, to make it easier to match up
* register dumps to cmdstream. The combination of IB
* (scratch6) and DRAW is enough to "triangulate" the
* particular draw that caused lockup.
*/
emit_marker(ring, 7);
 
if (is_a3xx_p0(ctx->screen)) {
/* dummy-draw workaround: */
OUT_PKT3(ring, CP_DRAW_INDX, 3);
OUT_RING(ring, 0x00000000);
OUT_RING(ring, DRAW(1, DI_SRC_SEL_AUTO_INDEX,
INDEX_SIZE_IGN, USE_VISIBILITY, 0));
OUT_RING(ring, 0); /* NumIndices */
 
/* ugg, hard-code register offset to avoid pulling in the
* a3xx register headers into something #included from a2xx
*/
OUT_PKT0(ring, 0x2206, 1); /* A3XX_HLSQ_CONST_VSPRESV_RANGE_REG */
OUT_RING(ring, 0);
}
 
OUT_PKT3(ring, CP_DRAW_INDX, idx_bo ? 5 : 3);
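/* the packet is 3 dwords for an auto-indexed draw, or 5 when an
* index buffer is used (the extra reloc + size dwords emitted below):
*/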
OUT_RING(ring, 0x00000000); /* viz query info. */
if (vismode == USE_VISIBILITY) {
/* leave vis mode blank for now, it will be patched up when
* we know if we are binning or not
*/
OUT_RINGP(ring, DRAW(primtype, src_sel, idx_type, 0, instances),
&ctx->draw_patches);
} else {
OUT_RING(ring, DRAW(primtype, src_sel, idx_type, vismode, instances));
}
OUT_RING(ring, count); /* NumIndices */
if (idx_bo) {
OUT_RELOC(ring, idx_bo, idx_offset, 0, 0);
OUT_RING (ring, idx_size);
}
 
emit_marker(ring, 7);
 
fd_reset_wfi(ctx);
}
 
 
static inline enum pc_di_index_size
size2indextype(unsigned index_size)
{
switch (index_size) {
case 1: return INDEX_SIZE_8_BIT;
case 2: return INDEX_SIZE_16_BIT;
case 4: return INDEX_SIZE_32_BIT;
}
DBG("unsupported index size: %d", index_size);
assert(0);
return INDEX_SIZE_IGN;
}
 
/* this is same for a2xx/a3xx, so split into helper: */
static inline void
fd_draw_emit(struct fd_context *ctx, struct fd_ringbuffer *ring,
enum pc_di_primtype primtype,
enum pc_di_vis_cull_mode vismode,
const struct pipe_draw_info *info)
{
struct pipe_index_buffer *idx = &ctx->indexbuf;
struct fd_bo *idx_bo = NULL;
enum pc_di_index_size idx_type = INDEX_SIZE_IGN;
enum pc_di_src_sel src_sel;
uint32_t idx_size, idx_offset;
 
if (info->indexed) {
assert(!idx->user_buffer);
 
idx_bo = fd_resource(idx->buffer)->bo;
idx_type = size2indextype(idx->index_size);
idx_size = idx->index_size * info->count;
idx_offset = idx->offset + (info->start * idx->index_size);
src_sel = DI_SRC_SEL_DMA;
} else {
idx_bo = NULL;
idx_type = INDEX_SIZE_IGN;
idx_size = 0;
idx_offset = 0;
src_sel = DI_SRC_SEL_AUTO_INDEX;
}
 
fd_draw(ctx, ring, primtype, vismode, src_sel,
info->count, info->instance_count - 1,
idx_type, idx_size, idx_offset, idx_bo);
}
 
#endif /* FREEDRENO_DRAW_H_ */
/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/freedreno/freedreno_fence.c
0,0 → 1,94
/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */
 
/*
* Copyright (C) 2012 Rob Clark <robclark@freedesktop.org>
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Authors:
* Rob Clark <robclark@freedesktop.org>
*/
 
#include "util/u_inlines.h"
 
#include "freedreno_fence.h"
#include "freedreno_context.h"
#include "freedreno_util.h"
 
struct pipe_fence_handle {
struct pipe_reference reference;
struct fd_context *ctx;
struct fd_screen *screen;
uint32_t timestamp;
};
 
void
fd_screen_fence_ref(struct pipe_screen *pscreen,
struct pipe_fence_handle **ptr,
struct pipe_fence_handle *pfence)
{
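/* pipe_reference() takes a reference on the new fence and drops the
* one held via *ptr, returning true when the old fence should be
* destroyed:
*/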
if (pipe_reference(&(*ptr)->reference, &pfence->reference))
FREE(*ptr);
 
*ptr = pfence;
}
 
/* TODO we need to spiff out libdrm_freedreno a bit to allow passing
* the timeout.. and maybe a better way to check if fence has been
* signaled. The current implementation is a bit lame for now to
* avoid bumping libdrm version requirement.
*/
 
boolean fd_screen_fence_signalled(struct pipe_screen *screen,
struct pipe_fence_handle *fence)
{
uint32_t timestamp = fd_ringbuffer_timestamp(fence->ctx->ring);
 
/* TODO util helper for compare w/ rollover? */
return timestamp >= fence->timestamp;
}
 
boolean fd_screen_fence_finish(struct pipe_screen *screen,
struct pipe_fence_handle *fence,
uint64_t timeout)
{
if (fd_pipe_wait(fence->screen->pipe, fence->timestamp))
return false;
 
return true;
}
 
struct pipe_fence_handle * fd_fence_create(struct pipe_context *pctx)
{
struct pipe_fence_handle *fence;
struct fd_context *ctx = fd_context(pctx);
 
fence = CALLOC_STRUCT(pipe_fence_handle);
if (!fence)
return NULL;
 
pipe_reference_init(&fence->reference, 1);
 
fence->ctx = ctx;
fence->screen = ctx->screen;
fence->timestamp = fd_ringbuffer_timestamp(ctx->ring);
 
return fence;
}
/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/freedreno/freedreno_fence.h
0,0 → 1,44
/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */
 
/*
* Copyright (C) 2012 Rob Clark <robclark@freedesktop.org>
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Authors:
* Rob Clark <robclark@freedesktop.org>
*/
 
#ifndef FREEDRENO_FENCE_H_
#define FREEDRENO_FENCE_H_
 
#include "pipe/p_context.h"
 
void fd_screen_fence_ref(struct pipe_screen *pscreen,
struct pipe_fence_handle **ptr,
struct pipe_fence_handle *pfence);
boolean fd_screen_fence_signalled(struct pipe_screen *screen,
struct pipe_fence_handle *pfence);
boolean fd_screen_fence_finish(struct pipe_screen *screen,
struct pipe_fence_handle *pfence,
uint64_t timeout);
struct pipe_fence_handle * fd_fence_create(struct pipe_context *pctx);
 
#endif /* FREEDRENO_FENCE_H_ */
/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/freedreno/freedreno_gmem.c
0,0 → 1,456
/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */
 
/*
* Copyright (C) 2012 Rob Clark <robclark@freedesktop.org>
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Authors:
* Rob Clark <robclark@freedesktop.org>
*/
 
#include "pipe/p_state.h"
#include "util/u_string.h"
#include "util/u_memory.h"
#include "util/u_inlines.h"
#include "util/u_format.h"
 
#include "freedreno_gmem.h"
#include "freedreno_context.h"
#include "freedreno_resource.h"
#include "freedreno_query_hw.h"
#include "freedreno_util.h"
 
/*
* GMEM is the small (ie. 256KiB for a200, 512KiB for a220, etc) tile buffer
* inside the GPU. All rendering happens to GMEM. Larger render targets
* are split into tiles that are small enough for the color (and depth and/or
* stencil, if enabled) buffers to fit within GMEM. Before rendering a tile,
* if there was not a clear invalidating the previous tile contents, we need
* to restore the previous tile's contents (system mem -> GMEM), and after all
* the draw calls, before moving to the next tile, we need to save the tile
* contents (GMEM -> system mem).
*
* The code in this file handles dealing with GMEM and tiling.
*
* The structure of the ringbuffer ends up being:
*
* +--<---<-- IB ---<---+---<---+---<---<---<--+
* | | | |
* v ^ ^ ^
* ------------------------------------------------------
* | clear/draw cmds | Tile0 | Tile1 | .... | TileN |
* ------------------------------------------------------
* ^
* |
* address submitted in issueibcmds
*
* Where the per-tile section handles scissor setup, mem2gmem restore (if
* needed), IB to draw cmds earlier in the ringbuffer, and then gmem2mem
* resolve.
*/
 
static uint32_t bin_width(struct fd_context *ctx)
{
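/* maximum bin (tile) width supported by this GPU generation: */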
if (is_a4xx(ctx->screen))
return 1024;
if (is_a3xx(ctx->screen))
return 992;
return 512;
}
 
static uint32_t
total_size(uint8_t cbuf_cpp[], uint8_t zsbuf_cpp[2],
uint32_t bin_w, uint32_t bin_h, struct fd_gmem_stateobj *gmem)
{
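/* compute how much GMEM a bin of the given dimensions requires,
* recording the base offset of each enabled color/depth/stencil
* buffer (aligned to 0x4000):
*/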
uint32_t total = 0, i;
 
for (i = 0; i < 4; i++) {
if (cbuf_cpp[i]) {
gmem->cbuf_base[i] = align(total, 0x4000);
total = gmem->cbuf_base[i] + cbuf_cpp[i] * bin_w * bin_h;
}
}
 
if (zsbuf_cpp[0]) {
gmem->zsbuf_base[0] = align(total, 0x4000);
total = gmem->zsbuf_base[0] + zsbuf_cpp[0] * bin_w * bin_h;
}
 
if (zsbuf_cpp[1]) {
gmem->zsbuf_base[1] = align(total, 0x4000);
total = gmem->zsbuf_base[1] + zsbuf_cpp[1] * bin_w * bin_h;
}
 
return total;
}
 
static void
calculate_tiles(struct fd_context *ctx)
{
struct fd_gmem_stateobj *gmem = &ctx->gmem;
struct pipe_scissor_state *scissor = &ctx->max_scissor;
struct pipe_framebuffer_state *pfb = &ctx->framebuffer;
uint32_t gmem_size = ctx->screen->gmemsize_bytes;
uint32_t minx, miny, width, height;
uint32_t nbins_x = 1, nbins_y = 1;
uint32_t bin_w, bin_h;
uint32_t max_width = bin_width(ctx);
uint8_t cbuf_cpp[4] = {0}, zsbuf_cpp[2] = {0};
uint32_t i, j, t, xoff, yoff;
uint32_t tpp_x, tpp_y;
bool has_zs = !!(ctx->resolve & (FD_BUFFER_DEPTH | FD_BUFFER_STENCIL));
int tile_n[ARRAY_SIZE(ctx->pipe)];
 
if (has_zs) {
struct fd_resource *rsc = fd_resource(pfb->zsbuf->texture);
zsbuf_cpp[0] = rsc->cpp;
if (rsc->stencil)
zsbuf_cpp[1] = rsc->stencil->cpp;
}
for (i = 0; i < pfb->nr_cbufs; i++) {
if (pfb->cbufs[i])
cbuf_cpp[i] = util_format_get_blocksize(pfb->cbufs[i]->format);
else
cbuf_cpp[i] = 4;
}
 
if (!memcmp(gmem->zsbuf_cpp, zsbuf_cpp, sizeof(zsbuf_cpp)) &&
!memcmp(gmem->cbuf_cpp, cbuf_cpp, sizeof(cbuf_cpp)) &&
!memcmp(&gmem->scissor, scissor, sizeof(gmem->scissor))) {
/* everything is up-to-date */
return;
}
 
if (fd_mesa_debug & FD_DBG_NOSCIS) {
minx = 0;
miny = 0;
width = pfb->width;
height = pfb->height;
} else {
minx = scissor->minx & ~31; /* round down to multiple of 32 */
miny = scissor->miny & ~31;
width = scissor->maxx - minx;
height = scissor->maxy - miny;
}
 
bin_w = align(width, 32);
bin_h = align(height, 32);
 
/* first, find a bin width that satisfies the maximum width
* restrictions:
*/
while (bin_w > max_width) {
nbins_x++;
bin_w = align(width / nbins_x, 32);
}
 
/* then find a bin width/height that satisfies the memory
* constraints:
*/
DBG("binning input: cbuf cpp: %d %d %d %d, zsbuf cpp: %d; %dx%d",
cbuf_cpp[0], cbuf_cpp[1], cbuf_cpp[2], cbuf_cpp[3], zsbuf_cpp[0],
width, height);
while (total_size(cbuf_cpp, zsbuf_cpp, bin_w, bin_h, gmem) > gmem_size) {
if (bin_w > bin_h) {
nbins_x++;
bin_w = align(width / nbins_x, 32);
} else {
nbins_y++;
bin_h = align(height / nbins_y, 32);
}
}
 
DBG("using %d bins of size %dx%d", nbins_x*nbins_y, bin_w, bin_h);
 
gmem->scissor = *scissor;
memcpy(gmem->cbuf_cpp, cbuf_cpp, sizeof(cbuf_cpp));
memcpy(gmem->zsbuf_cpp, zsbuf_cpp, sizeof(zsbuf_cpp));
gmem->bin_h = bin_h;
gmem->bin_w = bin_w;
gmem->nbins_x = nbins_x;
gmem->nbins_y = nbins_y;
gmem->minx = minx;
gmem->miny = miny;
gmem->width = width;
gmem->height = height;
 
/*
* Assign tiles and pipes:
*
* At some point it might be worth playing with different
* strategies and seeing if that makes much impact on
* performance.
*/
 
#define div_round_up(v, a) (((v) + (a) - 1) / (a))
/* figure out number of tiles per pipe: */
tpp_x = tpp_y = 1;
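/* grow the tiles-per-pipe dimensions until all bins can be covered
* by the 8 available VSC pipes:
*/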
while (div_round_up(nbins_y, tpp_y) > 8)
tpp_y += 2;
while ((div_round_up(nbins_y, tpp_y) *
div_round_up(nbins_x, tpp_x)) > 8)
tpp_x += 1;
 
/* configure pipes: */
xoff = yoff = 0;
for (i = 0; i < ARRAY_SIZE(ctx->pipe); i++) {
struct fd_vsc_pipe *pipe = &ctx->pipe[i];
 
if (xoff >= nbins_x) {
xoff = 0;
yoff += tpp_y;
}
 
if (yoff >= nbins_y) {
break;
}
 
pipe->x = xoff;
pipe->y = yoff;
pipe->w = MIN2(tpp_x, nbins_x - xoff);
pipe->h = MIN2(tpp_y, nbins_y - yoff);
 
xoff += tpp_x;
}
 
for (; i < ARRAY_SIZE(ctx->pipe); i++) {
struct fd_vsc_pipe *pipe = &ctx->pipe[i];
pipe->x = pipe->y = pipe->w = pipe->h = 0;
}
 
#if 0 /* debug */
printf("%dx%d ... tpp=%dx%d\n", nbins_x, nbins_y, tpp_x, tpp_y);
for (i = 0; i < 8; i++) {
struct fd_vsc_pipe *pipe = &ctx->pipe[i];
printf("pipe[%d]: %ux%u @ %u,%u\n", i,
pipe->w, pipe->h, pipe->x, pipe->y);
}
#endif
 
/* configure tiles: */
t = 0;
yoff = miny;
memset(tile_n, 0, sizeof(tile_n));
for (i = 0; i < nbins_y; i++) {
uint32_t bw, bh;
 
xoff = minx;
 
/* clip bin height: */
bh = MIN2(bin_h, miny + height - yoff);
 
for (j = 0; j < nbins_x; j++) {
struct fd_tile *tile = &ctx->tile[t];
uint32_t p;
 
assert(t < ARRAY_SIZE(ctx->tile));
 
/* pipe number: */
p = ((i / tpp_y) * div_round_up(nbins_x, tpp_x)) + (j / tpp_x);
 
/* clip bin width: */
bw = MIN2(bin_w, minx + width - xoff);
 
tile->n = tile_n[p]++;
tile->p = p;
tile->bin_w = bw;
tile->bin_h = bh;
tile->xoff = xoff;
tile->yoff = yoff;
 
t++;
 
xoff += bw;
}
 
yoff += bh;
}
 
#if 0 /* debug */
t = 0;
for (i = 0; i < nbins_y; i++) {
for (j = 0; j < nbins_x; j++) {
struct fd_tile *tile = &ctx->tile[t++];
printf("|p:%u n:%u|", tile->p, tile->n);
}
printf("\n");
}
#endif
}
 
static void
render_tiles(struct fd_context *ctx)
{
struct fd_gmem_stateobj *gmem = &ctx->gmem;
int i;
 
ctx->emit_tile_init(ctx);
 
if (ctx->restore)
ctx->stats.batch_restore++;
 
for (i = 0; i < (gmem->nbins_x * gmem->nbins_y); i++) {
struct fd_tile *tile = &ctx->tile[i];
 
DBG("bin_h=%d, yoff=%d, bin_w=%d, xoff=%d",
tile->bin_h, tile->yoff, tile->bin_w, tile->xoff);
 
ctx->emit_tile_prep(ctx, tile);
 
if (ctx->restore) {
fd_hw_query_set_stage(ctx, ctx->ring, FD_STAGE_MEM2GMEM);
ctx->emit_tile_mem2gmem(ctx, tile);
fd_hw_query_set_stage(ctx, ctx->ring, FD_STAGE_NULL);
}
 
ctx->emit_tile_renderprep(ctx, tile);
 
fd_hw_query_prepare_tile(ctx, i, ctx->ring);
 
/* emit IB to drawcmds: */
OUT_IB(ctx->ring, ctx->draw_start, ctx->draw_end);
fd_reset_wfi(ctx);
 
/* emit gmem2mem to transfer tile back to system memory: */
fd_hw_query_set_stage(ctx, ctx->ring, FD_STAGE_GMEM2MEM);
ctx->emit_tile_gmem2mem(ctx, tile);
fd_hw_query_set_stage(ctx, ctx->ring, FD_STAGE_NULL);
}
}
 
static void
render_sysmem(struct fd_context *ctx)
{
ctx->emit_sysmem_prep(ctx);
 
fd_hw_query_prepare_tile(ctx, 0, ctx->ring);
 
/* emit IB to drawcmds: */
OUT_IB(ctx->ring, ctx->draw_start, ctx->draw_end);
fd_reset_wfi(ctx);
}
 
void
fd_gmem_render_tiles(struct fd_context *ctx)
{
struct pipe_framebuffer_state *pfb = &ctx->framebuffer;
bool sysmem = false;
 
if (ctx->emit_sysmem_prep) {
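/* GMEM is only bypassed for small batches: no cleared buffers, none
* of the gmem_reason features in use, and at most 5 draws (and only
* if FD_DBG_NOBYPASS is not set):
*/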
if (ctx->cleared || ctx->gmem_reason || (ctx->num_draws > 5)) {
DBG("GMEM: cleared=%x, gmem_reason=%x, num_draws=%u",
ctx->cleared, ctx->gmem_reason, ctx->num_draws);
} else if (!(fd_mesa_debug & FD_DBG_NOBYPASS)) {
sysmem = true;
}
}
 
/* close out the draw cmds by making sure any active queries are
* paused:
*/
fd_hw_query_set_stage(ctx, ctx->ring, FD_STAGE_NULL);
 
/* mark the end of the clear/draw cmds before emitting per-tile cmds: */
fd_ringmarker_mark(ctx->draw_end);
fd_ringmarker_mark(ctx->binning_end);
 
fd_reset_wfi(ctx);
 
ctx->stats.batch_total++;
 
if (sysmem) {
DBG("rendering sysmem (%s/%s)",
util_format_short_name(pipe_surface_format(pfb->cbufs[0])),
util_format_short_name(pipe_surface_format(pfb->zsbuf)));
fd_hw_query_prepare(ctx, 1);
render_sysmem(ctx);
ctx->stats.batch_sysmem++;
} else {
struct fd_gmem_stateobj *gmem = &ctx->gmem;
calculate_tiles(ctx);
DBG("rendering %dx%d tiles (%s/%s)", gmem->nbins_x, gmem->nbins_y,
util_format_short_name(pipe_surface_format(pfb->cbufs[0])),
util_format_short_name(pipe_surface_format(pfb->zsbuf)));
fd_hw_query_prepare(ctx, gmem->nbins_x * gmem->nbins_y);
render_tiles(ctx);
ctx->stats.batch_gmem++;
}
 
/* GPU executes starting from tile cmds, which IB back to draw cmds: */
fd_ringmarker_flush(ctx->draw_end);
 
/* mark start for next draw/binning cmds: */
fd_ringmarker_mark(ctx->draw_start);
fd_ringmarker_mark(ctx->binning_start);
 
fd_reset_wfi(ctx);
 
/* reset maximal bounds: */
ctx->max_scissor.minx = ctx->max_scissor.miny = ~0;
ctx->max_scissor.maxx = ctx->max_scissor.maxy = 0;
 
ctx->dirty = ~0;
}
 
/* restore can be skipped for a tile that is completely contained
* within the cleared scissor:
*/
static bool
skip_restore(struct pipe_scissor_state *scissor, struct fd_tile *tile)
{
unsigned minx = tile->xoff;
unsigned maxx = tile->xoff + tile->bin_w;
unsigned miny = tile->yoff;
unsigned maxy = tile->yoff + tile->bin_h;
return (minx >= scissor->minx) && (maxx <= scissor->maxx) &&
(miny >= scissor->miny) && (maxy <= scissor->maxy);
}
 
/* When deciding whether a tile needs mem2gmem, we need to take into
* account the scissor rect(s) that were cleared. To simplify we only
* consider the last scissor rect for each buffer, since the common
* case would be a single clear.
*/
bool
fd_gmem_needs_restore(struct fd_context *ctx, struct fd_tile *tile,
uint32_t buffers)
{
if (!(ctx->restore & buffers))
return false;
 
/* if buffers partially cleared, then slow-path to figure out
* if this particular tile needs restoring:
*/
if ((buffers & FD_BUFFER_COLOR) &&
(ctx->partial_cleared & FD_BUFFER_COLOR) &&
skip_restore(&ctx->cleared_scissor.color, tile))
return false;
if ((buffers & FD_BUFFER_DEPTH) &&
(ctx->partial_cleared & FD_BUFFER_DEPTH) &&
skip_restore(&ctx->cleared_scissor.depth, tile))
return false;
if ((buffers & FD_BUFFER_STENCIL) &&
(ctx->partial_cleared & FD_BUFFER_STENCIL) &&
skip_restore(&ctx->cleared_scissor.stencil, tile))
return false;
 
return true;
}
/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/freedreno/freedreno_gmem.h
0,0 → 1,67
/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */
 
/*
* Copyright (C) 2012 Rob Clark <robclark@freedesktop.org>
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Authors:
* Rob Clark <robclark@freedesktop.org>
*/
 
#ifndef FREEDRENO_GMEM_H_
#define FREEDRENO_GMEM_H_
 
#include "pipe/p_context.h"
 
/* per-pipe configuration for hw binning: */
struct fd_vsc_pipe {
struct fd_bo *bo;
uint8_t x, y, w, h; /* VSC_PIPE[p].CONFIG */
};
 
/* per-tile configuration for hw binning: */
struct fd_tile {
uint8_t p; /* index into vsc_pipe[]s */
uint8_t n; /* slot within pipe */
uint16_t bin_w, bin_h;
uint16_t xoff, yoff;
};
 
struct fd_gmem_stateobj {
struct pipe_scissor_state scissor;
uint32_t cbuf_base[4];
uint32_t zsbuf_base[2];
uint8_t cbuf_cpp[4];
uint8_t zsbuf_cpp[2];
uint16_t bin_h, nbins_y;
uint16_t bin_w, nbins_x;
uint16_t minx, miny;
uint16_t width, height;
};
 
struct fd_context;
 
void fd_gmem_render_tiles(struct fd_context *ctx);
 
bool fd_gmem_needs_restore(struct fd_context *ctx, struct fd_tile *tile,
uint32_t buffers);
 
#endif /* FREEDRENO_GMEM_H_ */
/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/freedreno/freedreno_program.c
0,0 → 1,161
/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */
 
/*
* Copyright (C) 2014 Rob Clark <robclark@freedesktop.org>
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Authors:
* Rob Clark <robclark@freedesktop.org>
*/
 
#include "tgsi/tgsi_text.h"
#include "tgsi/tgsi_ureg.h"
 
#include "freedreno_program.h"
#include "freedreno_context.h"
 
static void
fd_fp_state_bind(struct pipe_context *pctx, void *hwcso)
{
struct fd_context *ctx = fd_context(pctx);
ctx->prog.fp = hwcso;
ctx->prog.dirty |= FD_SHADER_DIRTY_FP;
ctx->dirty |= FD_DIRTY_PROG;
}
 
static void
fd_vp_state_bind(struct pipe_context *pctx, void *hwcso)
{
struct fd_context *ctx = fd_context(pctx);
ctx->prog.vp = hwcso;
ctx->prog.dirty |= FD_SHADER_DIRTY_VP;
ctx->dirty |= FD_DIRTY_PROG;
}
 
static const char *solid_fp =
"FRAG \n"
"PROPERTY FS_COLOR0_WRITES_ALL_CBUFS 1 \n"
"DCL CONST[0] \n"
"DCL OUT[0], COLOR \n"
" 0: MOV OUT[0], CONST[0] \n"
" 1: END \n";
 
static const char *solid_vp =
"VERT \n"
"DCL IN[0] \n"
"DCL OUT[0], POSITION \n"
" 0: MOV OUT[0], IN[0] \n"
" 1: END \n";
 
static const char *blit_vp =
"VERT \n"
"DCL IN[0] \n"
"DCL IN[1] \n"
"DCL OUT[0], TEXCOORD[0] \n"
"DCL OUT[1], POSITION \n"
" 0: MOV OUT[0], IN[0] \n"
" 0: MOV OUT[1], IN[1] \n"
" 1: END \n";
 
static void * assemble_tgsi(struct pipe_context *pctx,
const char *src, bool frag)
{
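/* parse the TGSI text into tokens and create a vertex or fragment
* shader CSO from it:
*/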
struct tgsi_token toks[32];
struct pipe_shader_state cso = {
.tokens = toks,
};
 
tgsi_text_translate(src, toks, ARRAY_SIZE(toks));
 
if (frag)
return pctx->create_fs_state(pctx, &cso);
else
return pctx->create_vs_state(pctx, &cso);
}
 
static void *
fd_prog_blit(struct pipe_context *pctx, int rts, bool depth)
{
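/* build a fragment shader which samples 'rts' textures into the
* corresponding color outputs (plus optionally depth), used for the
* mem->gmem restore blits:
*/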
int i;
struct ureg_src tc;
struct ureg_program *ureg = ureg_create(TGSI_PROCESSOR_FRAGMENT);
if (!ureg)
return NULL;
 
tc = ureg_DECL_fs_input(
ureg, TGSI_SEMANTIC_GENERIC, 0, TGSI_INTERPOLATE_PERSPECTIVE);
for (i = 0; i < rts; i++)
ureg_TEX(ureg, ureg_DECL_output(ureg, TGSI_SEMANTIC_COLOR, i),
TGSI_TEXTURE_2D, tc, ureg_DECL_sampler(ureg, i));
if (depth)
ureg_TEX(ureg,
ureg_writemask(
ureg_DECL_output(ureg, TGSI_SEMANTIC_POSITION, 0),
TGSI_WRITEMASK_Z),
TGSI_TEXTURE_2D, tc, ureg_DECL_sampler(ureg, rts));
 
ureg_END(ureg);
 
return ureg_create_shader_and_destroy(ureg, pctx);
}
 
 
void fd_prog_init(struct pipe_context *pctx)
{
struct fd_context *ctx = fd_context(pctx);
int i;
 
pctx->bind_fs_state = fd_fp_state_bind;
pctx->bind_vs_state = fd_vp_state_bind;
 
// XXX for now, let a2xx keep its own hand-rolled shaders
// for solid and blit progs:
if (ctx->screen->gpu_id < 300)
return;
 
ctx->solid_prog.fp = assemble_tgsi(pctx, solid_fp, true);
ctx->solid_prog.vp = assemble_tgsi(pctx, solid_vp, false);
ctx->blit_prog[0].vp = assemble_tgsi(pctx, blit_vp, false);
ctx->blit_prog[0].fp = fd_prog_blit(pctx, 1, false);
for (i = 1; i < ctx->screen->max_rts; i++) {
ctx->blit_prog[i].vp = ctx->blit_prog[0].vp;
ctx->blit_prog[i].fp = fd_prog_blit(pctx, i + 1, false);
}
 
ctx->blit_z.vp = ctx->blit_prog[0].vp;
ctx->blit_z.fp = fd_prog_blit(pctx, 0, true);
ctx->blit_zs.vp = ctx->blit_prog[0].vp;
ctx->blit_zs.fp = fd_prog_blit(pctx, 1, true);
}
 
void fd_prog_fini(struct pipe_context *pctx)
{
struct fd_context *ctx = fd_context(pctx);
int i;
 
pctx->delete_vs_state(pctx, ctx->solid_prog.vp);
pctx->delete_fs_state(pctx, ctx->solid_prog.fp);
pctx->delete_vs_state(pctx, ctx->blit_prog[0].vp);
for (i = 0; i < ctx->screen->max_rts; i++)
pctx->delete_fs_state(pctx, ctx->blit_prog[i].fp);
pctx->delete_fs_state(pctx, ctx->blit_z.fp);
pctx->delete_fs_state(pctx, ctx->blit_zs.fp);
}
/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/freedreno/freedreno_program.h
0,0 → 1,37
/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */
 
/*
* Copyright (C) 2014 Rob Clark <robclark@freedesktop.org>
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Authors:
* Rob Clark <robclark@freedesktop.org>
*/
 
#ifndef FREEDRENO_PROGRAM_H_
#define FREEDRENO_PROGRAM_H_
 
#include "pipe/p_context.h"
 
void fd_prog_init(struct pipe_context *pctx);
void fd_prog_fini(struct pipe_context *pctx);
 
#endif /* FREEDRENO_PROGRAM_H_ */
/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/freedreno/freedreno_query.c
0,0 → 1,121
/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */
 
/*
* Copyright (C) 2013 Rob Clark <robclark@freedesktop.org>
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Authors:
* Rob Clark <robclark@freedesktop.org>
*/
 
#include "pipe/p_state.h"
#include "util/u_memory.h"
 
#include "freedreno_query.h"
#include "freedreno_query_sw.h"
#include "freedreno_query_hw.h"
#include "freedreno_context.h"
#include "freedreno_util.h"
 
/*
* Pipe Query interface:
*/
 
static struct pipe_query *
fd_create_query(struct pipe_context *pctx, unsigned query_type, unsigned index)
{
struct fd_context *ctx = fd_context(pctx);
struct fd_query *q;
 
q = fd_sw_create_query(ctx, query_type);
if (!q)
q = fd_hw_create_query(ctx, query_type);
 
return (struct pipe_query *) q;
}
 
static void
fd_destroy_query(struct pipe_context *pctx, struct pipe_query *pq)
{
struct fd_query *q = fd_query(pq);
q->funcs->destroy_query(fd_context(pctx), q);
}
 
static boolean
fd_begin_query(struct pipe_context *pctx, struct pipe_query *pq)
{
struct fd_query *q = fd_query(pq);
return q->funcs->begin_query(fd_context(pctx), q);
}
 
static void
fd_end_query(struct pipe_context *pctx, struct pipe_query *pq)
{
struct fd_query *q = fd_query(pq);
q->funcs->end_query(fd_context(pctx), q);
}
 
static boolean
fd_get_query_result(struct pipe_context *pctx, struct pipe_query *pq,
boolean wait, union pipe_query_result *result)
{
struct fd_query *q = fd_query(pq);
return q->funcs->get_query_result(fd_context(pctx), q, wait, result);
}
 
static int
fd_get_driver_query_info(struct pipe_screen *pscreen,
unsigned index, struct pipe_driver_query_info *info)
{
struct pipe_driver_query_info list[] = {
{"draw-calls", FD_QUERY_DRAW_CALLS, {0}},
{"batches", FD_QUERY_BATCH_TOTAL, {0}},
{"batches-sysmem", FD_QUERY_BATCH_SYSMEM, {0}},
{"batches-gmem", FD_QUERY_BATCH_GMEM, {0}},
{"restores", FD_QUERY_BATCH_RESTORE, {0}},
{"prims-emitted", PIPE_QUERY_PRIMITIVES_EMITTED, {0}},
};
 
if (!info)
return ARRAY_SIZE(list);
 
if (index >= ARRAY_SIZE(list))
return 0;
 
*info = list[index];
return 1;
}
 
void
fd_query_screen_init(struct pipe_screen *pscreen)
{
pscreen->get_driver_query_info = fd_get_driver_query_info;
}
 
void
fd_query_context_init(struct pipe_context *pctx)
{
pctx->create_query = fd_create_query;
pctx->destroy_query = fd_destroy_query;
pctx->begin_query = fd_begin_query;
pctx->end_query = fd_end_query;
pctx->get_query_result = fd_get_query_result;
}
/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/freedreno/freedreno_query.h
0,0 → 1,68
/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */
 
/*
* Copyright (C) 2013 Rob Clark <robclark@freedesktop.org>
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Authors:
* Rob Clark <robclark@freedesktop.org>
*/
 
#ifndef FREEDRENO_QUERY_H_
#define FREEDRENO_QUERY_H_
 
#include "pipe/p_context.h"
 
struct fd_context;
struct fd_query;
 
struct fd_query_funcs {
void (*destroy_query)(struct fd_context *ctx,
struct fd_query *q);
boolean (*begin_query)(struct fd_context *ctx, struct fd_query *q);
void (*end_query)(struct fd_context *ctx, struct fd_query *q);
boolean (*get_query_result)(struct fd_context *ctx,
struct fd_query *q, boolean wait,
union pipe_query_result *result);
};
 
struct fd_query {
const struct fd_query_funcs *funcs;
bool active;
int type;
};
 
static inline struct fd_query *
fd_query(struct pipe_query *pq)
{
return (struct fd_query *)pq;
}
 
#define FD_QUERY_DRAW_CALLS (PIPE_QUERY_DRIVER_SPECIFIC + 0)
#define FD_QUERY_BATCH_TOTAL (PIPE_QUERY_DRIVER_SPECIFIC + 1) /* total # of batches (submits) */
#define FD_QUERY_BATCH_SYSMEM (PIPE_QUERY_DRIVER_SPECIFIC + 2) /* batches using system memory (GMEM bypass) */
#define FD_QUERY_BATCH_GMEM (PIPE_QUERY_DRIVER_SPECIFIC + 3) /* batches using GMEM */
#define FD_QUERY_BATCH_RESTORE (PIPE_QUERY_DRIVER_SPECIFIC + 4) /* batches requiring GMEM restore */
 
void fd_query_screen_init(struct pipe_screen *pscreen);
void fd_query_context_init(struct pipe_context *pctx);
 
#endif /* FREEDRENO_QUERY_H_ */
/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/freedreno/freedreno_query_hw.c
0,0 → 1,467
/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */
 
/*
* Copyright (C) 2014 Rob Clark <robclark@freedesktop.org>
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Authors:
* Rob Clark <robclark@freedesktop.org>
*/
 
#include "pipe/p_state.h"
#include "util/u_memory.h"
#include "util/u_inlines.h"
 
#include "freedreno_query_hw.h"
#include "freedreno_context.h"
#include "freedreno_util.h"
 
struct fd_hw_sample_period {
struct fd_hw_sample *start, *end;
struct list_head list;
};
 
/* maps query_type to sample provider idx: */
static int pidx(unsigned query_type)
{
switch (query_type) {
case PIPE_QUERY_OCCLUSION_COUNTER:
return 0;
case PIPE_QUERY_OCCLUSION_PREDICATE:
return 1;
default:
return -1;
}
}
 
static struct fd_hw_sample *
get_sample(struct fd_context *ctx, struct fd_ringbuffer *ring,
unsigned query_type)
{
struct fd_hw_sample *samp = NULL;
int idx = pidx(query_type);
 
if (!ctx->sample_cache[idx]) {
ctx->sample_cache[idx] =
ctx->sample_providers[idx]->get_sample(ctx, ring);
}
 
fd_hw_sample_reference(ctx, &samp, ctx->sample_cache[idx]);
 
return samp;
}
 
static void
clear_sample_cache(struct fd_context *ctx)
{
int i;
 
for (i = 0; i < ARRAY_SIZE(ctx->sample_cache); i++)
fd_hw_sample_reference(ctx, &ctx->sample_cache[i], NULL);
}
 
static bool
is_active(struct fd_hw_query *hq, enum fd_render_stage stage)
{
return !!(hq->provider->active & stage);
}
 
 
static void
resume_query(struct fd_context *ctx, struct fd_hw_query *hq,
struct fd_ringbuffer *ring)
{
assert(!hq->period);
hq->period = util_slab_alloc(&ctx->sample_period_pool);
list_inithead(&hq->period->list);
hq->period->start = get_sample(ctx, ring, hq->base.type);
/* NOTE: util_slab_alloc() does not zero out the buffer: */
hq->period->end = NULL;
}
 
static void
pause_query(struct fd_context *ctx, struct fd_hw_query *hq,
struct fd_ringbuffer *ring)
{
assert(hq->period && !hq->period->end);
hq->period->end = get_sample(ctx, ring, hq->base.type);
list_addtail(&hq->period->list, &hq->current_periods);
hq->period = NULL;
}
 
static void
destroy_periods(struct fd_context *ctx, struct list_head *list)
{
struct fd_hw_sample_period *period, *s;
LIST_FOR_EACH_ENTRY_SAFE(period, s, list, list) {
fd_hw_sample_reference(ctx, &period->start, NULL);
fd_hw_sample_reference(ctx, &period->end, NULL);
list_del(&period->list);
util_slab_free(&ctx->sample_period_pool, period);
}
}
 
static void
fd_hw_destroy_query(struct fd_context *ctx, struct fd_query *q)
{
struct fd_hw_query *hq = fd_hw_query(q);
 
destroy_periods(ctx, &hq->periods);
destroy_periods(ctx, &hq->current_periods);
list_del(&hq->list);
 
free(hq);
}
 
static boolean
fd_hw_begin_query(struct fd_context *ctx, struct fd_query *q)
{
struct fd_hw_query *hq = fd_hw_query(q);
if (q->active)
return false;
 
/* begin_query() should clear previous results: */
destroy_periods(ctx, &hq->periods);
 
if (is_active(hq, ctx->stage))
resume_query(ctx, hq, ctx->ring);
 
q->active = true;
 
/* add to active list: */
list_del(&hq->list);
list_addtail(&hq->list, &ctx->active_queries);
return true;
}
 
static void
fd_hw_end_query(struct fd_context *ctx, struct fd_query *q)
{
struct fd_hw_query *hq = fd_hw_query(q);
if (!q->active)
return;
if (is_active(hq, ctx->stage))
pause_query(ctx, hq, ctx->ring);
q->active = false;
/* move to current list: */
list_del(&hq->list);
list_addtail(&hq->list, &ctx->current_queries);
}
 
/* helper to get ptr to specified sample: */
static void * sampptr(struct fd_hw_sample *samp, uint32_t n, void *ptr)
{
return ((char *)ptr) + (samp->tile_stride * n) + samp->offset;
}
 
static boolean
fd_hw_get_query_result(struct fd_context *ctx, struct fd_query *q,
boolean wait, union pipe_query_result *result)
{
struct fd_hw_query *hq = fd_hw_query(q);
const struct fd_hw_sample_provider *p = hq->provider;
struct fd_hw_sample_period *period;
 
if (q->active)
return false;
 
/* if the app tries to read back the query result before the
* batch is submitted, that forces us to flush so that there
* are actually results to wait for:
*/
if (!LIST_IS_EMPTY(&hq->list)) {
/* if app didn't actually trigger any cmdstream, then
* we have nothing to do:
*/
if (!ctx->needs_flush)
return true;
DBG("reading query result forces flush!");
fd_context_render(&ctx->base);
}
 
util_query_clear_result(result, q->type);
 
if (LIST_IS_EMPTY(&hq->periods))
return true;
 
assert(LIST_IS_EMPTY(&hq->list));
assert(LIST_IS_EMPTY(&hq->current_periods));
assert(!hq->period);
 
/* if !wait, then check the last sample (the one most likely to
* not be ready yet) and bail if it is not ready:
*/
if (!wait) {
int ret;
 
period = LIST_ENTRY(struct fd_hw_sample_period,
hq->periods.prev, list);
 
ret = fd_bo_cpu_prep(period->end->bo, ctx->screen->pipe,
DRM_FREEDRENO_PREP_READ | DRM_FREEDRENO_PREP_NOSYNC);
if (ret)
return false;
 
fd_bo_cpu_fini(period->end->bo);
}
 
/* sum the result across all sample periods: */
LIST_FOR_EACH_ENTRY(period, &hq->periods, list) {
struct fd_hw_sample *start = period->start;
struct fd_hw_sample *end = period->end;
unsigned i;
 
/* start and end samples should be from same batch: */
assert(start->bo == end->bo);
assert(start->num_tiles == end->num_tiles);
 
for (i = 0; i < start->num_tiles; i++) {
void *ptr;
 
fd_bo_cpu_prep(start->bo, ctx->screen->pipe,
DRM_FREEDRENO_PREP_READ);
 
ptr = fd_bo_map(start->bo);
 
p->accumulate_result(ctx, sampptr(period->start, i, ptr),
sampptr(period->end, i, ptr), result);
 
fd_bo_cpu_fini(start->bo);
}
}
 
return true;
}
 
static const struct fd_query_funcs hw_query_funcs = {
.destroy_query = fd_hw_destroy_query,
.begin_query = fd_hw_begin_query,
.end_query = fd_hw_end_query,
.get_query_result = fd_hw_get_query_result,
};
 
struct fd_query *
fd_hw_create_query(struct fd_context *ctx, unsigned query_type)
{
struct fd_hw_query *hq;
struct fd_query *q;
int idx = pidx(query_type);
 
if ((idx < 0) || !ctx->sample_providers[idx])
return NULL;
 
hq = CALLOC_STRUCT(fd_hw_query);
if (!hq)
return NULL;
 
hq->provider = ctx->sample_providers[idx];
 
list_inithead(&hq->periods);
list_inithead(&hq->current_periods);
list_inithead(&hq->list);
 
q = &hq->base;
q->funcs = &hw_query_funcs;
q->type = query_type;
 
return q;
}
 
struct fd_hw_sample *
fd_hw_sample_init(struct fd_context *ctx, uint32_t size)
{
struct fd_hw_sample *samp = util_slab_alloc(&ctx->sample_pool);
pipe_reference_init(&samp->reference, 1);
samp->size = size;
samp->offset = ctx->next_sample_offset;
/* NOTE: util_slab_alloc() does not zero out the buffer: */
samp->bo = NULL;
samp->num_tiles = 0;
samp->tile_stride = 0;
ctx->next_sample_offset += size;
return samp;
}
 
void
__fd_hw_sample_destroy(struct fd_context *ctx, struct fd_hw_sample *samp)
{
if (samp->bo)
fd_bo_del(samp->bo);
util_slab_free(&ctx->sample_pool, samp);
}
 
static void
prepare_sample(struct fd_hw_sample *samp, struct fd_bo *bo,
uint32_t num_tiles, uint32_t tile_stride)
{
if (samp->bo) {
assert(samp->bo == bo);
assert(samp->num_tiles == num_tiles);
assert(samp->tile_stride == tile_stride);
return;
}
samp->bo = bo;
samp->num_tiles = num_tiles;
samp->tile_stride = tile_stride;
}
 
static void
prepare_query(struct fd_hw_query *hq, struct fd_bo *bo,
uint32_t num_tiles, uint32_t tile_stride)
{
struct fd_hw_sample_period *period, *s;
 
/* prepare all the samples in the query: */
LIST_FOR_EACH_ENTRY_SAFE(period, s, &hq->current_periods, list) {
prepare_sample(period->start, bo, num_tiles, tile_stride);
prepare_sample(period->end, bo, num_tiles, tile_stride);
 
/* move from current_periods list to periods list: */
list_del(&period->list);
list_addtail(&period->list, &hq->periods);
}
}
 
static void
prepare_queries(struct fd_context *ctx, struct fd_bo *bo,
uint32_t num_tiles, uint32_t tile_stride,
struct list_head *list, bool remove)
{
struct fd_hw_query *hq, *s;
LIST_FOR_EACH_ENTRY_SAFE(hq, s, list, list) {
prepare_query(hq, bo, num_tiles, tile_stride);
if (remove)
list_delinit(&hq->list);
}
}
 
/* called from gmem code once total storage requirements are known (ie.
* number of samples times number of tiles)
*/
void
fd_hw_query_prepare(struct fd_context *ctx, uint32_t num_tiles)
{
uint32_t tile_stride = ctx->next_sample_offset;
struct fd_bo *bo;
 
if (ctx->query_bo)
fd_bo_del(ctx->query_bo);
 
if (tile_stride > 0) {
bo = fd_bo_new(ctx->dev, tile_stride * num_tiles,
DRM_FREEDRENO_GEM_CACHE_WCOMBINE |
DRM_FREEDRENO_GEM_TYPE_KMEM);
} else {
bo = NULL;
}
 
ctx->query_bo = bo;
ctx->query_tile_stride = tile_stride;
 
prepare_queries(ctx, bo, num_tiles, tile_stride,
&ctx->active_queries, false);
prepare_queries(ctx, bo, num_tiles, tile_stride,
&ctx->current_queries, true);
 
/* reset things for next batch: */
ctx->next_sample_offset = 0;
}
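/* Note (added for illustration): the query bo allocated above holds
 * num_tiles consecutive per-tile blocks of tile_stride bytes each;
 * fd_hw_query_prepare_tile() below points HW_QUERY_BASE_REG at the block
 * for tile n, and sampptr() later reads a sample back from
 * (tile_stride * n) + samp->offset.
 */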
 
void
fd_hw_query_prepare_tile(struct fd_context *ctx, uint32_t n,
struct fd_ringbuffer *ring)
{
uint32_t tile_stride = ctx->query_tile_stride;
uint32_t offset = tile_stride * n;
 
/* bail if no queries: */
if (tile_stride == 0)
return;
 
fd_wfi(ctx, ring);
OUT_PKT0 (ring, HW_QUERY_BASE_REG, 1);
OUT_RELOCW(ring, ctx->query_bo, offset, 0, 0);
}
 
void
fd_hw_query_set_stage(struct fd_context *ctx, struct fd_ringbuffer *ring,
enum fd_render_stage stage)
{
/* special case: internal blits (like mipmap level generation)
* go through normal draw path (via util_blitter_blit()).. but
* we need to ignore the FD_STAGE_DRAW which will be set, so we
* don't enable queries which should be paused during internal
* blits:
*/
if ((ctx->stage == FD_STAGE_BLIT) &&
(stage != FD_STAGE_NULL))
return;
 
if (stage != ctx->stage) {
struct fd_hw_query *hq;
LIST_FOR_EACH_ENTRY(hq, &ctx->active_queries, list) {
bool was_active = is_active(hq, ctx->stage);
bool now_active = is_active(hq, stage);
 
if (now_active && !was_active)
resume_query(ctx, hq, ring);
else if (was_active && !now_active)
pause_query(ctx, hq, ring);
}
}
clear_sample_cache(ctx);
ctx->stage = stage;
}
 
void
fd_hw_query_register_provider(struct pipe_context *pctx,
const struct fd_hw_sample_provider *provider)
{
struct fd_context *ctx = fd_context(pctx);
int idx = pidx(provider->query_type);
 
assert((0 <= idx) && (idx < MAX_HW_SAMPLE_PROVIDERS));
assert(!ctx->sample_providers[idx]);
 
ctx->sample_providers[idx] = provider;
}
 
void
fd_hw_query_init(struct pipe_context *pctx)
{
struct fd_context *ctx = fd_context(pctx);
 
util_slab_create(&ctx->sample_pool, sizeof(struct fd_hw_sample),
16, UTIL_SLAB_SINGLETHREADED);
util_slab_create(&ctx->sample_period_pool, sizeof(struct fd_hw_sample_period),
16, UTIL_SLAB_SINGLETHREADED);
list_inithead(&ctx->active_queries);
list_inithead(&ctx->current_queries);
}
 
void
fd_hw_query_fini(struct pipe_context *pctx)
{
struct fd_context *ctx = fd_context(pctx);
 
util_slab_destroy(&ctx->sample_pool);
util_slab_destroy(&ctx->sample_period_pool);
}
/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/freedreno/freedreno_query_hw.h
0,0 → 1,164
/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */
 
/*
* Copyright (C) 2014 Rob Clark <robclark@freedesktop.org>
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Authors:
* Rob Clark <robclark@freedesktop.org>
*/
 
#ifndef FREEDRENO_QUERY_HW_H_
#define FREEDRENO_QUERY_HW_H_
 
#include "util/list.h"
 
#include "freedreno_query.h"
#include "freedreno_context.h"
 
 
/*
* HW Queries:
*
* See: https://github.com/freedreno/freedreno/wiki/Queries#hardware-queries
*
* Hardware queries will be specific to gpu generation, but they need
* some common infrastructure for triggering start/stop samples at
* various points (for example, to exclude mem2gmem/gmem2mem or clear)
* as well as per tile tracking.
*
* NOTE: in at least some cases hw writes sample values to memory addr
* specified in some register. So we don't really have the option to
* just sample the same counter multiple times for multiple different
* queries with the same query_type. So we cache per sample provider
* the most recent sample since the last draw. This way multiple
* sample periods for multiple queries can reference the same sample.
*
* fd_hw_sample_provider:
* - one per query type, registered/implemented by gpu generation
* specific code
* - can construct fd_hw_samples on demand
* - most recent sample (since last draw) cached so multiple
* different queries can ref the same sample
*
* fd_hw_sample:
* - abstracts one snapshot of counter value(s) across N tiles
* - backing object not allocated until submit time when number
* of samples and number of tiles is known
*
* fd_hw_sample_period:
* - consists of start and stop sample
* - a query accumulates a list of sample periods
* - the query result is the sum of the sample periods
*/
 
struct fd_hw_sample_provider {
unsigned query_type;
 
/* stages applicable to the query type: */
enum fd_render_stage active;
 
/* when a new sample is required, emit appropriate cmdstream
* and return a sample object:
*/
struct fd_hw_sample *(*get_sample)(struct fd_context *ctx,
struct fd_ringbuffer *ring);
 
/* accumulate the results from specified sample period: */
void (*accumulate_result)(struct fd_context *ctx,
const void *start, const void *end,
union pipe_query_result *result);
};
 
struct fd_hw_sample {
struct pipe_reference reference; /* keep this first */
 
/* offset and size of the sample are known at the time
* sample is constructed.
*/
uint32_t size;
uint32_t offset;
 
/* backing object, offset/stride/etc are determined not when
* the sample is constructed, but when the batch is submitted.
* This way we can defer allocation until total # of requested
* samples, and total # of tiles, is known.
*/
struct fd_bo *bo;
uint32_t num_tiles;
uint32_t tile_stride;
};
 
struct fd_hw_sample_period;
 
struct fd_hw_query {
struct fd_query base;
 
const struct fd_hw_sample_provider *provider;
 
/* list of fd_hw_sample_period in previous submits: */
struct list_head periods;
 
/* list of fd_hw_sample_periods in current submit: */
struct list_head current_periods;
 
/* if active and not paused, the current sample period (not
* yet added to current_periods):
*/
struct fd_hw_sample_period *period;
 
struct list_head list; /* list-node in ctx->active_queries */
};
 
static inline struct fd_hw_query *
fd_hw_query(struct fd_query *q)
{
return (struct fd_hw_query *)q;
}
 
struct fd_query * fd_hw_create_query(struct fd_context *ctx, unsigned query_type);
/* helper for sample providers: */
struct fd_hw_sample * fd_hw_sample_init(struct fd_context *ctx, uint32_t size);
/* don't call directly, use fd_hw_sample_reference() */
void __fd_hw_sample_destroy(struct fd_context *ctx, struct fd_hw_sample *samp);
void fd_hw_query_prepare(struct fd_context *ctx, uint32_t num_tiles);
void fd_hw_query_prepare_tile(struct fd_context *ctx, uint32_t n,
struct fd_ringbuffer *ring);
void fd_hw_query_set_stage(struct fd_context *ctx,
struct fd_ringbuffer *ring, enum fd_render_stage stage);
void fd_hw_query_register_provider(struct pipe_context *pctx,
const struct fd_hw_sample_provider *provider);
void fd_hw_query_init(struct pipe_context *pctx);
void fd_hw_query_fini(struct pipe_context *pctx);
 
static inline void
fd_hw_sample_reference(struct fd_context *ctx,
struct fd_hw_sample **ptr, struct fd_hw_sample *samp)
{
struct fd_hw_sample *old_samp = *ptr;
 
if (pipe_reference(&(*ptr)->reference, &samp->reference))
__fd_hw_sample_destroy(ctx, old_samp);
if (ptr)
*ptr = samp;
}
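/*
 * Illustrative sketch only, not part of upstream Mesa: a gpu-generation
 * backend fills in one fd_hw_sample_provider per query type and registers
 * it at context init.  The example_* names below are hypothetical; the real
 * implementations live in the a3xx/a4xx code.
 */
#if 0
static struct fd_hw_sample *
example_get_sample(struct fd_context *ctx, struct fd_ringbuffer *ring)
{
/* reserve room for one counter snapshot; the backing bo/offset is only
 * assigned at submit time by fd_hw_query_prepare():
 */
struct fd_hw_sample *samp = fd_hw_sample_init(ctx, sizeof(uint64_t));
/* ... emit cmdstream here telling the hw to write the counter value ... */
return samp;
}
 
static void
example_accumulate_result(struct fd_context *ctx,
const void *start, const void *end,
union pipe_query_result *result)
{
/* the query result is the sum over all sample periods (and tiles): */
result->u64 += *(const uint64_t *)end - *(const uint64_t *)start;
}
 
static const struct fd_hw_sample_provider example_occlusion_provider = {
.query_type = PIPE_QUERY_OCCLUSION_COUNTER,
.active = FD_STAGE_DRAW,
.get_sample = example_get_sample,
.accumulate_result = example_accumulate_result,
};
/* ...then at context init:
 * fd_hw_query_register_provider(pctx, &example_occlusion_provider);
 */
#endif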
 
#endif /* FREEDRENO_QUERY_HW_H_ */
/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/freedreno/freedreno_query_sw.c
0,0 → 1,166
/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */
 
/*
* Copyright (C) 2014 Rob Clark <robclark@freedesktop.org>
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Authors:
* Rob Clark <robclark@freedesktop.org>
*/
 
#include "pipe/p_state.h"
#include "util/u_string.h"
#include "util/u_memory.h"
#include "util/u_inlines.h"
#include "os/os_time.h"
 
#include "freedreno_query_sw.h"
#include "freedreno_context.h"
#include "freedreno_util.h"
 
/*
* SW Queries:
*
* In the core, we have some support for basic sw counters
*/
 
static void
fd_sw_destroy_query(struct fd_context *ctx, struct fd_query *q)
{
struct fd_sw_query *sq = fd_sw_query(q);
free(sq);
}
 
static uint64_t
read_counter(struct fd_context *ctx, int type)
{
switch (type) {
case PIPE_QUERY_PRIMITIVES_GENERATED:
/* for now same thing as _PRIMITIVES_EMITTED */
case PIPE_QUERY_PRIMITIVES_EMITTED:
return ctx->stats.prims_emitted;
case FD_QUERY_DRAW_CALLS:
return ctx->stats.draw_calls;
case FD_QUERY_BATCH_TOTAL:
return ctx->stats.batch_total;
case FD_QUERY_BATCH_SYSMEM:
return ctx->stats.batch_sysmem;
case FD_QUERY_BATCH_GMEM:
return ctx->stats.batch_gmem;
case FD_QUERY_BATCH_RESTORE:
return ctx->stats.batch_restore;
}
return 0;
}
 
static bool
is_rate_query(struct fd_query *q)
{
switch (q->type) {
case FD_QUERY_BATCH_TOTAL:
case FD_QUERY_BATCH_SYSMEM:
case FD_QUERY_BATCH_GMEM:
case FD_QUERY_BATCH_RESTORE:
return true;
default:
return false;
}
}
 
static boolean
fd_sw_begin_query(struct fd_context *ctx, struct fd_query *q)
{
struct fd_sw_query *sq = fd_sw_query(q);
q->active = true;
sq->begin_value = read_counter(ctx, q->type);
if (is_rate_query(q))
sq->begin_time = os_time_get();
return true;
}
 
static void
fd_sw_end_query(struct fd_context *ctx, struct fd_query *q)
{
struct fd_sw_query *sq = fd_sw_query(q);
q->active = false;
sq->end_value = read_counter(ctx, q->type);
if (is_rate_query(q))
sq->end_time = os_time_get();
}
 
static boolean
fd_sw_get_query_result(struct fd_context *ctx, struct fd_query *q,
boolean wait, union pipe_query_result *result)
{
struct fd_sw_query *sq = fd_sw_query(q);
 
if (q->active)
return false;
 
util_query_clear_result(result, q->type);
 
result->u64 = sq->end_value - sq->begin_value;
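/* for the batch-* rate queries this raw delta is converted to a
 * per-second rate below: os_time_get() is in microseconds, so e.g.
 * 120 batches over 2,000,000us reports 60 (batches per second).
 */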
 
if (is_rate_query(q)) {
double fps = (result->u64 * 1000000) /
(double)(sq->end_time - sq->begin_time);
result->u64 = (uint64_t)fps;
}
 
return true;
}
 
static const struct fd_query_funcs sw_query_funcs = {
.destroy_query = fd_sw_destroy_query,
.begin_query = fd_sw_begin_query,
.end_query = fd_sw_end_query,
.get_query_result = fd_sw_get_query_result,
};
 
struct fd_query *
fd_sw_create_query(struct fd_context *ctx, unsigned query_type)
{
struct fd_sw_query *sq;
struct fd_query *q;
 
switch (query_type) {
case PIPE_QUERY_PRIMITIVES_GENERATED:
case PIPE_QUERY_PRIMITIVES_EMITTED:
case FD_QUERY_DRAW_CALLS:
case FD_QUERY_BATCH_TOTAL:
case FD_QUERY_BATCH_SYSMEM:
case FD_QUERY_BATCH_GMEM:
case FD_QUERY_BATCH_RESTORE:
break;
default:
return NULL;
}
 
sq = CALLOC_STRUCT(fd_sw_query);
if (!sq)
return NULL;
 
q = &sq->base;
q->funcs = &sw_query_funcs;
q->type = query_type;
 
return q;
}
/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/freedreno/freedreno_query_sw.h
0,0 → 1,55
/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */
 
/*
* Copyright (C) 2014 Rob Clark <robclark@freedesktop.org>
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Authors:
* Rob Clark <robclark@freedesktop.org>
*/
 
#ifndef FREEDRENO_QUERY_SW_H_
#define FREEDRENO_QUERY_SW_H_
 
#include "freedreno_query.h"
 
/*
* SW Queries:
*
* In the core, we have some support for basic sw counters
*/
 
struct fd_sw_query {
struct fd_query base;
uint64_t begin_value, end_value;
uint64_t begin_time, end_time;
};
 
static inline struct fd_sw_query *
fd_sw_query(struct fd_query *q)
{
return (struct fd_sw_query *)q;
}
 
struct fd_query * fd_sw_create_query(struct fd_context *ctx,
unsigned query_type);
 
#endif /* FREEDRENO_QUERY_SW_H_ */
/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/freedreno/freedreno_resource.c
0,0 → 1,703
/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */
 
/*
* Copyright (C) 2012 Rob Clark <robclark@freedesktop.org>
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Authors:
* Rob Clark <robclark@freedesktop.org>
*/
 
#include "util/u_format.h"
#include "util/u_format_zs.h"
#include "util/u_inlines.h"
#include "util/u_transfer.h"
#include "util/u_string.h"
#include "util/u_surface.h"
 
#include "freedreno_resource.h"
#include "freedreno_screen.h"
#include "freedreno_surface.h"
#include "freedreno_context.h"
#include "freedreno_query_hw.h"
#include "freedreno_util.h"
 
#include <errno.h>
 
static void
fd_invalidate_resource(struct fd_context *ctx, struct pipe_resource *prsc)
{
int i;
 
/* Go through the entire state and see if the resource is bound
* anywhere. If it is, mark the relevant state as dirty. This is called on
* realloc_bo.
*/
 
/* Constbufs */
for (i = 1; i < PIPE_MAX_CONSTANT_BUFFERS && !(ctx->dirty & FD_DIRTY_CONSTBUF); i++) {
if (ctx->constbuf[PIPE_SHADER_VERTEX].cb[i].buffer == prsc)
ctx->dirty |= FD_DIRTY_CONSTBUF;
if (ctx->constbuf[PIPE_SHADER_FRAGMENT].cb[i].buffer == prsc)
ctx->dirty |= FD_DIRTY_CONSTBUF;
}
 
/* VBOs */
for (i = 0; i < ctx->vtx.vertexbuf.count && !(ctx->dirty & FD_DIRTY_VTXBUF); i++) {
if (ctx->vtx.vertexbuf.vb[i].buffer == prsc)
ctx->dirty |= FD_DIRTY_VTXBUF;
}
 
/* Index buffer */
if (ctx->indexbuf.buffer == prsc)
ctx->dirty |= FD_DIRTY_INDEXBUF;
 
/* Textures */
for (i = 0; i < ctx->verttex.num_textures && !(ctx->dirty & FD_DIRTY_VERTTEX); i++) {
if (ctx->verttex.textures[i]->texture == prsc)
ctx->dirty |= FD_DIRTY_VERTTEX;
}
for (i = 0; i < ctx->fragtex.num_textures && !(ctx->dirty & FD_DIRTY_FRAGTEX); i++) {
if (ctx->fragtex.textures[i]->texture == prsc)
ctx->dirty |= FD_DIRTY_FRAGTEX;
}
}
 
static void
realloc_bo(struct fd_resource *rsc, uint32_t size)
{
struct fd_screen *screen = fd_screen(rsc->base.b.screen);
uint32_t flags = DRM_FREEDRENO_GEM_CACHE_WCOMBINE |
DRM_FREEDRENO_GEM_TYPE_KMEM; /* TODO */
 
/* if we start using things other than write-combine,
* be sure to check for PIPE_RESOURCE_FLAG_MAP_COHERENT
*/
 
if (rsc->bo)
fd_bo_del(rsc->bo);
 
rsc->bo = fd_bo_new(screen->dev, size, flags);
rsc->timestamp = 0;
rsc->dirty = rsc->reading = false;
list_delinit(&rsc->list);
util_range_set_empty(&rsc->valid_buffer_range);
}
 
/* Currently this is only used for flushing Z32_S8 texture transfers, but
* eventually it should handle everything.
*/
static void
fd_resource_flush(struct fd_transfer *trans, const struct pipe_box *box)
{
struct fd_resource *rsc = fd_resource(trans->base.resource);
struct fd_resource_slice *slice = fd_resource_slice(rsc, trans->base.level);
struct fd_resource_slice *sslice = fd_resource_slice(rsc->stencil, trans->base.level);
enum pipe_format format = trans->base.resource->format;
 
float *depth = fd_bo_map(rsc->bo) + slice->offset +
(trans->base.box.y + box->y) * slice->pitch * 4 + (trans->base.box.x + box->x) * 4;
uint8_t *stencil = fd_bo_map(rsc->stencil->bo) + sslice->offset +
(trans->base.box.y + box->y) * sslice->pitch + trans->base.box.x + box->x;
 
assert(format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT ||
format == PIPE_FORMAT_X32_S8X24_UINT);
 
if (format != PIPE_FORMAT_X32_S8X24_UINT)
util_format_z32_float_s8x24_uint_unpack_z_float(
depth, slice->pitch * 4,
trans->staging, trans->base.stride,
box->width, box->height);
 
util_format_z32_float_s8x24_uint_unpack_s_8uint(
stencil, sslice->pitch,
trans->staging, trans->base.stride,
box->width, box->height);
}
 
static void fd_resource_transfer_flush_region(struct pipe_context *pctx,
struct pipe_transfer *ptrans,
const struct pipe_box *box)
{
struct fd_resource *rsc = fd_resource(ptrans->resource);
struct fd_transfer *trans = fd_transfer(ptrans);
 
if (ptrans->resource->target == PIPE_BUFFER)
util_range_add(&rsc->valid_buffer_range,
ptrans->box.x + box->x,
ptrans->box.x + box->x + box->width);
 
if (trans->staging)
fd_resource_flush(trans, box);
}
 
static void
fd_resource_transfer_unmap(struct pipe_context *pctx,
struct pipe_transfer *ptrans)
{
struct fd_context *ctx = fd_context(pctx);
struct fd_resource *rsc = fd_resource(ptrans->resource);
struct fd_transfer *trans = fd_transfer(ptrans);
 
if (trans->staging && !(ptrans->usage & PIPE_TRANSFER_FLUSH_EXPLICIT)) {
struct pipe_box box;
u_box_2d(0, 0, ptrans->box.width, ptrans->box.height, &box);
fd_resource_flush(trans, &box);
}
 
if (!(ptrans->usage & PIPE_TRANSFER_UNSYNCHRONIZED)) {
fd_bo_cpu_fini(rsc->bo);
if (rsc->stencil)
fd_bo_cpu_fini(rsc->stencil->bo);
}
 
util_range_add(&rsc->valid_buffer_range,
ptrans->box.x,
ptrans->box.x + ptrans->box.width);
 
pipe_resource_reference(&ptrans->resource, NULL);
util_slab_free(&ctx->transfer_pool, ptrans);
 
if (trans->staging)
free(trans->staging);
}
 
static void *
fd_resource_transfer_map(struct pipe_context *pctx,
struct pipe_resource *prsc,
unsigned level, unsigned usage,
const struct pipe_box *box,
struct pipe_transfer **pptrans)
{
struct fd_context *ctx = fd_context(pctx);
struct fd_resource *rsc = fd_resource(prsc);
struct fd_resource_slice *slice = fd_resource_slice(rsc, level);
struct fd_transfer *trans;
struct pipe_transfer *ptrans;
enum pipe_format format = prsc->format;
uint32_t op = 0;
uint32_t offset;
char *buf;
int ret = 0;
 
DBG("prsc=%p, level=%u, usage=%x, box=%dx%d+%d,%d", prsc, level, usage,
box->width, box->height, box->x, box->y);
 
ptrans = util_slab_alloc(&ctx->transfer_pool);
if (!ptrans)
return NULL;
 
/* util_slab_alloc() doesn't zero: */
trans = fd_transfer(ptrans);
memset(trans, 0, sizeof(*trans));
 
pipe_resource_reference(&ptrans->resource, prsc);
ptrans->level = level;
ptrans->usage = usage;
ptrans->box = *box;
ptrans->stride = slice->pitch * rsc->cpp;
ptrans->layer_stride = slice->size0;
 
if (usage & PIPE_TRANSFER_READ)
op |= DRM_FREEDRENO_PREP_READ;
 
if (usage & PIPE_TRANSFER_WRITE)
op |= DRM_FREEDRENO_PREP_WRITE;
 
if (usage & PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE) {
realloc_bo(rsc, fd_bo_size(rsc->bo));
if (rsc->stencil)
realloc_bo(rsc->stencil, fd_bo_size(rsc->stencil->bo));
fd_invalidate_resource(ctx, prsc);
} else if ((usage & PIPE_TRANSFER_WRITE) &&
prsc->target == PIPE_BUFFER &&
!util_ranges_intersect(&rsc->valid_buffer_range,
box->x, box->x + box->width)) {
/* We are trying to write to a previously uninitialized range. No need
* to wait.
*/
} else if (!(usage & PIPE_TRANSFER_UNSYNCHRONIZED)) {
/* If the GPU is writing to the resource, or if it is reading from the
* resource and we're trying to write to it, flush the renders.
*/
if (rsc->dirty || (rsc->stencil && rsc->stencil->dirty) ||
((ptrans->usage & PIPE_TRANSFER_WRITE) && rsc->reading))
fd_context_render(pctx);
 
/* The GPU keeps track of how the various bo's are being used, and
* will wait if necessary for the proper operation to have
* completed.
*/
ret = fd_bo_cpu_prep(rsc->bo, ctx->screen->pipe, op);
if (ret)
goto fail;
}
 
buf = fd_bo_map(rsc->bo);
if (!buf) {
fd_resource_transfer_unmap(pctx, ptrans);
return NULL;
}
 
if (rsc->layer_first) {
offset = slice->offset +
box->y / util_format_get_blockheight(format) * ptrans->stride +
box->x / util_format_get_blockwidth(format) * rsc->cpp +
box->z * rsc->layer_size;
} else {
offset = slice->offset +
box->y / util_format_get_blockheight(format) * ptrans->stride +
box->x / util_format_get_blockwidth(format) * rsc->cpp +
box->z * slice->size0;
}
 
if (prsc->format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT ||
prsc->format == PIPE_FORMAT_X32_S8X24_UINT) {
trans->base.stride = trans->base.box.width * rsc->cpp * 2;
trans->staging = malloc(trans->base.stride * trans->base.box.height);
if (!trans->staging)
goto fail;
 
/* if we're not discarding the whole range (or resource), we must copy
* the real data in.
*/
if (!(usage & (PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE |
PIPE_TRANSFER_DISCARD_RANGE))) {
struct fd_resource_slice *sslice =
fd_resource_slice(rsc->stencil, level);
void *sbuf = fd_bo_map(rsc->stencil->bo);
if (!sbuf)
goto fail;
 
float *depth = (float *)(buf + slice->offset +
box->y * slice->pitch * 4 + box->x * 4);
uint8_t *stencil = sbuf + sslice->offset +
box->y * sslice->pitch + box->x;
 
if (format != PIPE_FORMAT_X32_S8X24_UINT)
util_format_z32_float_s8x24_uint_pack_z_float(
trans->staging, trans->base.stride,
depth, slice->pitch * 4,
box->width, box->height);
 
util_format_z32_float_s8x24_uint_pack_s_8uint(
trans->staging, trans->base.stride,
stencil, sslice->pitch,
box->width, box->height);
}
 
buf = trans->staging;
offset = 0;
}
 
*pptrans = ptrans;
 
return buf + offset;
 
fail:
fd_resource_transfer_unmap(pctx, ptrans);
return NULL;
}
 
static void
fd_resource_destroy(struct pipe_screen *pscreen,
struct pipe_resource *prsc)
{
struct fd_resource *rsc = fd_resource(prsc);
if (rsc->bo)
fd_bo_del(rsc->bo);
list_delinit(&rsc->list);
util_range_destroy(&rsc->valid_buffer_range);
FREE(rsc);
}
 
static boolean
fd_resource_get_handle(struct pipe_screen *pscreen,
struct pipe_resource *prsc,
struct winsys_handle *handle)
{
struct fd_resource *rsc = fd_resource(prsc);
 
return fd_screen_bo_get_handle(pscreen, rsc->bo,
rsc->slices[0].pitch * rsc->cpp, handle);
}
 
 
static const struct u_resource_vtbl fd_resource_vtbl = {
.resource_get_handle = fd_resource_get_handle,
.resource_destroy = fd_resource_destroy,
.transfer_map = fd_resource_transfer_map,
.transfer_flush_region = fd_resource_transfer_flush_region,
.transfer_unmap = fd_resource_transfer_unmap,
.transfer_inline_write = u_default_transfer_inline_write,
};
 
static uint32_t
setup_slices(struct fd_resource *rsc, uint32_t alignment)
{
struct pipe_resource *prsc = &rsc->base.b;
uint32_t level, size = 0;
uint32_t width = prsc->width0;
uint32_t height = prsc->height0;
uint32_t depth = prsc->depth0;
/* in layer_first layout, the level (slice) contains just one
* layer (since in fact the layer contains the slices)
*/
uint32_t layers_in_level = rsc->layer_first ? 1 : prsc->array_size;
 
for (level = 0; level <= prsc->last_level; level++) {
struct fd_resource_slice *slice = fd_resource_slice(rsc, level);
 
slice->pitch = width = align(width, 32);
slice->offset = size;
/* 1d array and 2d array textures must all have the same layer size
* for each miplevel on a3xx. 3d textures can have different layer
* sizes for high levels, but the hw auto-sizer is buggy (or at least
* different than what this code does), so as soon as the layer size
* gets into range, we stop reducing it.
*/
if (prsc->target == PIPE_TEXTURE_3D && (
level == 1 ||
(level > 1 && rsc->slices[level - 1].size0 > 0xf000)))
slice->size0 = align(slice->pitch * height * rsc->cpp, alignment);
else if (level == 0 || rsc->layer_first || alignment == 1)
slice->size0 = align(slice->pitch * height * rsc->cpp, alignment);
else
slice->size0 = rsc->slices[level - 1].size0;
 
size += slice->size0 * depth * layers_in_level;
 
width = u_minify(width, 1);
height = u_minify(height, 1);
depth = u_minify(depth, 1);
}
 
return size;
}
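/* Worked example of the layout computed above (illustrative, not upstream):
 * a 64x16 PIPE_TEXTURE_2D RGBA8 texture (cpp=4, alignment=1, full miptree):
 * level 0: pitch=64, offset=0, size0=64*16*4 = 4096
 * level 1: pitch=32, offset=4096, size0=32*8*4 = 1024
 * ...and so on; setup_slices() returns the sum, which (possibly after
 * layer alignment) becomes the bo size.
 */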
 
static uint32_t
slice_alignment(struct pipe_screen *pscreen, const struct pipe_resource *tmpl)
{
/* on a3xx, 2d array and 3d textures seem to want their
* layers aligned to page boundaries:
*/
switch (tmpl->target) {
case PIPE_TEXTURE_3D:
case PIPE_TEXTURE_1D_ARRAY:
case PIPE_TEXTURE_2D_ARRAY:
return 4096;
default:
return 1;
}
}
 
/**
* Create a new texture object, using the given template info.
*/
static struct pipe_resource *
fd_resource_create(struct pipe_screen *pscreen,
const struct pipe_resource *tmpl)
{
struct fd_resource *rsc = CALLOC_STRUCT(fd_resource);
struct pipe_resource *prsc = &rsc->base.b;
uint32_t size;
 
DBG("target=%d, format=%s, %ux%ux%u, array_size=%u, last_level=%u, "
"nr_samples=%u, usage=%u, bind=%x, flags=%x",
tmpl->target, util_format_name(tmpl->format),
tmpl->width0, tmpl->height0, tmpl->depth0,
tmpl->array_size, tmpl->last_level, tmpl->nr_samples,
tmpl->usage, tmpl->bind, tmpl->flags);
 
if (!rsc)
return NULL;
 
*prsc = *tmpl;
 
pipe_reference_init(&prsc->reference, 1);
list_inithead(&rsc->list);
prsc->screen = pscreen;
 
util_range_init(&rsc->valid_buffer_range);
 
rsc->base.vtbl = &fd_resource_vtbl;
if (tmpl->format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT)
rsc->cpp = util_format_get_blocksize(PIPE_FORMAT_Z32_FLOAT);
else
rsc->cpp = util_format_get_blocksize(tmpl->format);
 
assert(rsc->cpp);
 
if (is_a4xx(fd_screen(pscreen))) {
switch (tmpl->target) {
case PIPE_TEXTURE_3D:
/* TODO 3D_ARRAY? */
rsc->layer_first = false;
break;
default:
rsc->layer_first = true;
break;
}
}
 
size = setup_slices(rsc, slice_alignment(pscreen, tmpl));
 
if (rsc->layer_first) {
rsc->layer_size = align(size, 4096);
size = rsc->layer_size * prsc->array_size;
}
 
realloc_bo(rsc, size);
if (!rsc->bo)
goto fail;
 
/* There is no native Z32F_S8 sampling or rendering format, so this must
* be emulated via two separate textures. The depth texture still keeps
* its Z32F_S8 format though, and we also keep a reference to a separate
* S8 texture.
*/
if (tmpl->format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT) {
struct pipe_resource stencil = *tmpl;
stencil.format = PIPE_FORMAT_S8_UINT;
rsc->stencil = fd_resource(fd_resource_create(pscreen, &stencil));
if (!rsc->stencil)
goto fail;
}
 
return prsc;
fail:
fd_resource_destroy(pscreen, prsc);
return NULL;
}
 
/**
* Create a texture from a winsys_handle. The handle is often created in
* another process by first creating a pipe texture and then calling
* resource_get_handle.
*/
static struct pipe_resource *
fd_resource_from_handle(struct pipe_screen *pscreen,
const struct pipe_resource *tmpl,
struct winsys_handle *handle)
{
struct fd_resource *rsc = CALLOC_STRUCT(fd_resource);
struct fd_resource_slice *slice = &rsc->slices[0];
struct pipe_resource *prsc = &rsc->base.b;
 
DBG("target=%d, format=%s, %ux%ux%u, array_size=%u, last_level=%u, "
"nr_samples=%u, usage=%u, bind=%x, flags=%x",
tmpl->target, util_format_name(tmpl->format),
tmpl->width0, tmpl->height0, tmpl->depth0,
tmpl->array_size, tmpl->last_level, tmpl->nr_samples,
tmpl->usage, tmpl->bind, tmpl->flags);
 
if (!rsc)
return NULL;
 
*prsc = *tmpl;
 
pipe_reference_init(&prsc->reference, 1);
list_inithead(&rsc->list);
prsc->screen = pscreen;
 
util_range_init(&rsc->valid_buffer_range);
 
rsc->bo = fd_screen_bo_from_handle(pscreen, handle, &slice->pitch);
if (!rsc->bo)
goto fail;
 
rsc->base.vtbl = &fd_resource_vtbl;
rsc->cpp = util_format_get_blocksize(tmpl->format);
slice->pitch /= rsc->cpp;
 
assert(rsc->cpp);
 
return prsc;
 
fail:
fd_resource_destroy(pscreen, prsc);
return NULL;
}
 
static void fd_blitter_pipe_begin(struct fd_context *ctx);
static void fd_blitter_pipe_end(struct fd_context *ctx);
 
/**
* _copy_region using pipe (3d engine)
*/
static bool
fd_blitter_pipe_copy_region(struct fd_context *ctx,
struct pipe_resource *dst,
unsigned dst_level,
unsigned dstx, unsigned dsty, unsigned dstz,
struct pipe_resource *src,
unsigned src_level,
const struct pipe_box *src_box)
{
/* not until we allow rendertargets to be buffers */
if (dst->target == PIPE_BUFFER || src->target == PIPE_BUFFER)
return false;
 
if (!util_blitter_is_copy_supported(ctx->blitter, dst, src))
return false;
 
fd_blitter_pipe_begin(ctx);
util_blitter_copy_texture(ctx->blitter,
dst, dst_level, dstx, dsty, dstz,
src, src_level, src_box);
fd_blitter_pipe_end(ctx);
 
return true;
}
 
/**
* Copy a block of pixels from one resource to another.
* The resource must be of the same format.
* Resources with nr_samples > 1 are not allowed.
*/
static void
fd_resource_copy_region(struct pipe_context *pctx,
struct pipe_resource *dst,
unsigned dst_level,
unsigned dstx, unsigned dsty, unsigned dstz,
struct pipe_resource *src,
unsigned src_level,
const struct pipe_box *src_box)
{
struct fd_context *ctx = fd_context(pctx);
 
/* TODO if we have 2d core, or other DMA engine that could be used
* for simple copies and reasonably easily synchronized with the 3d
* core, this is where we'd plug it in..
*/
 
/* try blit on 3d pipe: */
if (fd_blitter_pipe_copy_region(ctx,
dst, dst_level, dstx, dsty, dstz,
src, src_level, src_box))
return;
 
/* else fallback to pure sw: */
util_resource_copy_region(pctx,
dst, dst_level, dstx, dsty, dstz,
src, src_level, src_box);
}
 
/**
* Optimal hardware path for blitting pixels.
* Scaling, format conversion, up- and downsampling (resolve) are allowed.
*/
static void
fd_blit(struct pipe_context *pctx, const struct pipe_blit_info *blit_info)
{
struct fd_context *ctx = fd_context(pctx);
struct pipe_blit_info info = *blit_info;
 
if (info.src.resource->nr_samples > 1 &&
info.dst.resource->nr_samples <= 1 &&
!util_format_is_depth_or_stencil(info.src.resource->format) &&
!util_format_is_pure_integer(info.src.resource->format)) {
DBG("color resolve unimplemented");
return;
}
 
if (util_try_blit_via_copy_region(pctx, &info)) {
return; /* done */
}
 
if (info.mask & PIPE_MASK_S) {
DBG("cannot blit stencil, skipping");
info.mask &= ~PIPE_MASK_S;
}
 
if (!util_blitter_is_blit_supported(ctx->blitter, &info)) {
DBG("blit unsupported %s -> %s",
util_format_short_name(info.src.resource->format),
util_format_short_name(info.dst.resource->format));
return;
}
 
fd_blitter_pipe_begin(ctx);
util_blitter_blit(ctx->blitter, &info);
fd_blitter_pipe_end(ctx);
}
 
static void
fd_blitter_pipe_begin(struct fd_context *ctx)
{
util_blitter_save_vertex_buffer_slot(ctx->blitter, ctx->vtx.vertexbuf.vb);
util_blitter_save_vertex_elements(ctx->blitter, ctx->vtx.vtx);
util_blitter_save_vertex_shader(ctx->blitter, ctx->prog.vp);
util_blitter_save_rasterizer(ctx->blitter, ctx->rasterizer);
util_blitter_save_viewport(ctx->blitter, &ctx->viewport);
util_blitter_save_scissor(ctx->blitter, &ctx->scissor);
util_blitter_save_fragment_shader(ctx->blitter, ctx->prog.fp);
util_blitter_save_blend(ctx->blitter, ctx->blend);
util_blitter_save_depth_stencil_alpha(ctx->blitter, ctx->zsa);
util_blitter_save_stencil_ref(ctx->blitter, &ctx->stencil_ref);
util_blitter_save_sample_mask(ctx->blitter, ctx->sample_mask);
util_blitter_save_framebuffer(ctx->blitter, &ctx->framebuffer);
util_blitter_save_fragment_sampler_states(ctx->blitter,
ctx->fragtex.num_samplers,
(void **)ctx->fragtex.samplers);
util_blitter_save_fragment_sampler_views(ctx->blitter,
ctx->fragtex.num_textures, ctx->fragtex.textures);
 
fd_hw_query_set_stage(ctx, ctx->ring, FD_STAGE_BLIT);
}
 
static void
fd_blitter_pipe_end(struct fd_context *ctx)
{
fd_hw_query_set_stage(ctx, ctx->ring, FD_STAGE_NULL);
}
 
static void
fd_flush_resource(struct pipe_context *pctx, struct pipe_resource *prsc)
{
struct fd_resource *rsc = fd_resource(prsc);
 
if (rsc->dirty || (rsc->stencil && rsc->stencil->dirty))
fd_context_render(pctx);
}
 
void
fd_resource_screen_init(struct pipe_screen *pscreen)
{
pscreen->resource_create = fd_resource_create;
pscreen->resource_from_handle = fd_resource_from_handle;
pscreen->resource_get_handle = u_resource_get_handle_vtbl;
pscreen->resource_destroy = u_resource_destroy_vtbl;
}
 
void
fd_resource_context_init(struct pipe_context *pctx)
{
pctx->transfer_map = u_transfer_map_vtbl;
pctx->transfer_flush_region = u_transfer_flush_region_vtbl;
pctx->transfer_unmap = u_transfer_unmap_vtbl;
pctx->transfer_inline_write = u_transfer_inline_write_vtbl;
pctx->create_surface = fd_create_surface;
pctx->surface_destroy = fd_surface_destroy;
pctx->resource_copy_region = fd_resource_copy_region;
pctx->blit = fd_blit;
pctx->flush_resource = fd_flush_resource;
}
/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/freedreno/freedreno_resource.h
0,0 → 1,123
/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */
 
/*
* Copyright (C) 2012 Rob Clark <robclark@freedesktop.org>
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Authors:
* Rob Clark <robclark@freedesktop.org>
*/
 
#ifndef FREEDRENO_RESOURCE_H_
#define FREEDRENO_RESOURCE_H_
 
#include "util/list.h"
#include "util/u_range.h"
#include "util/u_transfer.h"
 
#include "freedreno_util.h"
 
/* Texture Layout on a3xx:
*
* Each mipmap-level contains all of its layers (ie. all cubemap
* faces, all 1d/2d array elements, etc). The texture sampler is
* programmed with the start address of each mipmap level, and hw
* derives the layer offset within the level.
*
* Texture Layout on a4xx:
*
* For cubemap and 2d array, each layer contains all of its mipmap
* levels (layer_first layout).
*
* 3d textures are laid out as on a3xx, but it is not yet known how
* 3d-array textures are handled.
*
* In either case, the slice represents the per-miplevel information,
* but in layer_first layout it only includes the first layer, and
* an additional offset of (rsc->layer_size * layer) must be added.
*/
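/* For example (illustrative): in layer_first layout, cubemap face 3 at
 * miplevel 2 starts at slices[2].offset + 3 * layer_size, while in the
 * a3xx layout it starts at slices[2].offset + 3 * slices[2].size0 (see
 * fd_resource_offset() below).
 */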
struct fd_resource_slice {
uint32_t offset; /* offset of first layer in slice */
uint32_t pitch;
uint32_t size0; /* size of first layer in slice */
};
 
struct fd_resource {
struct u_resource base;
struct fd_bo *bo;
uint32_t cpp;
bool layer_first; /* see above description */
uint32_t layer_size;
struct fd_resource_slice slices[MAX_MIP_LEVELS];
uint32_t timestamp;
bool dirty, reading;
/* buffer range that has been initialized */
struct util_range valid_buffer_range;
 
/* reference to the resource holding stencil data for a z32_s8 texture */
struct fd_resource *stencil;
 
struct list_head list;
};
 
static INLINE struct fd_resource *
fd_resource(struct pipe_resource *ptex)
{
return (struct fd_resource *)ptex;
}
 
struct fd_transfer {
struct pipe_transfer base;
void *staging;
};
 
static INLINE struct fd_transfer *
fd_transfer(struct pipe_transfer *ptrans)
{
return (struct fd_transfer *)ptrans;
}
 
static INLINE struct fd_resource_slice *
fd_resource_slice(struct fd_resource *rsc, unsigned level)
{
assert(level <= rsc->base.b.last_level);
return &rsc->slices[level];
}
 
/* get offset for specified mipmap level and texture/array layer */
static INLINE uint32_t
fd_resource_offset(struct fd_resource *rsc, unsigned level, unsigned layer)
{
struct fd_resource_slice *slice = fd_resource_slice(rsc, level);
unsigned offset;
if (rsc->layer_first) {
offset = slice->offset + (rsc->layer_size * layer);
} else {
offset = slice->offset + (slice->size0 * layer);
}
debug_assert(offset < fd_bo_size(rsc->bo));
return offset;
}
 
void fd_resource_screen_init(struct pipe_screen *pscreen);
void fd_resource_context_init(struct pipe_context *pctx);
 
#endif /* FREEDRENO_RESOURCE_H_ */
/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/freedreno/freedreno_screen.c
0,0 → 1,561
/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */
 
/*
* Copyright (C) 2012 Rob Clark <robclark@freedesktop.org>
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Authors:
* Rob Clark <robclark@freedesktop.org>
*/
 
 
#include "pipe/p_defines.h"
#include "pipe/p_screen.h"
#include "pipe/p_state.h"
 
#include "util/u_memory.h"
#include "util/u_inlines.h"
#include "util/u_format.h"
#include "util/u_format_s3tc.h"
#include "util/u_string.h"
#include "util/u_debug.h"
 
#include "os/os_time.h"
 
#include <stdio.h>
#include <errno.h>
#include <stdlib.h>
 
#include "freedreno_screen.h"
#include "freedreno_resource.h"
#include "freedreno_fence.h"
#include "freedreno_query.h"
#include "freedreno_util.h"
 
#include "a2xx/fd2_screen.h"
#include "a3xx/fd3_screen.h"
#include "a4xx/fd4_screen.h"
 
/* XXX this should go away */
#include "state_tracker/drm_driver.h"
 
static const struct debug_named_value debug_options[] = {
{"msgs", FD_DBG_MSGS, "Print debug messages"},
{"disasm", FD_DBG_DISASM, "Dump TGSI and adreno shader disassembly"},
{"dclear", FD_DBG_DCLEAR, "Mark all state dirty after clear"},
{"flush", FD_DBG_FLUSH, "Force flush after every draw"},
{"noscis", FD_DBG_NOSCIS, "Disable scissor optimization"},
{"direct", FD_DBG_DIRECT, "Force inline (SS_DIRECT) state loads"},
{"nobypass", FD_DBG_NOBYPASS, "Disable GMEM bypass"},
{"fraghalf", FD_DBG_FRAGHALF, "Use half-precision in fragment shader"},
{"nobin", FD_DBG_NOBIN, "Disable hw binning"},
{"optmsgs", FD_DBG_OPTMSGS,"Enable optimizer debug messages"},
{"optdump", FD_DBG_OPTDUMP,"Dump shader DAG to .dot files"},
{"glsl120", FD_DBG_GLSL120,"Temporary flag to force GLSL 120 (rather than 130) on a3xx+"},
{"nocp", FD_DBG_NOCP, "Disable copy-propagation"},
{"nir", FD_DBG_NIR, "Enable experimental NIR compiler"},
DEBUG_NAMED_VALUE_END
};
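/* usage note (added for illustration): the table above is parsed from the
 * FD_MESA_DEBUG environment variable as a comma-separated list, e.g.
 * FD_MESA_DEBUG=msgs,disasm enables debug messages plus shader disassembly.
 */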
 
DEBUG_GET_ONCE_FLAGS_OPTION(fd_mesa_debug, "FD_MESA_DEBUG", debug_options, 0)
 
int fd_mesa_debug = 0;
bool fd_binning_enabled = true;
static bool glsl120 = false;
 
static const char *
fd_screen_get_name(struct pipe_screen *pscreen)
{
static char buffer[128];
util_snprintf(buffer, sizeof(buffer), "FD%03d",
fd_screen(pscreen)->device_id);
return buffer;
}
 
static const char *
fd_screen_get_vendor(struct pipe_screen *pscreen)
{
return "freedreno";
}
 
static const char *
fd_screen_get_device_vendor(struct pipe_screen *pscreen)
{
return "Qualcomm";
}
 
 
static uint64_t
fd_screen_get_timestamp(struct pipe_screen *pscreen)
{
int64_t cpu_time = os_time_get() * 1000;
return cpu_time + fd_screen(pscreen)->cpu_gpu_time_delta;
}
 
static void
fd_screen_destroy(struct pipe_screen *pscreen)
{
struct fd_screen *screen = fd_screen(pscreen);
 
if (screen->pipe)
fd_pipe_del(screen->pipe);
 
if (screen->dev)
fd_device_del(screen->dev);
 
free(screen);
}
 
/*
TODO either move caps to a2xx/a3xx specific code, or maybe have some
tables for things that differ if the delta is not too much..
*/
static int
fd_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
{
struct fd_screen *screen = fd_screen(pscreen);
 
/* this is probably not totally correct.. but it's a start: */
switch (param) {
/* Supported features (boolean caps). */
case PIPE_CAP_NPOT_TEXTURES:
case PIPE_CAP_MIXED_FRAMEBUFFER_SIZES:
case PIPE_CAP_TWO_SIDED_STENCIL:
case PIPE_CAP_ANISOTROPIC_FILTER:
case PIPE_CAP_POINT_SPRITE:
case PIPE_CAP_TEXTURE_SHADOW_MAP:
case PIPE_CAP_BLEND_EQUATION_SEPARATE:
case PIPE_CAP_TEXTURE_SWIZZLE:
case PIPE_CAP_MIXED_COLORBUFFER_FORMATS:
case PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT:
case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER:
case PIPE_CAP_SEAMLESS_CUBE_MAP:
case PIPE_CAP_VERTEX_COLOR_UNCLAMPED:
case PIPE_CAP_QUADS_FOLLOW_PROVOKING_VERTEX_CONVENTION:
case PIPE_CAP_VERTEX_BUFFER_OFFSET_4BYTE_ALIGNED_ONLY:
case PIPE_CAP_VERTEX_BUFFER_STRIDE_4BYTE_ALIGNED_ONLY:
case PIPE_CAP_VERTEX_ELEMENT_SRC_OFFSET_4BYTE_ALIGNED_ONLY:
case PIPE_CAP_USER_CONSTANT_BUFFERS:
case PIPE_CAP_BUFFER_MAP_PERSISTENT_COHERENT:
case PIPE_CAP_VERTEXID_NOBASE:
return 1;
 
case PIPE_CAP_SHADER_STENCIL_EXPORT:
case PIPE_CAP_TGSI_TEXCOORD:
case PIPE_CAP_PREFER_BLIT_BASED_TEXTURE_TRANSFER:
case PIPE_CAP_CONDITIONAL_RENDER:
case PIPE_CAP_TEXTURE_MULTISAMPLE:
case PIPE_CAP_TEXTURE_BARRIER:
case PIPE_CAP_TEXTURE_MIRROR_CLAMP:
case PIPE_CAP_CUBE_MAP_ARRAY:
case PIPE_CAP_TEXTURE_BUFFER_OBJECTS:
case PIPE_CAP_TEXTURE_BUFFER_OFFSET_ALIGNMENT:
case PIPE_CAP_MAX_TEXTURE_BUFFER_SIZE:
case PIPE_CAP_MAX_DUAL_SOURCE_RENDER_TARGETS:
case PIPE_CAP_START_INSTANCE:
case PIPE_CAP_COMPUTE:
return 0;
 
case PIPE_CAP_SM3:
case PIPE_CAP_PRIMITIVE_RESTART:
case PIPE_CAP_TGSI_INSTANCEID:
case PIPE_CAP_VERTEX_ELEMENT_INSTANCE_DIVISOR:
return is_a3xx(screen) || is_a4xx(screen);
 
case PIPE_CAP_INDEP_BLEND_ENABLE:
case PIPE_CAP_INDEP_BLEND_FUNC:
case PIPE_CAP_DEPTH_CLIP_DISABLE:
return is_a3xx(screen);
 
case PIPE_CAP_CONSTANT_BUFFER_OFFSET_ALIGNMENT:
return 256;
 
case PIPE_CAP_GLSL_FEATURE_LEVEL:
if (glsl120)
return 120;
return (is_a3xx(screen) || is_a4xx(screen)) ? 130 : 120;
 
/* Unsupported features. */
case PIPE_CAP_SEAMLESS_CUBE_MAP_PER_TEXTURE:
case PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT:
case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER:
case PIPE_CAP_TGSI_CAN_COMPACT_CONSTANTS:
case PIPE_CAP_FRAGMENT_COLOR_CLAMPED:
case PIPE_CAP_VERTEX_COLOR_CLAMPED:
case PIPE_CAP_USER_VERTEX_BUFFERS:
case PIPE_CAP_USER_INDEX_BUFFERS:
case PIPE_CAP_QUERY_PIPELINE_STATISTICS:
case PIPE_CAP_TEXTURE_BORDER_COLOR_QUIRK:
case PIPE_CAP_TGSI_VS_LAYER_VIEWPORT:
case PIPE_CAP_MAX_TEXTURE_GATHER_COMPONENTS:
case PIPE_CAP_TEXTURE_GATHER_SM5:
case PIPE_CAP_FAKE_SW_MSAA:
case PIPE_CAP_TEXTURE_QUERY_LOD:
case PIPE_CAP_SAMPLE_SHADING:
case PIPE_CAP_TEXTURE_GATHER_OFFSETS:
case PIPE_CAP_TGSI_VS_WINDOW_SPACE_POSITION:
case PIPE_CAP_DRAW_INDIRECT:
case PIPE_CAP_TGSI_FS_FINE_DERIVATIVE:
case PIPE_CAP_CONDITIONAL_RENDER_INVERTED:
case PIPE_CAP_SAMPLER_VIEW_TARGET:
case PIPE_CAP_CLIP_HALFZ:
case PIPE_CAP_POLYGON_OFFSET_CLAMP:
case PIPE_CAP_MULTISAMPLE_Z_RESOLVE:
case PIPE_CAP_RESOURCE_FROM_USER_MEMORY:
case PIPE_CAP_DEVICE_RESET_STATUS_QUERY:
return 0;
 
case PIPE_CAP_MAX_VIEWPORTS:
return 1;
 
/* Stream output. */
case PIPE_CAP_MAX_STREAM_OUTPUT_BUFFERS:
case PIPE_CAP_STREAM_OUTPUT_PAUSE_RESUME:
case PIPE_CAP_MAX_STREAM_OUTPUT_SEPARATE_COMPONENTS:
case PIPE_CAP_MAX_STREAM_OUTPUT_INTERLEAVED_COMPONENTS:
return 0;
 
/* Geometry shader output, unsupported. */
case PIPE_CAP_MAX_GEOMETRY_OUTPUT_VERTICES:
case PIPE_CAP_MAX_GEOMETRY_TOTAL_OUTPUT_COMPONENTS:
case PIPE_CAP_MAX_VERTEX_STREAMS:
return 0;
 
case PIPE_CAP_MAX_VERTEX_ATTRIB_STRIDE:
return 2048;
 
/* Texturing. */
case PIPE_CAP_MAX_TEXTURE_2D_LEVELS:
case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS:
return MAX_MIP_LEVELS;
case PIPE_CAP_MAX_TEXTURE_3D_LEVELS:
return 11;
 
case PIPE_CAP_MAX_TEXTURE_ARRAY_LAYERS:
return (is_a3xx(screen) || is_a4xx(screen)) ? 256 : 0;
 
/* Render targets. */
case PIPE_CAP_MAX_RENDER_TARGETS:
return screen->max_rts;
 
/* Queries. */
case PIPE_CAP_QUERY_TIME_ELAPSED:
case PIPE_CAP_QUERY_TIMESTAMP:
return 0;
case PIPE_CAP_OCCLUSION_QUERY:
/* TODO still missing on a4xx, but we lie to get gl2..
* it's not a feature, it's a bug!
*/
return is_a3xx(screen) || is_a4xx(screen);
 
case PIPE_CAP_MIN_TEXTURE_GATHER_OFFSET:
case PIPE_CAP_MIN_TEXEL_OFFSET:
return -8;
 
case PIPE_CAP_MAX_TEXTURE_GATHER_OFFSET:
case PIPE_CAP_MAX_TEXEL_OFFSET:
return 7;
 
case PIPE_CAP_ENDIANNESS:
return PIPE_ENDIAN_LITTLE;
 
case PIPE_CAP_MIN_MAP_BUFFER_ALIGNMENT:
return 64;
 
case PIPE_CAP_VENDOR_ID:
return 0x5143;
case PIPE_CAP_DEVICE_ID:
return 0xFFFFFFFF;
case PIPE_CAP_ACCELERATED:
return 1;
case PIPE_CAP_VIDEO_MEMORY:
DBG("FINISHME: The value returned is incorrect\n");
return 10;
case PIPE_CAP_UMA:
return 1;
}
debug_printf("unknown param %d\n", param);
return 0;
}
 
static float
fd_screen_get_paramf(struct pipe_screen *pscreen, enum pipe_capf param)
{
switch (param) {
case PIPE_CAPF_MAX_LINE_WIDTH:
case PIPE_CAPF_MAX_LINE_WIDTH_AA:
case PIPE_CAPF_MAX_POINT_WIDTH:
case PIPE_CAPF_MAX_POINT_WIDTH_AA:
return 4092.0f;
case PIPE_CAPF_MAX_TEXTURE_ANISOTROPY:
return 16.0f;
case PIPE_CAPF_MAX_TEXTURE_LOD_BIAS:
return 15.0f;
case PIPE_CAPF_GUARD_BAND_LEFT:
case PIPE_CAPF_GUARD_BAND_TOP:
case PIPE_CAPF_GUARD_BAND_RIGHT:
case PIPE_CAPF_GUARD_BAND_BOTTOM:
return 0.0f;
}
debug_printf("unknown paramf %d\n", param);
return 0;
}
 
static int
fd_screen_get_shader_param(struct pipe_screen *pscreen, unsigned shader,
enum pipe_shader_cap param)
{
struct fd_screen *screen = fd_screen(pscreen);
 
switch(shader)
{
case PIPE_SHADER_FRAGMENT:
case PIPE_SHADER_VERTEX:
break;
case PIPE_SHADER_COMPUTE:
case PIPE_SHADER_GEOMETRY:
/* maybe we could emulate.. */
return 0;
default:
DBG("unknown shader type %d", shader);
return 0;
}
 
/* this is probably not totally correct.. but it's a start: */
switch (param) {
case PIPE_SHADER_CAP_MAX_INSTRUCTIONS:
case PIPE_SHADER_CAP_MAX_ALU_INSTRUCTIONS:
case PIPE_SHADER_CAP_MAX_TEX_INSTRUCTIONS:
case PIPE_SHADER_CAP_MAX_TEX_INDIRECTIONS:
return 16384;
case PIPE_SHADER_CAP_MAX_CONTROL_FLOW_DEPTH:
return 8; /* XXX */
case PIPE_SHADER_CAP_MAX_INPUTS:
case PIPE_SHADER_CAP_MAX_OUTPUTS:
return 16;
case PIPE_SHADER_CAP_MAX_TEMPS:
return 64; /* Max native temporaries. */
case PIPE_SHADER_CAP_MAX_CONST_BUFFER_SIZE:
/* NOTE: the limit for a3xx actually seems to be 512, but
* split between VS and FS.  Use the lower limit of 256 to
* avoid getting into impossible situations:
*/
return ((is_a3xx(screen) || is_a4xx(screen)) ? 4096 : 64) * sizeof(float[4]);
case PIPE_SHADER_CAP_MAX_CONST_BUFFERS:
return (is_a3xx(screen) || is_a4xx(screen)) ? 16 : 1;
case PIPE_SHADER_CAP_MAX_PREDS:
return 0; /* nothing uses this */
case PIPE_SHADER_CAP_TGSI_CONT_SUPPORTED:
return 1;
case PIPE_SHADER_CAP_INDIRECT_INPUT_ADDR:
case PIPE_SHADER_CAP_INDIRECT_OUTPUT_ADDR:
case PIPE_SHADER_CAP_INDIRECT_TEMP_ADDR:
case PIPE_SHADER_CAP_INDIRECT_CONST_ADDR:
return 1;
case PIPE_SHADER_CAP_SUBROUTINES:
case PIPE_SHADER_CAP_DOUBLES:
case PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED:
case PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED:
case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED:
return 0;
case PIPE_SHADER_CAP_TGSI_SQRT_SUPPORTED:
return 1;
case PIPE_SHADER_CAP_INTEGERS:
if (glsl120)
return 0;
return (is_a3xx(screen) || is_a4xx(screen)) ? 1 : 0;
case PIPE_SHADER_CAP_MAX_TEXTURE_SAMPLERS:
case PIPE_SHADER_CAP_MAX_SAMPLER_VIEWS:
return 16;
case PIPE_SHADER_CAP_PREFERRED_IR:
return PIPE_SHADER_IR_TGSI;
}
debug_printf("unknown shader param %d\n", param);
return 0;
}
 
boolean
fd_screen_bo_get_handle(struct pipe_screen *pscreen,
struct fd_bo *bo,
unsigned stride,
struct winsys_handle *whandle)
{
whandle->stride = stride;
 
if (whandle->type == DRM_API_HANDLE_TYPE_SHARED) {
return fd_bo_get_name(bo, &whandle->handle) == 0;
} else if (whandle->type == DRM_API_HANDLE_TYPE_KMS) {
whandle->handle = fd_bo_handle(bo);
return TRUE;
} else if (whandle->type == DRM_API_HANDLE_TYPE_FD) {
whandle->handle = fd_bo_dmabuf(bo);
return TRUE;
} else {
return FALSE;
}
}
 
struct fd_bo *
fd_screen_bo_from_handle(struct pipe_screen *pscreen,
struct winsys_handle *whandle,
unsigned *out_stride)
{
struct fd_screen *screen = fd_screen(pscreen);
struct fd_bo *bo;
 
if (whandle->type == DRM_API_HANDLE_TYPE_SHARED) {
bo = fd_bo_from_name(screen->dev, whandle->handle);
} else if (whandle->type == DRM_API_HANDLE_TYPE_KMS) {
bo = fd_bo_from_handle(screen->dev, whandle->handle, 0);
} else if (whandle->type == DRM_API_HANDLE_TYPE_FD) {
bo = fd_bo_from_dmabuf(screen->dev, whandle->handle);
} else {
DBG("Attempt to import unsupported handle type %d", whandle->type);
return NULL;
}
 
if (!bo) {
DBG("ref name 0x%08x failed", whandle->handle);
return NULL;
}
 
*out_stride = whandle->stride;
 
return bo;
}
 
struct pipe_screen *
fd_screen_create(struct fd_device *dev)
{
struct fd_screen *screen = CALLOC_STRUCT(fd_screen);
struct pipe_screen *pscreen;
uint64_t val;
 
fd_mesa_debug = debug_get_option_fd_mesa_debug();
 
if (fd_mesa_debug & FD_DBG_NOBIN)
fd_binning_enabled = false;
 
glsl120 = !!(fd_mesa_debug & FD_DBG_GLSL120);
 
if (!screen)
return NULL;
 
pscreen = &screen->base;
 
screen->dev = dev;
 
// maybe this should be in context?
screen->pipe = fd_pipe_new(screen->dev, FD_PIPE_3D);
if (!screen->pipe) {
DBG("could not create 3d pipe");
goto fail;
}
 
if (fd_pipe_get_param(screen->pipe, FD_GMEM_SIZE, &val)) {
DBG("could not get GMEM size");
goto fail;
}
screen->gmemsize_bytes = val;
 
if (fd_pipe_get_param(screen->pipe, FD_DEVICE_ID, &val)) {
DBG("could not get device-id");
goto fail;
}
screen->device_id = val;
 
if (fd_pipe_get_param(screen->pipe, FD_GPU_ID, &val)) {
DBG("could not get gpu-id");
goto fail;
}
screen->gpu_id = val;
 
if (fd_pipe_get_param(screen->pipe, FD_CHIP_ID, &val)) {
DBG("could not get chip-id");
/* older kernels may not have this property: */
unsigned core = screen->gpu_id / 100;
unsigned major = (screen->gpu_id % 100) / 10;
unsigned minor = screen->gpu_id % 10;
unsigned patch = 0; /* assume the worst */
val = (patch & 0xff) | ((minor & 0xff) << 8) |
((major & 0xff) << 16) | ((core & 0xff) << 24);
}
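/* For example, on an older kernel gpu_id 320 packs to core=3,
* major=2, minor=0, patch=0, giving a fallback chip_id of 0x03020000.
*/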
screen->chip_id = val;
 
DBG("Pipe Info:");
DBG(" GPU-id: %d", screen->gpu_id);
DBG(" Chip-id: 0x%08x", screen->chip_id);
DBG(" GMEM size: 0x%08x", screen->gmemsize_bytes);
 
/* explicitly checking for GPU revisions that are known to work. This
* may be overly conservative for a3xx, where spoofing the gpu_id with
* the blob driver seems to generate identical cmdstream dumps. But
* on a2xx, there seem to be small differences between the GPU revs
* so it is probably better to actually test first on real hardware
* before enabling:
*
* If you have a different adreno version, feel free to add it to one
* of the cases below and see what happens. And if it works, please
* send a patch ;-)
*/
switch (screen->gpu_id) {
case 220:
fd2_screen_init(pscreen);
break;
case 307:
case 320:
case 330:
fd3_screen_init(pscreen);
break;
case 420:
fd4_screen_init(pscreen);
break;
default:
debug_printf("unsupported GPU: a%03d\n", screen->gpu_id);
goto fail;
}
 
pscreen->destroy = fd_screen_destroy;
pscreen->get_param = fd_screen_get_param;
pscreen->get_paramf = fd_screen_get_paramf;
pscreen->get_shader_param = fd_screen_get_shader_param;
 
fd_resource_screen_init(pscreen);
fd_query_screen_init(pscreen);
 
pscreen->get_name = fd_screen_get_name;
pscreen->get_vendor = fd_screen_get_vendor;
pscreen->get_device_vendor = fd_screen_get_device_vendor;
 
pscreen->get_timestamp = fd_screen_get_timestamp;
 
pscreen->fence_reference = fd_screen_fence_ref;
pscreen->fence_signalled = fd_screen_fence_signalled;
pscreen->fence_finish = fd_screen_fence_finish;
 
util_format_s3tc_init();
 
return pscreen;
 
fail:
fd_screen_destroy(pscreen);
return NULL;
}
/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/freedreno/freedreno_screen.h
0,0 → 1,92
/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */
 
/*
* Copyright (C) 2012 Rob Clark <robclark@freedesktop.org>
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Authors:
* Rob Clark <robclark@freedesktop.org>
*/
 
#ifndef FREEDRENO_SCREEN_H_
#define FREEDRENO_SCREEN_H_
 
#include <freedreno_drmif.h>
#include <freedreno_ringbuffer.h>
 
#include "pipe/p_screen.h"
#include "util/u_memory.h"
 
typedef uint32_t u32;
 
struct fd_bo;
 
struct fd_screen {
struct pipe_screen base;
 
uint32_t gmemsize_bytes;
uint32_t device_id;
uint32_t gpu_id; /* 220, 305, etc */
uint32_t chip_id; /* coreid:8 majorrev:8 minorrev:8 patch:8 */
uint32_t max_rts;
 
struct fd_device *dev;
struct fd_pipe *pipe;
 
int64_t cpu_gpu_time_delta;
};
 
static INLINE struct fd_screen *
fd_screen(struct pipe_screen *pscreen)
{
return (struct fd_screen *)pscreen;
}
 
boolean fd_screen_bo_get_handle(struct pipe_screen *pscreen,
struct fd_bo *bo,
unsigned stride,
struct winsys_handle *whandle);
struct fd_bo * fd_screen_bo_from_handle(struct pipe_screen *pscreen,
struct winsys_handle *whandle,
unsigned *out_stride);
 
struct pipe_screen * fd_screen_create(struct fd_device *dev);
 
/* is a3xx patch revision 0? */
static inline boolean
is_a3xx_p0(struct fd_screen *screen)
{
return (screen->chip_id & 0xff0000ff) == 0x03000000;
}
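/* For example, chip_id 0x03020000 (an a320 with patch level 0) matches
* the mask above, while 0x03020001 (patch level 1) does not.
*/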
 
static inline boolean
is_a3xx(struct fd_screen *screen)
{
return (screen->gpu_id >= 300) && (screen->gpu_id < 400);
}
 
static inline boolean
is_a4xx(struct fd_screen *screen)
{
return (screen->gpu_id >= 400) && (screen->gpu_id < 500);
}
 
#endif /* FREEDRENO_SCREEN_H_ */
/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/freedreno/freedreno_state.c
0,0 → 1,331
/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */
 
/*
* Copyright (C) 2012 Rob Clark <robclark@freedesktop.org>
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Authors:
* Rob Clark <robclark@freedesktop.org>
*/
 
#include "pipe/p_state.h"
#include "util/u_string.h"
#include "util/u_memory.h"
#include "util/u_helpers.h"
 
#include "freedreno_state.h"
#include "freedreno_context.h"
#include "freedreno_resource.h"
#include "freedreno_texture.h"
#include "freedreno_gmem.h"
#include "freedreno_util.h"
 
/* All the generic state handling.. In case of CSO's that are specific
* to the GPU version, when the bind and the delete are common they can
* go in here.
*/
 
static void
fd_set_blend_color(struct pipe_context *pctx,
const struct pipe_blend_color *blend_color)
{
struct fd_context *ctx = fd_context(pctx);
ctx->blend_color = *blend_color;
ctx->dirty |= FD_DIRTY_BLEND_COLOR;
}
 
static void
fd_set_stencil_ref(struct pipe_context *pctx,
const struct pipe_stencil_ref *stencil_ref)
{
struct fd_context *ctx = fd_context(pctx);
ctx->stencil_ref = *stencil_ref;
ctx->dirty |= FD_DIRTY_STENCIL_REF;
}
 
static void
fd_set_clip_state(struct pipe_context *pctx,
const struct pipe_clip_state *clip)
{
DBG("TODO: ");
}
 
static void
fd_set_sample_mask(struct pipe_context *pctx, unsigned sample_mask)
{
struct fd_context *ctx = fd_context(pctx);
ctx->sample_mask = (uint16_t)sample_mask;
ctx->dirty |= FD_DIRTY_SAMPLE_MASK;
}
 
/* notes from calim on #dri-devel:
* index==0 will be non-UBO (ie. glUniformXYZ()) all packed together padded
* out to vec4's
* I should be able to consider that I own the user_ptr until the next
* set_constant_buffer() call, at which point I don't really care about the
* previous values.
* index>0 will be UBO's.. well, I'll worry about that later
*/
static void
fd_set_constant_buffer(struct pipe_context *pctx, uint shader, uint index,
struct pipe_constant_buffer *cb)
{
struct fd_context *ctx = fd_context(pctx);
struct fd_constbuf_stateobj *so = &ctx->constbuf[shader];
 
/* Note that the state tracker can unbind constant buffers by
* passing NULL here.
*/
if (unlikely(!cb)) {
so->enabled_mask &= ~(1 << index);
so->dirty_mask &= ~(1 << index);
pipe_resource_reference(&so->cb[index].buffer, NULL);
return;
}
 
pipe_resource_reference(&so->cb[index].buffer, cb->buffer);
so->cb[index].buffer_offset = cb->buffer_offset;
so->cb[index].buffer_size = cb->buffer_size;
so->cb[index].user_buffer = cb->user_buffer;
 
so->enabled_mask |= 1 << index;
so->dirty_mask |= 1 << index;
ctx->dirty |= FD_DIRTY_CONSTBUF;
}
 
static void
fd_set_framebuffer_state(struct pipe_context *pctx,
const struct pipe_framebuffer_state *framebuffer)
{
struct fd_context *ctx = fd_context(pctx);
struct pipe_framebuffer_state *cso = &ctx->framebuffer;
 
DBG("%d: cbufs[0]=%p, zsbuf=%p", ctx->needs_flush,
framebuffer->cbufs[0], framebuffer->zsbuf);
 
fd_context_render(pctx);
 
if ((cso->width != framebuffer->width) ||
(cso->height != framebuffer->height))
ctx->needs_rb_fbd = true;
 
util_copy_framebuffer_state(cso, framebuffer);
 
ctx->dirty |= FD_DIRTY_FRAMEBUFFER;
 
ctx->disabled_scissor.minx = 0;
ctx->disabled_scissor.miny = 0;
ctx->disabled_scissor.maxx = cso->width;
ctx->disabled_scissor.maxy = cso->height;
 
ctx->dirty |= FD_DIRTY_SCISSOR;
}
 
static void
fd_set_polygon_stipple(struct pipe_context *pctx,
const struct pipe_poly_stipple *stipple)
{
struct fd_context *ctx = fd_context(pctx);
ctx->stipple = *stipple;
ctx->dirty |= FD_DIRTY_STIPPLE;
}
 
static void
fd_set_scissor_states(struct pipe_context *pctx,
unsigned start_slot,
unsigned num_scissors,
const struct pipe_scissor_state *scissor)
{
struct fd_context *ctx = fd_context(pctx);
 
ctx->scissor = *scissor;
ctx->dirty |= FD_DIRTY_SCISSOR;
}
 
static void
fd_set_viewport_states(struct pipe_context *pctx,
unsigned start_slot,
unsigned num_viewports,
const struct pipe_viewport_state *viewport)
{
struct fd_context *ctx = fd_context(pctx);
ctx->viewport = *viewport;
ctx->dirty |= FD_DIRTY_VIEWPORT;
}
 
static void
fd_set_vertex_buffers(struct pipe_context *pctx,
unsigned start_slot, unsigned count,
const struct pipe_vertex_buffer *vb)
{
struct fd_context *ctx = fd_context(pctx);
struct fd_vertexbuf_stateobj *so = &ctx->vtx.vertexbuf;
int i;
 
/* on a2xx, pitch is encoded in the vtx fetch instruction, so
* we need to mark VTXSTATE as dirty as well to trigger patching
* and re-emitting the vtx shader:
*/
for (i = 0; i < count; i++) {
bool new_enabled = vb && (vb[i].buffer || vb[i].user_buffer);
bool old_enabled = so->vb[i].buffer || so->vb[i].user_buffer;
uint32_t new_stride = vb ? vb[i].stride : 0;
uint32_t old_stride = so->vb[i].stride;
if ((new_enabled != old_enabled) || (new_stride != old_stride)) {
ctx->dirty |= FD_DIRTY_VTXSTATE;
break;
}
}
 
util_set_vertex_buffers_mask(so->vb, &so->enabled_mask, vb, start_slot, count);
so->count = util_last_bit(so->enabled_mask);
 
ctx->dirty |= FD_DIRTY_VTXBUF;
}
 
static void
fd_set_index_buffer(struct pipe_context *pctx,
const struct pipe_index_buffer *ib)
{
struct fd_context *ctx = fd_context(pctx);
 
if (ib) {
pipe_resource_reference(&ctx->indexbuf.buffer, ib->buffer);
ctx->indexbuf.index_size = ib->index_size;
ctx->indexbuf.offset = ib->offset;
ctx->indexbuf.user_buffer = ib->user_buffer;
} else {
pipe_resource_reference(&ctx->indexbuf.buffer, NULL);
}
 
ctx->dirty |= FD_DIRTY_INDEXBUF;
}
 
static void
fd_blend_state_bind(struct pipe_context *pctx, void *hwcso)
{
struct fd_context *ctx = fd_context(pctx);
ctx->blend = hwcso;
ctx->dirty |= FD_DIRTY_BLEND;
}
 
static void
fd_blend_state_delete(struct pipe_context *pctx, void *hwcso)
{
FREE(hwcso);
}
 
static void
fd_rasterizer_state_bind(struct pipe_context *pctx, void *hwcso)
{
struct fd_context *ctx = fd_context(pctx);
struct pipe_scissor_state *old_scissor = fd_context_get_scissor(ctx);
 
ctx->rasterizer = hwcso;
ctx->dirty |= FD_DIRTY_RASTERIZER;
 
/* if the scissor enable bit changed we need to mark the scissor
* state as dirty as well.
* NOTE: we can do a shallow compare, since we only care
* whether it changed to/from &ctx->disabled_scissor
*/
if (old_scissor != fd_context_get_scissor(ctx))
ctx->dirty |= FD_DIRTY_SCISSOR;
}
 
static void
fd_rasterizer_state_delete(struct pipe_context *pctx, void *hwcso)
{
FREE(hwcso);
}
 
static void
fd_zsa_state_bind(struct pipe_context *pctx, void *hwcso)
{
struct fd_context *ctx = fd_context(pctx);
ctx->zsa = hwcso;
ctx->dirty |= FD_DIRTY_ZSA;
}
 
static void
fd_zsa_state_delete(struct pipe_context *pctx, void *hwcso)
{
FREE(hwcso);
}
 
static void *
fd_vertex_state_create(struct pipe_context *pctx, unsigned num_elements,
const struct pipe_vertex_element *elements)
{
struct fd_vertex_stateobj *so = CALLOC_STRUCT(fd_vertex_stateobj);
 
if (!so)
return NULL;
 
memcpy(so->pipe, elements, sizeof(*elements) * num_elements);
so->num_elements = num_elements;
 
return so;
}
 
static void
fd_vertex_state_delete(struct pipe_context *pctx, void *hwcso)
{
FREE(hwcso);
}
 
static void
fd_vertex_state_bind(struct pipe_context *pctx, void *hwcso)
{
struct fd_context *ctx = fd_context(pctx);
ctx->vtx.vtx = hwcso;
ctx->dirty |= FD_DIRTY_VTXSTATE;
}
 
void
fd_state_init(struct pipe_context *pctx)
{
pctx->set_blend_color = fd_set_blend_color;
pctx->set_stencil_ref = fd_set_stencil_ref;
pctx->set_clip_state = fd_set_clip_state;
pctx->set_sample_mask = fd_set_sample_mask;
pctx->set_constant_buffer = fd_set_constant_buffer;
pctx->set_framebuffer_state = fd_set_framebuffer_state;
pctx->set_polygon_stipple = fd_set_polygon_stipple;
pctx->set_scissor_states = fd_set_scissor_states;
pctx->set_viewport_states = fd_set_viewport_states;
 
pctx->set_vertex_buffers = fd_set_vertex_buffers;
pctx->set_index_buffer = fd_set_index_buffer;
 
pctx->bind_blend_state = fd_blend_state_bind;
pctx->delete_blend_state = fd_blend_state_delete;
 
pctx->bind_rasterizer_state = fd_rasterizer_state_bind;
pctx->delete_rasterizer_state = fd_rasterizer_state_delete;
 
pctx->bind_depth_stencil_alpha_state = fd_zsa_state_bind;
pctx->delete_depth_stencil_alpha_state = fd_zsa_state_delete;
 
pctx->create_vertex_elements_state = fd_vertex_state_create;
pctx->delete_vertex_elements_state = fd_vertex_state_delete;
pctx->bind_vertex_elements_state = fd_vertex_state_bind;
}
/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/freedreno/freedreno_state.h
0,0 → 1,57
/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */
 
/*
* Copyright (C) 2012 Rob Clark <robclark@freedesktop.org>
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Authors:
* Rob Clark <robclark@freedesktop.org>
*/
 
#ifndef FREEDRENO_STATE_H_
#define FREEDRENO_STATE_H_
 
#include "pipe/p_context.h"
#include "freedreno_context.h"
 
static inline bool fd_depth_enabled(struct fd_context *ctx)
{
return ctx->zsa && ctx->zsa->depth.enabled;
}
 
static inline bool fd_stencil_enabled(struct fd_context *ctx)
{
return ctx->zsa && ctx->zsa->stencil[0].enabled;
}
 
static inline bool fd_logicop_enabled(struct fd_context *ctx)
{
return ctx->blend && ctx->blend->logicop_enable;
}
 
static inline bool fd_blend_enabled(struct fd_context *ctx, unsigned n)
{
return ctx->blend && ctx->blend->rt[n].blend_enable;
}
 
void fd_state_init(struct pipe_context *pctx);
 
#endif /* FREEDRENO_STATE_H_ */
/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/freedreno/freedreno_surface.c
0,0 → 1,73
/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */
 
/*
* Copyright (C) 2012 Rob Clark <robclark@freedesktop.org>
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Authors:
* Rob Clark <robclark@freedesktop.org>
*/
 
#include "freedreno_surface.h"
#include "freedreno_resource.h"
#include "freedreno_util.h"
 
#include "util/u_memory.h"
#include "util/u_inlines.h"
 
struct pipe_surface *
fd_create_surface(struct pipe_context *pctx,
struct pipe_resource *ptex,
const struct pipe_surface *surf_tmpl)
{
// struct fd_resource* tex = fd_resource(ptex);
struct fd_surface* surface = CALLOC_STRUCT(fd_surface);
 
assert(surf_tmpl->u.tex.first_layer == surf_tmpl->u.tex.last_layer);
 
if (surface) {
struct pipe_surface *psurf = &surface->base;
unsigned level = surf_tmpl->u.tex.level;
 
pipe_reference_init(&psurf->reference, 1);
pipe_resource_reference(&psurf->texture, ptex);
 
psurf->context = pctx;
psurf->format = surf_tmpl->format;
psurf->width = u_minify(ptex->width0, level);
psurf->height = u_minify(ptex->height0, level);
psurf->u.tex.level = level;
psurf->u.tex.first_layer = surf_tmpl->u.tex.first_layer;
psurf->u.tex.last_layer = surf_tmpl->u.tex.last_layer;
 
// TODO
DBG("TODO: %ux%u", psurf->width, psurf->height);
}
 
return &surface->base;
}
 
void
fd_surface_destroy(struct pipe_context *pctx, struct pipe_surface *psurf)
{
pipe_resource_reference(&psurf->texture, NULL);
FREE(psurf);
}
/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/freedreno/freedreno_surface.h
0,0 → 1,54
/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */
 
/*
* Copyright (C) 2012 Rob Clark <robclark@freedesktop.org>
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Authors:
* Rob Clark <robclark@freedesktop.org>
*/
 
#ifndef FREEDRENO_SURFACE_H_
#define FREEDRENO_SURFACE_H_
 
#include "pipe/p_state.h"
 
struct fd_surface {
struct pipe_surface base;
uint32_t offset;
uint32_t pitch;
uint32_t width;
uint16_t height;
uint16_t depth;
};
 
static INLINE struct fd_surface *
fd_surface(struct pipe_surface *psurf)
{
return (struct fd_surface *)psurf;
}
 
struct pipe_surface* fd_create_surface(struct pipe_context *pctx,
struct pipe_resource *ptex,
const struct pipe_surface *surf_tmpl);
void fd_surface_destroy(struct pipe_context *pctx, struct pipe_surface *psurf);
 
#endif /* FREEDRENO_SURFACE_H_ */
/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/freedreno/freedreno_texture.c
0,0 → 1,164
/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */
 
/*
* Copyright (C) 2012 Rob Clark <robclark@freedesktop.org>
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Authors:
* Rob Clark <robclark@freedesktop.org>
*/
 
#include "pipe/p_state.h"
#include "util/u_string.h"
#include "util/u_memory.h"
#include "util/u_inlines.h"
 
#include "freedreno_texture.h"
#include "freedreno_context.h"
#include "freedreno_util.h"
 
static void
fd_sampler_state_delete(struct pipe_context *pctx, void *hwcso)
{
FREE(hwcso);
}
 
static void
fd_sampler_view_destroy(struct pipe_context *pctx,
struct pipe_sampler_view *view)
{
pipe_resource_reference(&view->texture, NULL);
FREE(view);
}
 
static void bind_sampler_states(struct fd_texture_stateobj *tex,
unsigned nr, void **hwcso)
{
unsigned i;
unsigned new_nr = 0;
 
for (i = 0; i < nr; i++) {
if (hwcso[i])
new_nr = i + 1;
tex->samplers[i] = hwcso[i];
tex->dirty_samplers |= (1 << i);
}
 
for (; i < tex->num_samplers; i++) {
tex->samplers[i] = NULL;
tex->dirty_samplers |= (1 << i);
}
 
tex->num_samplers = new_nr;
}
 
static void set_sampler_views(struct fd_texture_stateobj *tex,
unsigned nr, struct pipe_sampler_view **views)
{
unsigned i;
unsigned new_nr = 0;
 
for (i = 0; i < nr; i++) {
if (views[i])
new_nr = i + 1;
pipe_sampler_view_reference(&tex->textures[i], views[i]);
tex->dirty_samplers |= (1 << i);
}
 
for (; i < tex->num_textures; i++) {
pipe_sampler_view_reference(&tex->textures[i], NULL);
tex->dirty_samplers |= (1 << i);
}
 
tex->num_textures = new_nr;
}
 
void
fd_sampler_states_bind(struct pipe_context *pctx,
unsigned shader, unsigned start,
unsigned nr, void **hwcso)
{
struct fd_context *ctx = fd_context(pctx);
 
assert(start == 0);
 
if (shader == PIPE_SHADER_FRAGMENT) {
bind_sampler_states(&ctx->fragtex, nr, hwcso);
ctx->dirty |= FD_DIRTY_FRAGTEX;
}
else if (shader == PIPE_SHADER_VERTEX) {
bind_sampler_states(&ctx->verttex, nr, hwcso);
ctx->dirty |= FD_DIRTY_VERTTEX;
}
}
 
 
static void
fd_fragtex_set_sampler_views(struct pipe_context *pctx, unsigned nr,
struct pipe_sampler_view **views)
{
struct fd_context *ctx = fd_context(pctx);
 
/* on a2xx, since there is a flat address space for textures/samplers,
* a change in # of fragment textures/samplers will trigger patching and
* re-emitting the vertex shader:
*/
if (nr != ctx->fragtex.num_textures)
ctx->dirty |= FD_DIRTY_TEXSTATE;
 
set_sampler_views(&ctx->fragtex, nr, views);
ctx->dirty |= FD_DIRTY_FRAGTEX;
}
 
static void
fd_verttex_set_sampler_views(struct pipe_context *pctx, unsigned nr,
struct pipe_sampler_view **views)
{
struct fd_context *ctx = fd_context(pctx);
set_sampler_views(&ctx->verttex, nr, views);
ctx->dirty |= FD_DIRTY_VERTTEX;
}
 
void
fd_set_sampler_views(struct pipe_context *pctx, unsigned shader,
unsigned start, unsigned nr,
struct pipe_sampler_view **views)
{
assert(start == 0);
switch (shader) {
case PIPE_SHADER_FRAGMENT:
fd_fragtex_set_sampler_views(pctx, nr, views);
break;
case PIPE_SHADER_VERTEX:
fd_verttex_set_sampler_views(pctx, nr, views);
break;
default:
;
}
}
 
void
fd_texture_init(struct pipe_context *pctx)
{
pctx->delete_sampler_state = fd_sampler_state_delete;
 
pctx->sampler_view_destroy = fd_sampler_view_destroy;
}
/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/freedreno/freedreno_texture.h
0,0 → 1,44
/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */
 
/*
* Copyright (C) 2012 Rob Clark <robclark@freedesktop.org>
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Authors:
* Rob Clark <robclark@freedesktop.org>
*/
 
#ifndef FREEDRENO_TEXTURE_H_
#define FREEDRENO_TEXTURE_H_
 
#include "pipe/p_context.h"
 
void fd_sampler_states_bind(struct pipe_context *pctx,
unsigned shader, unsigned start,
unsigned nr, void **hwcso);
 
void fd_set_sampler_views(struct pipe_context *pctx, unsigned shader,
unsigned start, unsigned nr,
struct pipe_sampler_view **views);
 
void fd_texture_init(struct pipe_context *pctx);
 
#endif /* FREEDRENO_TEXTURE_H_ */
/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/freedreno/freedreno_util.c
0,0 → 1,157
/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */
 
/*
* Copyright (C) 2012 Rob Clark <robclark@freedesktop.org>
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Authors:
* Rob Clark <robclark@freedesktop.org>
*/
 
#include "pipe/p_defines.h"
#include "util/u_format.h"
 
#include "freedreno_util.h"
 
unsigned marker_cnt;
 
enum adreno_rb_depth_format
fd_pipe2depth(enum pipe_format format)
{
switch (format) {
case PIPE_FORMAT_Z16_UNORM:
return DEPTHX_16;
case PIPE_FORMAT_Z24X8_UNORM:
case PIPE_FORMAT_Z24_UNORM_S8_UINT:
case PIPE_FORMAT_X8Z24_UNORM:
case PIPE_FORMAT_S8_UINT_Z24_UNORM:
return DEPTHX_24_8;
case PIPE_FORMAT_Z32_FLOAT:
case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
return DEPTHX_32;
default:
return ~0;
}
}
 
enum pc_di_index_size
fd_pipe2index(enum pipe_format format)
{
switch (format) {
case PIPE_FORMAT_I8_UINT:
return INDEX_SIZE_8_BIT;
case PIPE_FORMAT_I16_UINT:
return INDEX_SIZE_16_BIT;
case PIPE_FORMAT_I32_UINT:
return INDEX_SIZE_32_BIT;
default:
return ~0;
}
}
 
 
enum adreno_rb_blend_factor
fd_blend_factor(unsigned factor)
{
switch (factor) {
case PIPE_BLENDFACTOR_ONE:
return FACTOR_ONE;
case PIPE_BLENDFACTOR_SRC_COLOR:
return FACTOR_SRC_COLOR;
case PIPE_BLENDFACTOR_SRC_ALPHA:
return FACTOR_SRC_ALPHA;
case PIPE_BLENDFACTOR_DST_ALPHA:
return FACTOR_DST_ALPHA;
case PIPE_BLENDFACTOR_DST_COLOR:
return FACTOR_DST_COLOR;
case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
return FACTOR_SRC_ALPHA_SATURATE;
case PIPE_BLENDFACTOR_CONST_COLOR:
return FACTOR_CONSTANT_COLOR;
case PIPE_BLENDFACTOR_CONST_ALPHA:
return FACTOR_CONSTANT_ALPHA;
case PIPE_BLENDFACTOR_ZERO:
case 0:
return FACTOR_ZERO;
case PIPE_BLENDFACTOR_INV_SRC_COLOR:
return FACTOR_ONE_MINUS_SRC_COLOR;
case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
return FACTOR_ONE_MINUS_SRC_ALPHA;
case PIPE_BLENDFACTOR_INV_DST_ALPHA:
return FACTOR_ONE_MINUS_DST_ALPHA;
case PIPE_BLENDFACTOR_INV_DST_COLOR:
return FACTOR_ONE_MINUS_DST_COLOR;
case PIPE_BLENDFACTOR_INV_CONST_COLOR:
return FACTOR_ONE_MINUS_CONSTANT_COLOR;
case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
return FACTOR_ONE_MINUS_CONSTANT_ALPHA;
case PIPE_BLENDFACTOR_INV_SRC1_COLOR:
case PIPE_BLENDFACTOR_INV_SRC1_ALPHA:
case PIPE_BLENDFACTOR_SRC1_COLOR:
case PIPE_BLENDFACTOR_SRC1_ALPHA:
/* I don't think these are supported */
default:
DBG("invalid blend factor: %x", factor);
return 0;
}
}
 
enum adreno_pa_su_sc_draw
fd_polygon_mode(unsigned mode)
{
switch (mode) {
case PIPE_POLYGON_MODE_POINT:
return PC_DRAW_POINTS;
case PIPE_POLYGON_MODE_LINE:
return PC_DRAW_LINES;
case PIPE_POLYGON_MODE_FILL:
return PC_DRAW_TRIANGLES;
default:
DBG("invalid polygon mode: %u", mode);
return 0;
}
}
 
enum adreno_stencil_op
fd_stencil_op(unsigned op)
{
switch (op) {
case PIPE_STENCIL_OP_KEEP:
return STENCIL_KEEP;
case PIPE_STENCIL_OP_ZERO:
return STENCIL_ZERO;
case PIPE_STENCIL_OP_REPLACE:
return STENCIL_REPLACE;
case PIPE_STENCIL_OP_INCR:
return STENCIL_INCR_CLAMP;
case PIPE_STENCIL_OP_DECR:
return STENCIL_DECR_CLAMP;
case PIPE_STENCIL_OP_INCR_WRAP:
return STENCIL_INCR_WRAP;
case PIPE_STENCIL_OP_DECR_WRAP:
return STENCIL_DECR_WRAP;
case PIPE_STENCIL_OP_INVERT:
return STENCIL_INVERT;
default:
DBG("invalid stencil op: %u", op);
return 0;
}
}
/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/freedreno/freedreno_util.h
0,0 → 1,267
/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */
 
/*
* Copyright (C) 2012 Rob Clark <robclark@freedesktop.org>
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Authors:
* Rob Clark <robclark@freedesktop.org>
*/
 
#ifndef FREEDRENO_UTIL_H_
#define FREEDRENO_UTIL_H_
 
#include <freedreno_drmif.h>
#include <freedreno_ringbuffer.h>
 
#include "pipe/p_format.h"
#include "pipe/p_state.h"
#include "util/u_debug.h"
#include "util/u_math.h"
#include "util/u_half.h"
#include "util/u_dynarray.h"
#include "util/u_pack_color.h"
 
#include "adreno_common.xml.h"
#include "adreno_pm4.xml.h"
 
enum adreno_rb_depth_format fd_pipe2depth(enum pipe_format format);
enum pc_di_index_size fd_pipe2index(enum pipe_format format);
enum adreno_rb_blend_factor fd_blend_factor(unsigned factor);
enum adreno_pa_su_sc_draw fd_polygon_mode(unsigned mode);
enum adreno_stencil_op fd_stencil_op(unsigned op);
 
#define A3XX_MAX_MIP_LEVELS 14
/* TBD if it is same on a2xx, but for now: */
#define MAX_MIP_LEVELS A3XX_MAX_MIP_LEVELS
 
#define FD_DBG_MSGS 0x0001
#define FD_DBG_DISASM 0x0002
#define FD_DBG_DCLEAR 0x0004
#define FD_DBG_FLUSH 0x0008
#define FD_DBG_NOSCIS 0x0010
#define FD_DBG_DIRECT 0x0020
#define FD_DBG_NOBYPASS 0x0040
#define FD_DBG_FRAGHALF 0x0080
#define FD_DBG_NOBIN 0x0100
#define FD_DBG_OPTMSGS 0x0400
#define FD_DBG_OPTDUMP 0x0800
#define FD_DBG_GLSL120 0x1000
#define FD_DBG_NOCP 0x2000
#define FD_DBG_NIR 0x4000
 
extern int fd_mesa_debug;
extern bool fd_binning_enabled;
 
#define DBG(fmt, ...) \
do { if (fd_mesa_debug & FD_DBG_MSGS) \
debug_printf("%s:%d: "fmt "\n", \
__FUNCTION__, __LINE__, ##__VA_ARGS__); } while (0)
 
/* for conditionally setting boolean flag(s): */
#define COND(bool, val) ((bool) ? (val) : 0)
 
#define CP_REG(reg) ((0x4 << 16) | ((unsigned int)((reg) - (0x2000))))
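/* For an arbitrary illustrative offset, CP_REG(0x2100) evaluates to
* (0x4 << 16) | 0x100 == 0x00040100; the register offset is taken
* relative to 0x2000.
*/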
 
static inline uint32_t DRAW(enum pc_di_primtype prim_type,
enum pc_di_src_sel source_select, enum pc_di_index_size index_size,
enum pc_di_vis_cull_mode vis_cull_mode,
uint8_t instances)
{
return (prim_type << 0) |
(source_select << 6) |
((index_size & 1) << 11) |
((index_size >> 1) << 13) |
(vis_cull_mode << 9) |
(1 << 14) |
(instances << 24);
}
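/* Packing example (argument values chosen purely for illustration):
* prim_type=4, source_select=2, index_size=2, vis_cull_mode=0,
* instances=1 yields 0x01006084; bit 11 holds index_size&1, bit 13
* holds index_size>>1, and bit 14 is always set.
*/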
 
/* for tracking cmdstream positions that need to be patched: */
struct fd_cs_patch {
uint32_t *cs;
uint32_t val;
};
#define fd_patch_num_elements(buf) ((buf)->size / sizeof(struct fd_cs_patch))
#define fd_patch_element(buf, i) util_dynarray_element(buf, struct fd_cs_patch, i)
 
static inline enum pipe_format
pipe_surface_format(struct pipe_surface *psurf)
{
if (!psurf)
return PIPE_FORMAT_NONE;
return psurf->format;
}
 
#define LOG_DWORDS 0
 
static inline void emit_marker(struct fd_ringbuffer *ring, int scratch_idx);
 
static inline void
OUT_RING(struct fd_ringbuffer *ring, uint32_t data)
{
if (LOG_DWORDS) {
DBG("ring[%p]: OUT_RING %04x: %08x", ring,
(uint32_t)(ring->cur - ring->last_start), data);
}
*(ring->cur++) = data;
}
 
/* like OUT_RING() but appends a cmdstream patch point to 'buf' */
static inline void
OUT_RINGP(struct fd_ringbuffer *ring, uint32_t data,
struct util_dynarray *buf)
{
if (LOG_DWORDS) {
DBG("ring[%p]: OUT_RINGP %04x: %08x", ring,
(uint32_t)(ring->cur - ring->last_start), data);
}
util_dynarray_append(buf, struct fd_cs_patch, ((struct fd_cs_patch){
.cs = ring->cur++,
.val = data,
}));
}
 
static inline void
OUT_RELOC(struct fd_ringbuffer *ring, struct fd_bo *bo,
uint32_t offset, uint32_t or, int32_t shift)
{
if (LOG_DWORDS) {
DBG("ring[%p]: OUT_RELOC %04x: %p+%u << %d", ring,
(uint32_t)(ring->cur - ring->last_start), bo, offset, shift);
}
fd_ringbuffer_reloc(ring, &(struct fd_reloc){
.bo = bo,
.flags = FD_RELOC_READ,
.offset = offset,
.or = or,
.shift = shift,
});
}
 
static inline void
OUT_RELOCW(struct fd_ringbuffer *ring, struct fd_bo *bo,
uint32_t offset, uint32_t or, int32_t shift)
{
if (LOG_DWORDS) {
DBG("ring[%p]: OUT_RELOCW %04x: %p+%u << %d", ring,
(uint32_t)(ring->cur - ring->last_start), bo, offset, shift);
}
fd_ringbuffer_reloc(ring, &(struct fd_reloc){
.bo = bo,
.flags = FD_RELOC_READ | FD_RELOC_WRITE,
.offset = offset,
.or = or,
.shift = shift,
});
}
 
static inline void BEGIN_RING(struct fd_ringbuffer *ring, uint32_t ndwords)
{
if ((ring->cur + ndwords) >= ring->end) {
/* this probably won't really work if we have multiple tiles..
* but it is ok for 2d.. we might need different behavior
* depending on 2d or 3d pipe.
*/
DBG("uh oh..");
}
}
 
static inline void
OUT_PKT0(struct fd_ringbuffer *ring, uint16_t regindx, uint16_t cnt)
{
BEGIN_RING(ring, cnt+1);
OUT_RING(ring, CP_TYPE0_PKT | ((cnt-1) << 16) | (regindx & 0x7FFF));
}
 
static inline void
OUT_PKT3(struct fd_ringbuffer *ring, uint8_t opcode, uint16_t cnt)
{
BEGIN_RING(ring, cnt+1);
OUT_RING(ring, CP_TYPE3_PKT | ((cnt-1) << 16) | ((opcode & 0xFF) << 8));
}
 
static inline void
OUT_WFI(struct fd_ringbuffer *ring)
{
OUT_PKT3(ring, CP_WAIT_FOR_IDLE, 1);
OUT_RING(ring, 0x00000000);
}
 
static inline void
OUT_IB(struct fd_ringbuffer *ring, struct fd_ringmarker *start,
struct fd_ringmarker *end)
{
uint32_t dwords = fd_ringmarker_dwords(start, end);
 
assert(dwords > 0);
 
/* for debug after a lock up, write a unique counter value
* to scratch6 for each IB, to make it easier to match up
* register dumps to cmdstream. The combination of IB and
* DRAW (scratch7) is enough to "triangulate" the particular
* draw that caused lockup.
*/
emit_marker(ring, 6);
 
OUT_PKT3(ring, CP_INDIRECT_BUFFER_PFD, 2);
fd_ringbuffer_emit_reloc_ring(ring, start, end);
OUT_RING(ring, dwords);
 
emit_marker(ring, 6);
}
 
/* CP_SCRATCH_REG4 is used to hold base address for query results: */
#define HW_QUERY_BASE_REG REG_AXXX_CP_SCRATCH_REG4
 
static inline void
emit_marker(struct fd_ringbuffer *ring, int scratch_idx)
{
extern unsigned marker_cnt;
unsigned reg = REG_AXXX_CP_SCRATCH_REG0 + scratch_idx;
assert(reg != HW_QUERY_BASE_REG);
if (reg == HW_QUERY_BASE_REG)
return;
OUT_PKT0(ring, reg, 1);
OUT_RING(ring, ++marker_cnt);
}
 
/* helper to get numeric value from environment variable.. mostly
* just leaving this here because it is helpful to brute-force figure
* out unknown formats, etc, which the blob driver does not support:
*/
static inline uint32_t env2u(const char *envvar)
{
char *str = getenv(envvar);
if (str)
return strtoul(str, NULL, 0);
return 0;
}
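/* Usage sketch (the environment variable name here is hypothetical):
*   uint32_t fmt = env2u("FD_COLOR_FORMAT");
* env2u() returns 0 when the variable is unset or not numeric.
*/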
 
static inline uint32_t
pack_rgba(enum pipe_format format, const float *rgba)
{
union util_color uc;
util_pack_color(rgba, format, &uc);
return uc.ui[0];
}
 
#endif /* FREEDRENO_UTIL_H_ */
/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/freedreno/ir3/disasm-a3xx.c
0,0 → 1,797
/*
* Copyright (c) 2013 Rob Clark <robdclark@gmail.com>
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
 
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <string.h>
#include <assert.h>
 
#include <util/u_debug.h>
 
#include "disasm.h"
#include "instr-a3xx.h"
 
static enum debug_t debug;
 
#define printf debug_printf
 
static const char *levels[] = {
"",
"\t",
"\t\t",
"\t\t\t",
"\t\t\t\t",
"\t\t\t\t\t",
"\t\t\t\t\t\t",
"\t\t\t\t\t\t\t",
"\t\t\t\t\t\t\t\t",
"\t\t\t\t\t\t\t\t\t",
"x",
"x",
"x",
"x",
"x",
"x",
};
 
static const char *component = "xyzw";
 
static const char *type[] = {
[TYPE_F16] = "f16",
[TYPE_F32] = "f32",
[TYPE_U16] = "u16",
[TYPE_U32] = "u32",
[TYPE_S16] = "s16",
[TYPE_S32] = "s32",
[TYPE_U8] = "u8",
[TYPE_S8] = "s8",
};
 
static void print_reg(reg_t reg, bool full, bool r, bool c, bool im,
bool neg, bool abs, bool addr_rel)
{
const char type = c ? 'c' : 'r';
 
// XXX I prefer - and || for neg/abs, but preserving format used
// by libllvm-a3xx for easy diffing..
 
if (abs && neg)
printf("(absneg)");
else if (neg)
printf("(neg)");
else if (abs)
printf("(abs)");
 
if (r)
printf("(r)");
 
if (im) {
printf("%d", reg.iim_val);
} else if (addr_rel) {
/* I would just use %+d but trying to make it diff'able with
* libllvm-a3xx...
*/
if (reg.iim_val < 0)
printf("%s%c<a0.x - %d>", full ? "" : "h", type, -reg.iim_val);
else if (reg.iim_val > 0)
printf("%s%c<a0.x + %d>", full ? "" : "h", type, reg.iim_val);
else
printf("%s%c<a0.x>", full ? "" : "h", type);
} else if ((reg.num == REG_A0) && !c) {
printf("a0.%c", component[reg.comp]);
} else if ((reg.num == REG_P0) && !c) {
printf("p0.%c", component[reg.comp]);
} else {
printf("%s%c%d.%c", full ? "" : "h", type, reg.num, component[reg.comp]);
}
}
 
 
/* current instruction repeat flag: */
static unsigned repeat;
 
static void print_reg_dst(reg_t reg, bool full, bool addr_rel)
{
print_reg(reg, full, false, false, false, false, false, addr_rel);
}
 
static void print_reg_src(reg_t reg, bool full, bool r, bool c, bool im,
bool neg, bool abs, bool addr_rel)
{
print_reg(reg, full, r, c, im, neg, abs, addr_rel);
}
 
static void print_instr_cat0(instr_t *instr)
{
instr_cat0_t *cat0 = &instr->cat0;
 
switch (cat0->opc) {
case OPC_KILL:
printf(" %sp0.%c", cat0->inv ? "!" : "",
component[cat0->comp]);
break;
case OPC_BR:
printf(" %sp0.%c, #%d", cat0->inv ? "!" : "",
component[cat0->comp], cat0->immed);
break;
case OPC_JUMP:
case OPC_CALL:
printf(" #%d", cat0->immed);
break;
}
 
if ((debug & PRINT_VERBOSE) && (cat0->dummy1|cat0->dummy2|cat0->dummy3|cat0->dummy4))
printf("\t{0: %x,%x,%x,%x}", cat0->dummy1, cat0->dummy2, cat0->dummy3, cat0->dummy4);
}
 
static void print_instr_cat1(instr_t *instr)
{
instr_cat1_t *cat1 = &instr->cat1;
 
if (cat1->ul)
printf("(ul)");
 
if (cat1->src_type == cat1->dst_type) {
if ((cat1->src_type == TYPE_S16) && (((reg_t)cat1->dst).num == REG_A0)) {
/* special case (mnemonic?): */
printf("mova");
} else {
printf("mov.%s%s", type[cat1->src_type], type[cat1->dst_type]);
}
} else {
printf("cov.%s%s", type[cat1->src_type], type[cat1->dst_type]);
}
 
printf(" ");
 
if (cat1->even)
printf("(even)");
 
if (cat1->pos_inf)
printf("(pos_infinity)");
 
print_reg_dst((reg_t)(cat1->dst), type_size(cat1->dst_type) == 32,
cat1->dst_rel);
 
printf(", ");
 
/* ugg, have to special case this.. vs print_reg().. */
if (cat1->src_im) {
if (type_float(cat1->src_type))
printf("(%f)", cat1->fim_val);
else if (type_uint(cat1->src_type))
printf("0x%08x", cat1->uim_val);
else
printf("%d", cat1->iim_val);
} else if (cat1->src_rel && !cat1->src_c) {
/* I would just use %+d but trying to make it diff'able with
* libllvm-a3xx...
*/
char type = cat1->src_rel_c ? 'c' : 'r';
if (cat1->off < 0)
printf("%c<a0.x - %d>", type, -cat1->off);
else if (cat1->off > 0)
printf("%c<a0.x + %d>", type, cat1->off);
else
printf("c<a0.x>");
} else {
print_reg_src((reg_t)(cat1->src), type_size(cat1->src_type) == 32,
cat1->src_r, cat1->src_c, cat1->src_im, false, false, false);
}
 
if ((debug & PRINT_VERBOSE) && (cat1->must_be_0))
printf("\t{1: %x}", cat1->must_be_0);
}
 
static void print_instr_cat2(instr_t *instr)
{
instr_cat2_t *cat2 = &instr->cat2;
static const char *cond[] = {
"lt",
"le",
"gt",
"ge",
"eq",
"ne",
"?6?",
};
 
switch (cat2->opc) {
case OPC_CMPS_F:
case OPC_CMPS_U:
case OPC_CMPS_S:
case OPC_CMPV_F:
case OPC_CMPV_U:
case OPC_CMPV_S:
printf(".%s", cond[cat2->cond]);
break;
}
 
printf(" ");
if (cat2->ei)
printf("(ei)");
print_reg_dst((reg_t)(cat2->dst), cat2->full ^ cat2->dst_half, false);
printf(", ");
 
if (cat2->c1.src1_c) {
print_reg_src((reg_t)(cat2->c1.src1), cat2->full, cat2->src1_r,
cat2->c1.src1_c, cat2->src1_im, cat2->src1_neg,
cat2->src1_abs, false);
} else if (cat2->rel1.src1_rel) {
print_reg_src((reg_t)(cat2->rel1.src1), cat2->full, cat2->src1_r,
cat2->rel1.src1_c, cat2->src1_im, cat2->src1_neg,
cat2->src1_abs, cat2->rel1.src1_rel);
} else {
print_reg_src((reg_t)(cat2->src1), cat2->full, cat2->src1_r,
false, cat2->src1_im, cat2->src1_neg,
cat2->src1_abs, false);
}
 
switch (cat2->opc) {
case OPC_ABSNEG_F:
case OPC_ABSNEG_S:
case OPC_CLZ_B:
case OPC_CLZ_S:
case OPC_SIGN_F:
case OPC_FLOOR_F:
case OPC_CEIL_F:
case OPC_RNDNE_F:
case OPC_RNDAZ_F:
case OPC_TRUNC_F:
case OPC_NOT_B:
case OPC_BFREV_B:
case OPC_SETRM:
case OPC_CBITS_B:
/* these only have one src reg */
break;
default:
printf(", ");
if (cat2->c2.src2_c) {
print_reg_src((reg_t)(cat2->c2.src2), cat2->full, cat2->src2_r,
cat2->c2.src2_c, cat2->src2_im, cat2->src2_neg,
cat2->src2_abs, false);
} else if (cat2->rel2.src2_rel) {
print_reg_src((reg_t)(cat2->rel2.src2), cat2->full, cat2->src2_r,
cat2->rel2.src2_c, cat2->src2_im, cat2->src2_neg,
cat2->src2_abs, cat2->rel2.src2_rel);
} else {
print_reg_src((reg_t)(cat2->src2), cat2->full, cat2->src2_r,
false, cat2->src2_im, cat2->src2_neg,
cat2->src2_abs, false);
}
break;
}
}
 
static void print_instr_cat3(instr_t *instr)
{
instr_cat3_t *cat3 = &instr->cat3;
bool full = instr_cat3_full(cat3);
 
printf(" ");
print_reg_dst((reg_t)(cat3->dst), full ^ cat3->dst_half, false);
printf(", ");
if (cat3->c1.src1_c) {
print_reg_src((reg_t)(cat3->c1.src1), full,
cat3->src1_r, cat3->c1.src1_c, false, cat3->src1_neg,
false, false);
} else if (cat3->rel1.src1_rel) {
print_reg_src((reg_t)(cat3->rel1.src1), full,
cat3->src1_r, cat3->rel1.src1_c, false, cat3->src1_neg,
false, cat3->rel1.src1_rel);
} else {
print_reg_src((reg_t)(cat3->src1), full,
cat3->src1_r, false, false, cat3->src1_neg,
false, false);
}
printf(", ");
print_reg_src((reg_t)cat3->src2, full,
cat3->src2_r, cat3->src2_c, false, cat3->src2_neg,
false, false);
printf(", ");
if (cat3->c2.src3_c) {
print_reg_src((reg_t)(cat3->c2.src3), full,
cat3->src3_r, cat3->c2.src3_c, false, cat3->src3_neg,
false, false);
} else if (cat3->rel2.src3_rel) {
print_reg_src((reg_t)(cat3->rel2.src3), full,
cat3->src3_r, cat3->rel2.src3_c, false, cat3->src3_neg,
false, cat3->rel2.src3_rel);
} else {
print_reg_src((reg_t)(cat3->src3), full,
cat3->src3_r, false, false, cat3->src3_neg,
false, false);
}
}
 
static void print_instr_cat4(instr_t *instr)
{
instr_cat4_t *cat4 = &instr->cat4;
 
printf(" ");
print_reg_dst((reg_t)(cat4->dst), cat4->full ^ cat4->dst_half, false);
printf(", ");
 
if (cat4->c.src_c) {
print_reg_src((reg_t)(cat4->c.src), cat4->full,
cat4->src_r, cat4->c.src_c, cat4->src_im,
cat4->src_neg, cat4->src_abs, false);
} else if (cat4->rel.src_rel) {
print_reg_src((reg_t)(cat4->rel.src), cat4->full,
cat4->src_r, cat4->rel.src_c, cat4->src_im,
cat4->src_neg, cat4->src_abs, cat4->rel.src_rel);
} else {
print_reg_src((reg_t)(cat4->src), cat4->full,
cat4->src_r, false, cat4->src_im,
cat4->src_neg, cat4->src_abs, false);
}
 
if ((debug & PRINT_VERBOSE) && (cat4->dummy1|cat4->dummy2))
printf("\t{4: %x,%x}", cat4->dummy1, cat4->dummy2);
}
 
static void print_instr_cat5(instr_t *instr)
{
static const struct {
bool src1, src2, samp, tex;
} info[0x1f] = {
[OPC_ISAM] = { true, false, true, true, },
[OPC_ISAML] = { true, true, true, true, },
[OPC_ISAMM] = { true, false, true, true, },
[OPC_SAM] = { true, false, true, true, },
[OPC_SAMB] = { true, true, true, true, },
[OPC_SAML] = { true, true, true, true, },
[OPC_SAMGQ] = { true, false, true, true, },
[OPC_GETLOD] = { true, false, true, true, },
[OPC_CONV] = { true, true, true, true, },
[OPC_CONVM] = { true, true, true, true, },
[OPC_GETSIZE] = { true, false, false, true, },
[OPC_GETBUF] = { false, false, false, true, },
[OPC_GETPOS] = { true, false, false, true, },
[OPC_GETINFO] = { false, false, false, true, },
[OPC_DSX] = { true, false, false, false, },
[OPC_DSY] = { true, false, false, false, },
[OPC_GATHER4R] = { true, false, true, true, },
[OPC_GATHER4G] = { true, false, true, true, },
[OPC_GATHER4B] = { true, false, true, true, },
[OPC_GATHER4A] = { true, false, true, true, },
[OPC_SAMGP0] = { true, false, true, true, },
[OPC_SAMGP1] = { true, false, true, true, },
[OPC_SAMGP2] = { true, false, true, true, },
[OPC_SAMGP3] = { true, false, true, true, },
[OPC_DSXPP_1] = { true, false, false, false, },
[OPC_DSYPP_1] = { true, false, false, false, },
[OPC_RGETPOS] = { false, false, false, false, },
[OPC_RGETINFO] = { false, false, false, false, },
};
instr_cat5_t *cat5 = &instr->cat5;
int i;
 
if (cat5->is_3d) printf(".3d");
if (cat5->is_a) printf(".a");
if (cat5->is_o) printf(".o");
if (cat5->is_p) printf(".p");
if (cat5->is_s) printf(".s");
if (cat5->is_s2en) printf(".s2en");
 
printf(" ");
 
switch (cat5->opc) {
case OPC_DSXPP_1:
case OPC_DSYPP_1:
break;
default:
printf("(%s)", type[cat5->type]);
break;
}
 
printf("(");
for (i = 0; i < 4; i++)
if (cat5->wrmask & (1 << i))
printf("%c", "xyzw"[i]);
printf(")");
 
print_reg_dst((reg_t)(cat5->dst), type_size(cat5->type) == 32, false);
 
if (info[cat5->opc].src1) {
printf(", ");
print_reg_src((reg_t)(cat5->src1), cat5->full, false, false, false,
false, false, false);
}
 
if (cat5->is_s2en) {
printf(", ");
print_reg_src((reg_t)(cat5->s2en.src2), cat5->full, false, false, false,
false, false, false);
printf(", ");
print_reg_src((reg_t)(cat5->s2en.src3), false, false, false, false,
false, false, false);
} else {
if (cat5->is_o || info[cat5->opc].src2) {
printf(", ");
print_reg_src((reg_t)(cat5->norm.src2), cat5->full,
false, false, false, false, false, false);
}
if (info[cat5->opc].samp)
printf(", s#%d", cat5->norm.samp);
if (info[cat5->opc].tex)
printf(", t#%d", cat5->norm.tex);
}
 
if (debug & PRINT_VERBOSE) {
if (cat5->is_s2en) {
if ((debug & PRINT_VERBOSE) && (cat5->s2en.dummy1|cat5->s2en.dummy2|cat5->dummy2))
printf("\t{5: %x,%x,%x}", cat5->s2en.dummy1, cat5->s2en.dummy2, cat5->dummy2);
} else {
if ((debug & PRINT_VERBOSE) && (cat5->norm.dummy1|cat5->dummy2))
printf("\t{5: %x,%x}", cat5->norm.dummy1, cat5->dummy2);
}
}
}
 
static void print_instr_cat6(instr_t *instr)
{
instr_cat6_t *cat6 = &instr->cat6;
char sd = 0, ss = 0; /* dst/src address space */
bool full = type_size(cat6->type) == 32;
bool nodst = false;
 
printf(".%s ", type[cat6->type]);
 
switch (cat6->opc) {
case OPC_STG:
sd = 'g';
break;
case OPC_STP:
sd = 'p';
break;
case OPC_STL:
case OPC_STLW:
sd = 'l';
break;
 
case OPC_LDG:
ss = 'g';
break;
case OPC_LDP:
ss = 'p';
break;
case OPC_LDL:
case OPC_LDLW:
case OPC_LDLV:
ss = 'l';
break;
 
case OPC_L2G:
ss = 'l';
sd = 'g';
break;
 
case OPC_G2L:
ss = 'g';
sd = 'l';
break;
 
case OPC_PREFETCH:
ss = 'g';
nodst = true;
break;
 
case OPC_STI:
full = false; // XXX or inverts??
break;
}
 
if (cat6->has_off) {
if (!nodst) {
if (sd)
printf("%c[", sd);
print_reg_dst((reg_t)(cat6->a.dst), full, false);
if (sd)
printf("]");
printf(", ");
}
if (ss)
printf("%c[", ss);
print_reg_src((reg_t)(cat6->a.src1), true,
false, false, cat6->a.src1_im, false, false, false);
if (cat6->a.off)
printf("%+d", cat6->a.off);
if (ss)
printf("]");
printf(", ");
print_reg_src((reg_t)(cat6->a.src2), full,
false, false, cat6->a.src2_im, false, false, false);
} else {
if (!nodst) {
if (sd)
printf("%c[", sd);
print_reg_dst((reg_t)(cat6->b.dst), full, false);
if (sd)
printf("]");
printf(", ");
}
if (ss)
printf("%c[", ss);
print_reg_src((reg_t)(cat6->b.src1), true,
false, false, cat6->b.src1_im, false, false, false);
if (ss)
printf("]");
printf(", ");
print_reg_src((reg_t)(cat6->b.src2), full,
false, false, cat6->b.src2_im, false, false, false);
}
 
if (debug & PRINT_VERBOSE) {
switch (cat6->opc) {
case OPC_LDG:
case OPC_LDP:
/* load instructions: */
if (cat6->a.dummy2|cat6->a.dummy3)
printf("\t{6: %x,%x}", cat6->a.dummy2, cat6->a.dummy3);
break;
case OPC_STG:
case OPC_STP:
case OPC_STI:
/* store instructions: */
if (cat6->b.dummy2|cat6->b.dummy3)
printf("\t{6: %x,%x}", cat6->b.dummy2, cat6->b.dummy3);
if (cat6->b.ignore0)
printf("\t{?? %x}", cat6->b.ignore0);
break;
}
}
}
 
/* size of largest OPC field of all the instruction categories: */
#define NOPC_BITS 6
 
struct opc_info {
uint16_t cat;
uint16_t opc;
const char *name;
void (*print)(instr_t *instr);
} opcs[1 << (3+NOPC_BITS)] = {
#define OPC(cat, opc, name) [((cat) << NOPC_BITS) | (opc)] = { (cat), (opc), #name, print_instr_cat##cat }
/* category 0: */
OPC(0, OPC_NOP, nop),
OPC(0, OPC_BR, br),
OPC(0, OPC_JUMP, jump),
OPC(0, OPC_CALL, call),
OPC(0, OPC_RET, ret),
OPC(0, OPC_KILL, kill),
OPC(0, OPC_END, end),
OPC(0, OPC_EMIT, emit),
OPC(0, OPC_CUT, cut),
OPC(0, OPC_CHMASK, chmask),
OPC(0, OPC_CHSH, chsh),
OPC(0, OPC_FLOW_REV, flow_rev),
 
/* category 1: */
OPC(1, 0, ),
 
/* category 2: */
OPC(2, OPC_ADD_F, add.f),
OPC(2, OPC_MIN_F, min.f),
OPC(2, OPC_MAX_F, max.f),
OPC(2, OPC_MUL_F, mul.f),
OPC(2, OPC_SIGN_F, sign.f),
OPC(2, OPC_CMPS_F, cmps.f),
OPC(2, OPC_ABSNEG_F, absneg.f),
OPC(2, OPC_CMPV_F, cmpv.f),
OPC(2, OPC_FLOOR_F, floor.f),
OPC(2, OPC_CEIL_F, ceil.f),
OPC(2, OPC_RNDNE_F, rndne.f),
OPC(2, OPC_RNDAZ_F, rndaz.f),
OPC(2, OPC_TRUNC_F, trunc.f),
OPC(2, OPC_ADD_U, add.u),
OPC(2, OPC_ADD_S, add.s),
OPC(2, OPC_SUB_U, sub.u),
OPC(2, OPC_SUB_S, sub.s),
OPC(2, OPC_CMPS_U, cmps.u),
OPC(2, OPC_CMPS_S, cmps.s),
OPC(2, OPC_MIN_U, min.u),
OPC(2, OPC_MIN_S, min.s),
OPC(2, OPC_MAX_U, max.u),
OPC(2, OPC_MAX_S, max.s),
OPC(2, OPC_ABSNEG_S, absneg.s),
OPC(2, OPC_AND_B, and.b),
OPC(2, OPC_OR_B, or.b),
OPC(2, OPC_NOT_B, not.b),
OPC(2, OPC_XOR_B, xor.b),
OPC(2, OPC_CMPV_U, cmpv.u),
OPC(2, OPC_CMPV_S, cmpv.s),
OPC(2, OPC_MUL_U, mul.u),
OPC(2, OPC_MUL_S, mul.s),
OPC(2, OPC_MULL_U, mull.u),
OPC(2, OPC_BFREV_B, bfrev.b),
OPC(2, OPC_CLZ_S, clz.s),
OPC(2, OPC_CLZ_B, clz.b),
OPC(2, OPC_SHL_B, shl.b),
OPC(2, OPC_SHR_B, shr.b),
OPC(2, OPC_ASHR_B, ashr.b),
OPC(2, OPC_BARY_F, bary.f),
OPC(2, OPC_MGEN_B, mgen.b),
OPC(2, OPC_GETBIT_B, getbit.b),
OPC(2, OPC_SETRM, setrm),
OPC(2, OPC_CBITS_B, cbits.b),
OPC(2, OPC_SHB, shb),
OPC(2, OPC_MSAD, msad),
 
/* category 3: */
OPC(3, OPC_MAD_U16, mad.u16),
OPC(3, OPC_MADSH_U16, madsh.u16),
OPC(3, OPC_MAD_S16, mad.s16),
OPC(3, OPC_MADSH_M16, madsh.m16),
OPC(3, OPC_MAD_U24, mad.u24),
OPC(3, OPC_MAD_S24, mad.s24),
OPC(3, OPC_MAD_F16, mad.f16),
OPC(3, OPC_MAD_F32, mad.f32),
OPC(3, OPC_SEL_B16, sel.b16),
OPC(3, OPC_SEL_B32, sel.b32),
OPC(3, OPC_SEL_S16, sel.s16),
OPC(3, OPC_SEL_S32, sel.s32),
OPC(3, OPC_SEL_F16, sel.f16),
OPC(3, OPC_SEL_F32, sel.f32),
OPC(3, OPC_SAD_S16, sad.s16),
OPC(3, OPC_SAD_S32, sad.s32),
 
/* category 4: */
OPC(4, OPC_RCP, rcp),
OPC(4, OPC_RSQ, rsq),
OPC(4, OPC_LOG2, log2),
OPC(4, OPC_EXP2, exp2),
OPC(4, OPC_SIN, sin),
OPC(4, OPC_COS, cos),
OPC(4, OPC_SQRT, sqrt),
 
/* category 5: */
OPC(5, OPC_ISAM, isam),
OPC(5, OPC_ISAML, isaml),
OPC(5, OPC_ISAMM, isamm),
OPC(5, OPC_SAM, sam),
OPC(5, OPC_SAMB, samb),
OPC(5, OPC_SAML, saml),
OPC(5, OPC_SAMGQ, samgq),
OPC(5, OPC_GETLOD, getlod),
OPC(5, OPC_CONV, conv),
OPC(5, OPC_CONVM, convm),
OPC(5, OPC_GETSIZE, getsize),
OPC(5, OPC_GETBUF, getbuf),
OPC(5, OPC_GETPOS, getpos),
OPC(5, OPC_GETINFO, getinfo),
OPC(5, OPC_DSX, dsx),
OPC(5, OPC_DSY, dsy),
OPC(5, OPC_GATHER4R, gather4r),
OPC(5, OPC_GATHER4G, gather4g),
OPC(5, OPC_GATHER4B, gather4b),
OPC(5, OPC_GATHER4A, gather4a),
OPC(5, OPC_SAMGP0, samgp0),
OPC(5, OPC_SAMGP1, samgp1),
OPC(5, OPC_SAMGP2, samgp2),
OPC(5, OPC_SAMGP3, samgp3),
OPC(5, OPC_DSXPP_1, dsxpp.1),
OPC(5, OPC_DSYPP_1, dsypp.1),
OPC(5, OPC_RGETPOS, rgetpos),
OPC(5, OPC_RGETINFO, rgetinfo),
 
 
/* category 6: */
OPC(6, OPC_LDG, ldg),
OPC(6, OPC_LDL, ldl),
OPC(6, OPC_LDP, ldp),
OPC(6, OPC_STG, stg),
OPC(6, OPC_STL, stl),
OPC(6, OPC_STP, stp),
OPC(6, OPC_STI, sti),
OPC(6, OPC_G2L, g2l),
OPC(6, OPC_L2G, l2g),
OPC(6, OPC_PREFETCH, prefetch),
OPC(6, OPC_LDLW, ldlw),
OPC(6, OPC_STLW, stlw),
OPC(6, OPC_RESFMT, resfmt),
OPC(6, OPC_RESINFO, resinfo),
OPC(6, OPC_ATOMIC_ADD_L, atomic.add.l),
OPC(6, OPC_ATOMIC_SUB_L, atomic.sub.l),
OPC(6, OPC_ATOMIC_XCHG_L, atomic.xchg.l),
OPC(6, OPC_ATOMIC_INC_L, atomic.inc.l),
OPC(6, OPC_ATOMIC_DEC_L, atomic.dec.l),
OPC(6, OPC_ATOMIC_CMPXCHG_L, atomic.cmpxchg.l),
OPC(6, OPC_ATOMIC_MIN_L, atomic.min.l),
OPC(6, OPC_ATOMIC_MAX_L, atomic.max.l),
OPC(6, OPC_ATOMIC_AND_L, atomic.and.l),
OPC(6, OPC_ATOMIC_OR_L, atomic.or.l),
OPC(6, OPC_ATOMIC_XOR_L, atomic.xor.l),
OPC(6, OPC_LDGB_TYPED_4D, ldgb.typed.4d),
OPC(6, OPC_STGB_4D_4, stgb.4d.4),
OPC(6, OPC_STIB, stib),
OPC(6, OPC_LDC_4, ldc.4),
OPC(6, OPC_LDLV, ldlv),
 
 
#undef OPC
};
 
#define GETINFO(instr) (&(opcs[((instr)->opc_cat << NOPC_BITS) | instr_opc(instr)]))
 
// XXX hack.. probably should move this table somewhere common:
#include "ir3.h"
const char *ir3_instr_name(struct ir3_instruction *instr)
{
if (instr->category == -1) return "??meta??";
return opcs[(instr->category << NOPC_BITS) | instr->opc].name;
}
 
static void print_instr(uint32_t *dwords, int level, int n)
{
instr_t *instr = (instr_t *)dwords;
uint32_t opc = instr_opc(instr);
const char *name;
 
if (debug & PRINT_VERBOSE)
printf("%s%04d[%08xx_%08xx] ", levels[level], n, dwords[1], dwords[0]);
 
/* NOTE: the order the flags are printed in is a bit fugly.. but for now I
* try to match the order in llvm-a3xx disassembler for easy
* diff'ing..
*/
 
if (instr->sync)
printf("(sy)");
if (instr->ss && (instr->opc_cat <= 4))
printf("(ss)");
if (instr->jmp_tgt)
printf("(jp)");
if (instr->repeat && (instr->opc_cat <= 4)) {
printf("(rpt%d)", instr->repeat);
repeat = instr->repeat;
} else {
repeat = 0;
}
if (instr->ul && ((2 <= instr->opc_cat) && (instr->opc_cat <= 4)))
printf("(ul)");
 
name = GETINFO(instr)->name;
 
if (name) {
printf("%s", name);
GETINFO(instr)->print(instr);
} else {
printf("unknown(%d,%d)", instr->opc_cat, opc);
}
 
printf("\n");
}
 
int disasm_a3xx(uint32_t *dwords, int sizedwords, int level, enum shader_t type)
{
int i;
 
assert((sizedwords % 2) == 0);
 
for (i = 0; i < sizedwords; i += 2)
print_instr(&dwords[i], level, i/2);
 
return 0;
}
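/* Illustrative usage sketch, not part of the upstream file: with the cat0
 * encoding in instr-a3xx.h an all-zero 64-bit word decodes as "nop", so a
 * minimal call (assuming 'enum shader_t' from disasm.h provides a
 * SHADER_FRAGMENT value) would look roughly like:
 *
 *   uint32_t dwords[4] = { 0, 0, 0, 0 };        // two nop instructions
 *   disasm_a3xx(dwords, 4, 0, SHADER_FRAGMENT);
 */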
/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/freedreno/ir3/instr-a3xx.h
0,0 → 1,702
/*
* Copyright (c) 2013 Rob Clark <robdclark@gmail.com>
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
 
#ifndef INSTR_A3XX_H_
#define INSTR_A3XX_H_
 
#define PACKED __attribute__((__packed__))
 
#include <stdint.h>
#include <assert.h>
 
typedef enum {
/* category 0: */
OPC_NOP = 0,
OPC_BR = 1,
OPC_JUMP = 2,
OPC_CALL = 3,
OPC_RET = 4,
OPC_KILL = 5,
OPC_END = 6,
OPC_EMIT = 7,
OPC_CUT = 8,
OPC_CHMASK = 9,
OPC_CHSH = 10,
OPC_FLOW_REV = 11,
 
/* category 1: */
/* no opc.. all category 1 are variants of mov */
 
/* category 2: */
OPC_ADD_F = 0,
OPC_MIN_F = 1,
OPC_MAX_F = 2,
OPC_MUL_F = 3,
OPC_SIGN_F = 4,
OPC_CMPS_F = 5,
OPC_ABSNEG_F = 6,
OPC_CMPV_F = 7,
/* 8 - invalid */
OPC_FLOOR_F = 9,
OPC_CEIL_F = 10,
OPC_RNDNE_F = 11,
OPC_RNDAZ_F = 12,
OPC_TRUNC_F = 13,
/* 14-15 - invalid */
OPC_ADD_U = 16,
OPC_ADD_S = 17,
OPC_SUB_U = 18,
OPC_SUB_S = 19,
OPC_CMPS_U = 20,
OPC_CMPS_S = 21,
OPC_MIN_U = 22,
OPC_MIN_S = 23,
OPC_MAX_U = 24,
OPC_MAX_S = 25,
OPC_ABSNEG_S = 26,
/* 27 - invalid */
OPC_AND_B = 28,
OPC_OR_B = 29,
OPC_NOT_B = 30,
OPC_XOR_B = 31,
/* 32 - invalid */
OPC_CMPV_U = 33,
OPC_CMPV_S = 34,
/* 35-47 - invalid */
OPC_MUL_U = 48,
OPC_MUL_S = 49,
OPC_MULL_U = 50,
OPC_BFREV_B = 51,
OPC_CLZ_S = 52,
OPC_CLZ_B = 53,
OPC_SHL_B = 54,
OPC_SHR_B = 55,
OPC_ASHR_B = 56,
OPC_BARY_F = 57,
OPC_MGEN_B = 58,
OPC_GETBIT_B = 59,
OPC_SETRM = 60,
OPC_CBITS_B = 61,
OPC_SHB = 62,
OPC_MSAD = 63,
 
/* category 3: */
OPC_MAD_U16 = 0,
OPC_MADSH_U16 = 1,
OPC_MAD_S16 = 2,
OPC_MADSH_M16 = 3, /* should this be .s16? */
OPC_MAD_U24 = 4,
OPC_MAD_S24 = 5,
OPC_MAD_F16 = 6,
OPC_MAD_F32 = 7,
OPC_SEL_B16 = 8,
OPC_SEL_B32 = 9,
OPC_SEL_S16 = 10,
OPC_SEL_S32 = 11,
OPC_SEL_F16 = 12,
OPC_SEL_F32 = 13,
OPC_SAD_S16 = 14,
OPC_SAD_S32 = 15,
 
/* category 4: */
OPC_RCP = 0,
OPC_RSQ = 1,
OPC_LOG2 = 2,
OPC_EXP2 = 3,
OPC_SIN = 4,
OPC_COS = 5,
OPC_SQRT = 6,
// 7-63 - invalid
 
/* category 5: */
OPC_ISAM = 0,
OPC_ISAML = 1,
OPC_ISAMM = 2,
OPC_SAM = 3,
OPC_SAMB = 4,
OPC_SAML = 5,
OPC_SAMGQ = 6,
OPC_GETLOD = 7,
OPC_CONV = 8,
OPC_CONVM = 9,
OPC_GETSIZE = 10,
OPC_GETBUF = 11,
OPC_GETPOS = 12,
OPC_GETINFO = 13,
OPC_DSX = 14,
OPC_DSY = 15,
OPC_GATHER4R = 16,
OPC_GATHER4G = 17,
OPC_GATHER4B = 18,
OPC_GATHER4A = 19,
OPC_SAMGP0 = 20,
OPC_SAMGP1 = 21,
OPC_SAMGP2 = 22,
OPC_SAMGP3 = 23,
OPC_DSXPP_1 = 24,
OPC_DSYPP_1 = 25,
OPC_RGETPOS = 26,
OPC_RGETINFO = 27,
 
/* category 6: */
OPC_LDG = 0, /* load-global */
OPC_LDL = 1,
OPC_LDP = 2,
OPC_STG = 3, /* store-global */
OPC_STL = 4,
OPC_STP = 5,
OPC_STI = 6,
OPC_G2L = 7,
OPC_L2G = 8,
OPC_PREFETCH = 9,
OPC_LDLW = 10,
OPC_STLW = 11,
OPC_RESFMT = 14,
OPC_RESINFO = 15,
OPC_ATOMIC_ADD_L = 16,
OPC_ATOMIC_SUB_L = 17,
OPC_ATOMIC_XCHG_L = 18,
OPC_ATOMIC_INC_L = 19,
OPC_ATOMIC_DEC_L = 20,
OPC_ATOMIC_CMPXCHG_L = 21,
OPC_ATOMIC_MIN_L = 22,
OPC_ATOMIC_MAX_L = 23,
OPC_ATOMIC_AND_L = 24,
OPC_ATOMIC_OR_L = 25,
OPC_ATOMIC_XOR_L = 26,
OPC_LDGB_TYPED_4D = 27,
OPC_STGB_4D_4 = 28,
OPC_STIB = 29,
OPC_LDC_4 = 30,
OPC_LDLV = 31,
 
/* meta instructions (category -1): */
/* placeholder instr to mark inputs/outputs: */
OPC_META_INPUT = 0,
OPC_META_OUTPUT = 1,
/* The "fan-in" and "fan-out" instructions are used for keeping
* track of instructions that write to multiple dst registers
* (fan-out) like texture sample instructions, or read multiple
* consecutive scalar registers (fan-in) (bary.f, texture samp)
*/
OPC_META_FO = 2,
OPC_META_FI = 3,
/* branches/flow control */
OPC_META_FLOW = 4,
OPC_META_PHI = 5,
 
} opc_t;
 
typedef enum {
TYPE_F16 = 0,
TYPE_F32 = 1,
TYPE_U16 = 2,
TYPE_U32 = 3,
TYPE_S16 = 4,
TYPE_S32 = 5,
TYPE_U8 = 6,
TYPE_S8 = 7, // XXX I assume?
} type_t;
 
static inline uint32_t type_size(type_t type)
{
switch (type) {
case TYPE_F32:
case TYPE_U32:
case TYPE_S32:
return 32;
case TYPE_F16:
case TYPE_U16:
case TYPE_S16:
return 16;
case TYPE_U8:
case TYPE_S8:
return 8;
default:
assert(0); /* invalid type */
return 0;
}
}
 
static inline int type_float(type_t type)
{
return (type == TYPE_F32) || (type == TYPE_F16);
}
 
static inline int type_uint(type_t type)
{
return (type == TYPE_U32) || (type == TYPE_U16) || (type == TYPE_U8);
}
 
static inline int type_sint(type_t type)
{
return (type == TYPE_S32) || (type == TYPE_S16) || (type == TYPE_S8);
}
 
typedef union PACKED {
/* normal gpr or const src register: */
struct PACKED {
uint32_t comp : 2;
uint32_t num : 10;
};
/* for immediate val: */
int32_t iim_val : 11;
/* to make compiler happy: */
uint32_t dummy32;
uint32_t dummy10 : 10;
uint32_t dummy11 : 11;
uint32_t dummy12 : 12;
uint32_t dummy13 : 13;
uint32_t dummy8 : 8;
} reg_t;
 
/* special registers: */
#define REG_A0 61 /* address register */
#define REG_P0 62 /* predicate register */
 
static inline int reg_special(reg_t reg)
{
return (reg.num == REG_A0) || (reg.num == REG_P0);
}
 
typedef struct PACKED {
/* dword0: */
int16_t immed : 16;
uint32_t dummy1 : 16;
 
/* dword1: */
uint32_t dummy2 : 8;
uint32_t repeat : 3;
uint32_t dummy3 : 1;
uint32_t ss : 1;
uint32_t dummy4 : 7;
uint32_t inv : 1;
uint32_t comp : 2;
uint32_t opc : 4;
uint32_t jmp_tgt : 1;
uint32_t sync : 1;
uint32_t opc_cat : 3;
} instr_cat0_t;
 
typedef struct PACKED {
/* dword0: */
union PACKED {
/* for normal src register: */
struct PACKED {
uint32_t src : 11;
/* at least low bit of pad must be zero or it will
* look like an address relative src
*/
uint32_t pad : 21;
};
/* for address relative: */
struct PACKED {
int32_t off : 10;
uint32_t src_rel_c : 1;
uint32_t src_rel : 1;
uint32_t unknown : 20;
};
/* for immediate: */
int32_t iim_val;
uint32_t uim_val;
float fim_val;
};
 
/* dword1: */
uint32_t dst : 8;
uint32_t repeat : 3;
uint32_t src_r : 1;
uint32_t ss : 1;
uint32_t ul : 1;
uint32_t dst_type : 3;
uint32_t dst_rel : 1;
uint32_t src_type : 3;
uint32_t src_c : 1;
uint32_t src_im : 1;
uint32_t even : 1;
uint32_t pos_inf : 1;
uint32_t must_be_0 : 2;
uint32_t jmp_tgt : 1;
uint32_t sync : 1;
uint32_t opc_cat : 3;
} instr_cat1_t;
 
typedef struct PACKED {
/* dword0: */
union PACKED {
struct PACKED {
uint32_t src1 : 11;
uint32_t must_be_zero1: 2;
uint32_t src1_im : 1; /* immediate */
uint32_t src1_neg : 1; /* negate */
uint32_t src1_abs : 1; /* absolute value */
};
struct PACKED {
uint32_t src1 : 10;
uint32_t src1_c : 1; /* relative-const */
uint32_t src1_rel : 1; /* relative address */
uint32_t must_be_zero : 1;
uint32_t dummy : 3;
} rel1;
struct PACKED {
uint32_t src1 : 12;
uint32_t src1_c : 1; /* const */
uint32_t dummy : 3;
} c1;
};
 
union PACKED {
struct PACKED {
uint32_t src2 : 11;
uint32_t must_be_zero2: 2;
uint32_t src2_im : 1; /* immediate */
uint32_t src2_neg : 1; /* negate */
uint32_t src2_abs : 1; /* absolute value */
};
struct PACKED {
uint32_t src2 : 10;
uint32_t src2_c : 1; /* relative-const */
uint32_t src2_rel : 1; /* relative address */
uint32_t must_be_zero : 1;
uint32_t dummy : 3;
} rel2;
struct PACKED {
uint32_t src2 : 12;
uint32_t src2_c : 1; /* const */
uint32_t dummy : 3;
} c2;
};
 
/* dword1: */
uint32_t dst : 8;
uint32_t repeat : 3;
uint32_t src1_r : 1;
uint32_t ss : 1;
uint32_t ul : 1; /* dunno */
uint32_t dst_half : 1; /* or widen/narrow.. ie. dst hrN <-> rN */
uint32_t ei : 1;
uint32_t cond : 3;
uint32_t src2_r : 1;
uint32_t full : 1; /* not half */
uint32_t opc : 6;
uint32_t jmp_tgt : 1;
uint32_t sync : 1;
uint32_t opc_cat : 3;
} instr_cat2_t;
 
typedef struct PACKED {
/* dword0: */
union PACKED {
struct PACKED {
uint32_t src1 : 11;
uint32_t must_be_zero1: 2;
uint32_t src2_c : 1;
uint32_t src1_neg : 1;
uint32_t src2_r : 1;
};
struct PACKED {
uint32_t src1 : 10;
uint32_t src1_c : 1;
uint32_t src1_rel : 1;
uint32_t must_be_zero : 1;
uint32_t dummy : 3;
} rel1;
struct PACKED {
uint32_t src1 : 12;
uint32_t src1_c : 1;
uint32_t dummy : 3;
} c1;
};
 
union PACKED {
struct PACKED {
uint32_t src3 : 11;
uint32_t must_be_zero2: 2;
uint32_t src3_r : 1;
uint32_t src2_neg : 1;
uint32_t src3_neg : 1;
};
struct PACKED {
uint32_t src3 : 10;
uint32_t src3_c : 1;
uint32_t src3_rel : 1;
uint32_t must_be_zero : 1;
uint32_t dummy : 3;
} rel2;
struct PACKED {
uint32_t src3 : 12;
uint32_t src3_c : 1;
uint32_t dummy : 3;
} c2;
};
 
/* dword1: */
uint32_t dst : 8;
uint32_t repeat : 3;
uint32_t src1_r : 1;
uint32_t ss : 1;
uint32_t ul : 1;
uint32_t dst_half : 1; /* or widen/narrow.. ie. dst hrN <-> rN */
uint32_t src2 : 8;
uint32_t opc : 4;
uint32_t jmp_tgt : 1;
uint32_t sync : 1;
uint32_t opc_cat : 3;
} instr_cat3_t;
 
static inline bool instr_cat3_full(instr_cat3_t *cat3)
{
switch (cat3->opc) {
case OPC_MAD_F16:
case OPC_MAD_U16:
case OPC_MAD_S16:
case OPC_SEL_B16:
case OPC_SEL_S16:
case OPC_SEL_F16:
case OPC_SAD_S16:
case OPC_SAD_S32: // really??
return false;
default:
return true;
}
}
 
typedef struct PACKED {
/* dword0: */
union PACKED {
struct PACKED {
uint32_t src : 11;
uint32_t must_be_zero1: 2;
uint32_t src_im : 1; /* immediate */
uint32_t src_neg : 1; /* negate */
uint32_t src_abs : 1; /* absolute value */
};
struct PACKED {
uint32_t src : 10;
uint32_t src_c : 1; /* relative-const */
uint32_t src_rel : 1; /* relative address */
uint32_t must_be_zero : 1;
uint32_t dummy : 3;
} rel;
struct PACKED {
uint32_t src : 12;
uint32_t src_c : 1; /* const */
uint32_t dummy : 3;
} c;
};
uint32_t dummy1 : 16; /* seem to be ignored */
 
/* dword1: */
uint32_t dst : 8;
uint32_t repeat : 3;
uint32_t src_r : 1;
uint32_t ss : 1;
uint32_t ul : 1;
uint32_t dst_half : 1; /* or widen/narrow.. ie. dst hrN <-> rN */
uint32_t dummy2 : 5; /* seem to be ignored */
uint32_t full : 1; /* not half */
uint32_t opc : 6;
uint32_t jmp_tgt : 1;
uint32_t sync : 1;
uint32_t opc_cat : 3;
} instr_cat4_t;
 
typedef struct PACKED {
/* dword0: */
union PACKED {
/* normal case: */
struct PACKED {
uint32_t full : 1; /* not half */
uint32_t src1 : 8;
uint32_t src2 : 8;
uint32_t dummy1 : 4; /* seem to be ignored */
uint32_t samp : 4;
uint32_t tex : 7;
} norm;
/* s2en case: */
struct PACKED {
uint32_t full : 1; /* not half */
uint32_t src1 : 8;
uint32_t src2 : 11;
uint32_t dummy1 : 1;
uint32_t src3 : 8;
uint32_t dummy2 : 3;
} s2en;
/* same in either case: */
// XXX I think, confirm this
struct PACKED {
uint32_t full : 1; /* not half */
uint32_t src1 : 8;
uint32_t pad : 23;
};
};
 
/* dword1: */
uint32_t dst : 8;
uint32_t wrmask : 4; /* write-mask */
uint32_t type : 3;
uint32_t dummy2 : 1; /* seems to be ignored */
uint32_t is_3d : 1;
 
uint32_t is_a : 1;
uint32_t is_s : 1;
uint32_t is_s2en : 1;
uint32_t is_o : 1;
uint32_t is_p : 1;
 
uint32_t opc : 5;
uint32_t jmp_tgt : 1;
uint32_t sync : 1;
uint32_t opc_cat : 3;
} instr_cat5_t;
 
/* [src1 + off], src2: */
typedef struct PACKED {
/* dword0: */
uint32_t mustbe1 : 1;
int32_t off : 13;
uint32_t src1 : 8;
uint32_t src1_im : 1;
uint32_t src2_im : 1;
uint32_t src2 : 8;
 
/* dword1: */
uint32_t dst : 8;
uint32_t dummy2 : 9;
uint32_t type : 3;
uint32_t dummy3 : 2;
uint32_t opc : 5;
uint32_t jmp_tgt : 1;
uint32_t sync : 1;
uint32_t opc_cat : 3;
} instr_cat6a_t;
 
/* [src1], src2: */
typedef struct PACKED {
/* dword0: */
uint32_t mustbe0 : 1;
uint32_t src1 : 8;
uint32_t ignore0 : 13;
uint32_t src1_im : 1;
uint32_t src2_im : 1;
uint32_t src2 : 8;
 
/* dword1: */
uint32_t dst : 8;
uint32_t dummy2 : 9;
uint32_t type : 3;
uint32_t dummy3 : 2;
uint32_t opc : 5;
uint32_t jmp_tgt : 1;
uint32_t sync : 1;
uint32_t opc_cat : 3;
} instr_cat6b_t;
 
/* I think some of the other cat6 instructions use additional
* sub-encodings..
*/
 
typedef union PACKED {
instr_cat6a_t a;
instr_cat6b_t b;
struct PACKED {
/* dword0: */
uint32_t has_off : 1;
uint32_t pad1 : 31;
 
/* dword1: */
uint32_t dst : 8;
uint32_t dummy2 : 9;
uint32_t type : 3;
uint32_t dummy3 : 2;
uint32_t opc : 5;
uint32_t jmp_tgt : 1;
uint32_t sync : 1;
uint32_t opc_cat : 3;
};
} instr_cat6_t;
 
typedef union PACKED {
instr_cat0_t cat0;
instr_cat1_t cat1;
instr_cat2_t cat2;
instr_cat3_t cat3;
instr_cat4_t cat4;
instr_cat5_t cat5;
instr_cat6_t cat6;
struct PACKED {
/* dword0: */
uint64_t pad1 : 40;
uint32_t repeat : 3; /* cat0-cat4 */
uint32_t pad2 : 1;
uint32_t ss : 1; /* cat1-cat4 (cat0??) */
uint32_t ul : 1; /* cat2-cat4 (and cat1 in blob.. which may be bug??) */
uint32_t pad3 : 13;
uint32_t jmp_tgt : 1;
uint32_t sync : 1;
uint32_t opc_cat : 3;
 
};
} instr_t;
 
static inline uint32_t instr_opc(instr_t *instr)
{
switch (instr->opc_cat) {
case 0: return instr->cat0.opc;
case 1: return 0;
case 2: return instr->cat2.opc;
case 3: return instr->cat3.opc;
case 4: return instr->cat4.opc;
case 5: return instr->cat5.opc;
case 6: return instr->cat6.opc;
default: return 0;
}
}
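/* Illustrative decode sketch, not part of the original header: the
 * disassembler casts the raw 64-bit instruction to instr_t and dispatches on
 * opc_cat plus the category-local opcode (assuming the little-endian
 * bitfield layout these packed structs rely on):
 *
 *   uint32_t dwords[2] = { 0, 2u << 29 };   // opc_cat=2, opc=0
 *   instr_t *instr = (instr_t *)dwords;
 *   assert(instr->opc_cat == 2);
 *   assert(instr_opc(instr) == OPC_ADD_F);  // cat2 opcode 0 is add.f
 */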
 
static inline bool is_mad(opc_t opc)
{
switch (opc) {
case OPC_MAD_U16:
case OPC_MAD_S16:
case OPC_MAD_U24:
case OPC_MAD_S24:
case OPC_MAD_F16:
case OPC_MAD_F32:
return true;
default:
return false;
}
}
 
static inline bool is_madsh(opc_t opc)
{
switch (opc) {
case OPC_MADSH_U16:
case OPC_MADSH_M16:
return true;
default:
return false;
}
}
 
#endif /* INSTR_A3XX_H_ */
/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/freedreno/ir3/ir3.c
0,0 → 1,703
/*
* Copyright (c) 2012 Rob Clark <robdclark@gmail.com>
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
 
#include "ir3.h"
 
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <assert.h>
#include <stdbool.h>
#include <errno.h>
 
#include "freedreno_util.h"
#include "instr-a3xx.h"
 
#define CHUNK_SZ 1020
 
struct ir3_heap_chunk {
struct ir3_heap_chunk *next;
uint32_t heap[CHUNK_SZ];
};
 
static void grow_heap(struct ir3 *shader)
{
struct ir3_heap_chunk *chunk = calloc(1, sizeof(*chunk));
chunk->next = shader->chunk;
shader->chunk = chunk;
shader->heap_idx = 0;
}
 
/* simple allocator to carve allocations out of an up-front allocated heap,
* so that we can free everything easily in one shot.
*/
void * ir3_alloc(struct ir3 *shader, int sz)
{
void *ptr;
 
sz = align(sz, 4) / 4;
 
if ((shader->heap_idx + sz) > CHUNK_SZ)
grow_heap(shader);
 
ptr = &shader->chunk->heap[shader->heap_idx];
shader->heap_idx += sz;
 
return ptr;
}
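/* Illustrative usage sketch, not part of the original source: allocations
 * are carved out of CHUNK_SZ-dword chunks and are only released all at once
 * when the shader is destroyed, e.g.:
 *
 *   struct ir3 *shader = ir3_create();
 *   struct ir3_register *r = ir3_alloc(shader, sizeof(*r));
 *   ...use r...
 *   ir3_destroy(shader);   // frees every chunk; no per-allocation free()
 */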
 
struct ir3 * ir3_create(void)
{
struct ir3 *shader =
calloc(1, sizeof(struct ir3));
grow_heap(shader);
return shader;
}
 
void ir3_destroy(struct ir3 *shader)
{
while (shader->chunk) {
struct ir3_heap_chunk *chunk = shader->chunk;
shader->chunk = chunk->next;
free(chunk);
}
free(shader->instrs);
free(shader->baryfs);
free(shader);
}
 
#define iassert(cond) do { \
if (!(cond)) { \
assert(cond); \
return -1; \
} } while (0)
 
static uint32_t reg(struct ir3_register *reg, struct ir3_info *info,
uint32_t repeat, uint32_t valid_flags)
{
reg_t val = { .dummy32 = 0 };
 
if (reg->flags & ~valid_flags) {
debug_printf("INVALID FLAGS: %x vs %x\n",
reg->flags, valid_flags);
}
 
if (!(reg->flags & IR3_REG_R))
repeat = 0;
 
if (reg->flags & IR3_REG_IMMED) {
val.iim_val = reg->iim_val;
} else {
unsigned components;
 
if (reg->flags & IR3_REG_RELATIV) {
components = reg->size;
val.dummy10 = reg->offset;
} else {
components = util_last_bit(reg->wrmask);
val.comp = reg->num & 0x3;
val.num = reg->num >> 2;
}
 
int16_t max = (reg->num + repeat + components - 1) >> 2;
 
if (reg->flags & IR3_REG_CONST) {
info->max_const = MAX2(info->max_const, max);
} else if (val.num == 63) {
/* ignore writes to dummy register r63.x */
} else if ((max != REG_A0) && (max != REG_P0)) {
if (reg->flags & IR3_REG_HALF) {
info->max_half_reg = MAX2(info->max_half_reg, max);
} else {
info->max_reg = MAX2(info->max_reg, max);
}
}
}
 
return val.dummy32;
}
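/* Worked example (illustrative, not in the original source): for a full dst
 * of r2.x with wrmask 0xf and no (r) repeat, reg->num is regid(2, 0) = 8 and
 * components = 4, so max = (8 + 0 + 4 - 1) >> 2 = 2, i.e. info->max_reg
 * records that r2 is the highest GPR touched by the instruction.
 */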
 
static int emit_cat0(struct ir3_instruction *instr, void *ptr,
struct ir3_info *info)
{
instr_cat0_t *cat0 = ptr;
 
cat0->immed = instr->cat0.immed;
cat0->repeat = instr->repeat;
cat0->ss = !!(instr->flags & IR3_INSTR_SS);
cat0->inv = instr->cat0.inv;
cat0->comp = instr->cat0.comp;
cat0->opc = instr->opc;
cat0->jmp_tgt = !!(instr->flags & IR3_INSTR_JP);
cat0->sync = !!(instr->flags & IR3_INSTR_SY);
cat0->opc_cat = 0;
 
return 0;
}
 
static uint32_t type_flags(type_t type)
{
return (type_size(type) == 32) ? 0 : IR3_REG_HALF;
}
 
static int emit_cat1(struct ir3_instruction *instr, void *ptr,
struct ir3_info *info)
{
struct ir3_register *dst = instr->regs[0];
struct ir3_register *src = instr->regs[1];
instr_cat1_t *cat1 = ptr;
 
iassert(instr->regs_count == 2);
iassert(!((dst->flags ^ type_flags(instr->cat1.dst_type)) & IR3_REG_HALF));
iassert((src->flags & IR3_REG_IMMED) ||
!((src->flags ^ type_flags(instr->cat1.src_type)) & IR3_REG_HALF));
 
if (src->flags & IR3_REG_IMMED) {
cat1->iim_val = src->iim_val;
cat1->src_im = 1;
} else if (src->flags & IR3_REG_RELATIV) {
cat1->off = reg(src, info, instr->repeat,
IR3_REG_R | IR3_REG_CONST | IR3_REG_HALF | IR3_REG_RELATIV);
cat1->src_rel = 1;
cat1->src_rel_c = !!(src->flags & IR3_REG_CONST);
} else {
cat1->src = reg(src, info, instr->repeat,
IR3_REG_R | IR3_REG_CONST | IR3_REG_HALF);
cat1->src_c = !!(src->flags & IR3_REG_CONST);
}
 
cat1->dst = reg(dst, info, instr->repeat,
IR3_REG_RELATIV | IR3_REG_EVEN |
IR3_REG_R | IR3_REG_POS_INF | IR3_REG_HALF);
cat1->repeat = instr->repeat;
cat1->src_r = !!(src->flags & IR3_REG_R);
cat1->ss = !!(instr->flags & IR3_INSTR_SS);
cat1->ul = !!(instr->flags & IR3_INSTR_UL);
cat1->dst_type = instr->cat1.dst_type;
cat1->dst_rel = !!(dst->flags & IR3_REG_RELATIV);
cat1->src_type = instr->cat1.src_type;
cat1->even = !!(dst->flags & IR3_REG_EVEN);
cat1->pos_inf = !!(dst->flags & IR3_REG_POS_INF);
cat1->jmp_tgt = !!(instr->flags & IR3_INSTR_JP);
cat1->sync = !!(instr->flags & IR3_INSTR_SY);
cat1->opc_cat = 1;
 
return 0;
}
 
static int emit_cat2(struct ir3_instruction *instr, void *ptr,
struct ir3_info *info)
{
struct ir3_register *dst = instr->regs[0];
struct ir3_register *src1 = instr->regs[1];
struct ir3_register *src2 = instr->regs[2];
instr_cat2_t *cat2 = ptr;
unsigned absneg = ir3_cat2_absneg(instr->opc);
 
iassert((instr->regs_count == 2) || (instr->regs_count == 3));
 
if (src1->flags & IR3_REG_RELATIV) {
iassert(src1->num < (1 << 10));
cat2->rel1.src1 = reg(src1, info, instr->repeat,
IR3_REG_RELATIV | IR3_REG_CONST | IR3_REG_R |
IR3_REG_HALF | absneg);
cat2->rel1.src1_c = !!(src1->flags & IR3_REG_CONST);
cat2->rel1.src1_rel = 1;
} else if (src1->flags & IR3_REG_CONST) {
iassert(src1->num < (1 << 12));
cat2->c1.src1 = reg(src1, info, instr->repeat,
IR3_REG_CONST | IR3_REG_R | IR3_REG_HALF);
cat2->c1.src1_c = 1;
} else {
iassert(src1->num < (1 << 11));
cat2->src1 = reg(src1, info, instr->repeat,
IR3_REG_IMMED | IR3_REG_R | IR3_REG_HALF |
absneg);
}
cat2->src1_im = !!(src1->flags & IR3_REG_IMMED);
cat2->src1_neg = !!(src1->flags & (IR3_REG_FNEG | IR3_REG_SNEG | IR3_REG_BNOT));
cat2->src1_abs = !!(src1->flags & (IR3_REG_FABS | IR3_REG_SABS));
cat2->src1_r = !!(src1->flags & IR3_REG_R);
 
if (src2) {
iassert((src2->flags & IR3_REG_IMMED) ||
!((src1->flags ^ src2->flags) & IR3_REG_HALF));
 
if (src2->flags & IR3_REG_RELATIV) {
iassert(src2->num < (1 << 10));
cat2->rel2.src2 = reg(src2, info, instr->repeat,
IR3_REG_RELATIV | IR3_REG_CONST | IR3_REG_R |
IR3_REG_HALF | absneg);
cat2->rel2.src2_c = !!(src2->flags & IR3_REG_CONST);
cat2->rel2.src2_rel = 1;
} else if (src2->flags & IR3_REG_CONST) {
iassert(src2->num < (1 << 12));
cat2->c2.src2 = reg(src2, info, instr->repeat,
IR3_REG_CONST | IR3_REG_R | IR3_REG_HALF);
cat2->c2.src2_c = 1;
} else {
iassert(src2->num < (1 << 11));
cat2->src2 = reg(src2, info, instr->repeat,
IR3_REG_IMMED | IR3_REG_R | IR3_REG_HALF |
absneg);
}
 
cat2->src2_im = !!(src2->flags & IR3_REG_IMMED);
cat2->src2_neg = !!(src2->flags & (IR3_REG_FNEG | IR3_REG_SNEG | IR3_REG_BNOT));
cat2->src2_abs = !!(src2->flags & (IR3_REG_FABS | IR3_REG_SABS));
cat2->src2_r = !!(src2->flags & IR3_REG_R);
}
 
cat2->dst = reg(dst, info, instr->repeat,
IR3_REG_R | IR3_REG_EI | IR3_REG_HALF);
cat2->repeat = instr->repeat;
cat2->ss = !!(instr->flags & IR3_INSTR_SS);
cat2->ul = !!(instr->flags & IR3_INSTR_UL);
cat2->dst_half = !!((src1->flags ^ dst->flags) & IR3_REG_HALF);
cat2->ei = !!(dst->flags & IR3_REG_EI);
cat2->cond = instr->cat2.condition;
cat2->full = ! (src1->flags & IR3_REG_HALF);
cat2->opc = instr->opc;
cat2->jmp_tgt = !!(instr->flags & IR3_INSTR_JP);
cat2->sync = !!(instr->flags & IR3_INSTR_SY);
cat2->opc_cat = 2;
 
return 0;
}
 
static int emit_cat3(struct ir3_instruction *instr, void *ptr,
struct ir3_info *info)
{
struct ir3_register *dst = instr->regs[0];
struct ir3_register *src1 = instr->regs[1];
struct ir3_register *src2 = instr->regs[2];
struct ir3_register *src3 = instr->regs[3];
unsigned absneg = ir3_cat3_absneg(instr->opc);
instr_cat3_t *cat3 = ptr;
uint32_t src_flags = 0;
 
switch (instr->opc) {
case OPC_MAD_F16:
case OPC_MAD_U16:
case OPC_MAD_S16:
case OPC_SEL_B16:
case OPC_SEL_S16:
case OPC_SEL_F16:
case OPC_SAD_S16:
case OPC_SAD_S32: // really??
src_flags |= IR3_REG_HALF;
break;
default:
break;
}
 
iassert(instr->regs_count == 4);
iassert(!((src1->flags ^ src_flags) & IR3_REG_HALF));
iassert(!((src2->flags ^ src_flags) & IR3_REG_HALF));
iassert(!((src3->flags ^ src_flags) & IR3_REG_HALF));
 
if (src1->flags & IR3_REG_RELATIV) {
iassert(src1->num < (1 << 10));
cat3->rel1.src1 = reg(src1, info, instr->repeat,
IR3_REG_RELATIV | IR3_REG_CONST | IR3_REG_R |
IR3_REG_HALF | absneg);
cat3->rel1.src1_c = !!(src1->flags & IR3_REG_CONST);
cat3->rel1.src1_rel = 1;
} else if (src1->flags & IR3_REG_CONST) {
iassert(src1->num < (1 << 12));
cat3->c1.src1 = reg(src1, info, instr->repeat,
IR3_REG_CONST | IR3_REG_R | IR3_REG_HALF);
cat3->c1.src1_c = 1;
} else {
iassert(src1->num < (1 << 11));
cat3->src1 = reg(src1, info, instr->repeat,
IR3_REG_R | IR3_REG_HALF | absneg);
}
 
cat3->src1_neg = !!(src1->flags & (IR3_REG_FNEG | IR3_REG_SNEG | IR3_REG_BNOT));
cat3->src1_r = !!(src1->flags & IR3_REG_R);
 
cat3->src2 = reg(src2, info, instr->repeat,
IR3_REG_CONST | IR3_REG_R | IR3_REG_HALF | absneg);
cat3->src2_c = !!(src2->flags & IR3_REG_CONST);
cat3->src2_neg = !!(src2->flags & (IR3_REG_FNEG | IR3_REG_SNEG | IR3_REG_BNOT));
cat3->src2_r = !!(src2->flags & IR3_REG_R);
 
 
if (src3->flags & IR3_REG_RELATIV) {
iassert(src3->num < (1 << 10));
cat3->rel2.src3 = reg(src3, info, instr->repeat,
IR3_REG_RELATIV | IR3_REG_CONST | IR3_REG_R |
IR3_REG_HALF | absneg);
cat3->rel2.src3_c = !!(src3->flags & IR3_REG_CONST);
cat3->rel2.src3_rel = 1;
} else if (src3->flags & IR3_REG_CONST) {
iassert(src3->num < (1 << 12));
cat3->c2.src3 = reg(src3, info, instr->repeat,
IR3_REG_CONST | IR3_REG_R | IR3_REG_HALF);
cat3->c2.src3_c = 1;
} else {
iassert(src3->num < (1 << 11));
cat3->src3 = reg(src3, info, instr->repeat,
IR3_REG_R | IR3_REG_HALF | absneg);
}
 
cat3->src3_neg = !!(src3->flags & (IR3_REG_FNEG | IR3_REG_SNEG | IR3_REG_BNOT));
cat3->src3_r = !!(src3->flags & IR3_REG_R);
 
cat3->dst = reg(dst, info, instr->repeat, IR3_REG_R | IR3_REG_HALF);
cat3->repeat = instr->repeat;
cat3->ss = !!(instr->flags & IR3_INSTR_SS);
cat3->ul = !!(instr->flags & IR3_INSTR_UL);
cat3->dst_half = !!((src_flags ^ dst->flags) & IR3_REG_HALF);
cat3->opc = instr->opc;
cat3->jmp_tgt = !!(instr->flags & IR3_INSTR_JP);
cat3->sync = !!(instr->flags & IR3_INSTR_SY);
cat3->opc_cat = 3;
 
return 0;
}
 
static int emit_cat4(struct ir3_instruction *instr, void *ptr,
struct ir3_info *info)
{
struct ir3_register *dst = instr->regs[0];
struct ir3_register *src = instr->regs[1];
instr_cat4_t *cat4 = ptr;
 
iassert(instr->regs_count == 2);
 
if (src->flags & IR3_REG_RELATIV) {
iassert(src->num < (1 << 10));
cat4->rel.src = reg(src, info, instr->repeat,
IR3_REG_RELATIV | IR3_REG_CONST | IR3_REG_FNEG |
IR3_REG_FABS | IR3_REG_R | IR3_REG_HALF);
cat4->rel.src_c = !!(src->flags & IR3_REG_CONST);
cat4->rel.src_rel = 1;
} else if (src->flags & IR3_REG_CONST) {
iassert(src->num < (1 << 12));
cat4->c.src = reg(src, info, instr->repeat,
IR3_REG_CONST | IR3_REG_FNEG | IR3_REG_FABS |
IR3_REG_R | IR3_REG_HALF);
cat4->c.src_c = 1;
} else {
iassert(src->num < (1 << 11));
cat4->src = reg(src, info, instr->repeat,
IR3_REG_IMMED | IR3_REG_FNEG | IR3_REG_FABS |
IR3_REG_R | IR3_REG_HALF);
}
 
cat4->src_im = !!(src->flags & IR3_REG_IMMED);
cat4->src_neg = !!(src->flags & IR3_REG_FNEG);
cat4->src_abs = !!(src->flags & IR3_REG_FABS);
cat4->src_r = !!(src->flags & IR3_REG_R);
 
cat4->dst = reg(dst, info, instr->repeat, IR3_REG_R | IR3_REG_HALF);
cat4->repeat = instr->repeat;
cat4->ss = !!(instr->flags & IR3_INSTR_SS);
cat4->ul = !!(instr->flags & IR3_INSTR_UL);
cat4->dst_half = !!((src->flags ^ dst->flags) & IR3_REG_HALF);
cat4->full = ! (src->flags & IR3_REG_HALF);
cat4->opc = instr->opc;
cat4->jmp_tgt = !!(instr->flags & IR3_INSTR_JP);
cat4->sync = !!(instr->flags & IR3_INSTR_SY);
cat4->opc_cat = 4;
 
return 0;
}
 
static int emit_cat5(struct ir3_instruction *instr, void *ptr,
struct ir3_info *info)
{
struct ir3_register *dst = instr->regs[0];
struct ir3_register *src1 = instr->regs[1];
struct ir3_register *src2 = instr->regs[2];
struct ir3_register *src3 = instr->regs[3];
instr_cat5_t *cat5 = ptr;
 
iassert(!((dst->flags ^ type_flags(instr->cat5.type)) & IR3_REG_HALF));
 
if (src1) {
cat5->full = ! (src1->flags & IR3_REG_HALF);
cat5->src1 = reg(src1, info, instr->repeat, IR3_REG_HALF);
}
 
 
if (instr->flags & IR3_INSTR_S2EN) {
if (src2) {
iassert(!((src1->flags ^ src2->flags) & IR3_REG_HALF));
cat5->s2en.src2 = reg(src2, info, instr->repeat, IR3_REG_HALF);
}
if (src3) {
iassert(src3->flags & IR3_REG_HALF);
cat5->s2en.src3 = reg(src3, info, instr->repeat, IR3_REG_HALF);
}
iassert(!(instr->cat5.samp | instr->cat5.tex));
} else {
iassert(!src3);
if (src2) {
iassert(!((src1->flags ^ src2->flags) & IR3_REG_HALF));
cat5->norm.src2 = reg(src2, info, instr->repeat, IR3_REG_HALF);
}
cat5->norm.samp = instr->cat5.samp;
cat5->norm.tex = instr->cat5.tex;
}
 
cat5->dst = reg(dst, info, instr->repeat, IR3_REG_R | IR3_REG_HALF);
cat5->wrmask = dst->wrmask;
cat5->type = instr->cat5.type;
cat5->is_3d = !!(instr->flags & IR3_INSTR_3D);
cat5->is_a = !!(instr->flags & IR3_INSTR_A);
cat5->is_s = !!(instr->flags & IR3_INSTR_S);
cat5->is_s2en = !!(instr->flags & IR3_INSTR_S2EN);
cat5->is_o = !!(instr->flags & IR3_INSTR_O);
cat5->is_p = !!(instr->flags & IR3_INSTR_P);
cat5->opc = instr->opc;
cat5->jmp_tgt = !!(instr->flags & IR3_INSTR_JP);
cat5->sync = !!(instr->flags & IR3_INSTR_SY);
cat5->opc_cat = 5;
 
return 0;
}
 
static int emit_cat6(struct ir3_instruction *instr, void *ptr,
struct ir3_info *info)
{
struct ir3_register *dst = instr->regs[0];
struct ir3_register *src1 = instr->regs[1];
struct ir3_register *src2 = (instr->regs_count >= 3) ? instr->regs[2] : NULL;
instr_cat6_t *cat6 = ptr;
 
iassert(instr->regs_count >= 2);
 
if (instr->cat6.offset || instr->opc == OPC_LDG) {
instr_cat6a_t *cat6a = ptr;
 
cat6->has_off = true;
 
cat6a->dst = reg(dst, info, instr->repeat, IR3_REG_R | IR3_REG_HALF);
cat6a->src1 = reg(src1, info, instr->repeat, IR3_REG_IMMED);
cat6a->src1_im = !!(src1->flags & IR3_REG_IMMED);
if (src2) {
cat6a->src2 = reg(src2, info, instr->repeat, IR3_REG_IMMED);
cat6a->src2_im = !!(src2->flags & IR3_REG_IMMED);
}
cat6a->off = instr->cat6.offset;
} else {
instr_cat6b_t *cat6b = ptr;
 
cat6->has_off = false;
 
cat6b->dst = reg(dst, info, instr->repeat, IR3_REG_R | IR3_REG_HALF);
cat6b->src1 = reg(src1, info, instr->repeat, IR3_REG_IMMED);
cat6b->src1_im = !!(src1->flags & IR3_REG_IMMED);
if (src2) {
cat6b->src2 = reg(src2, info, instr->repeat, IR3_REG_IMMED);
cat6b->src2_im = !!(src2->flags & IR3_REG_IMMED);
}
}
 
cat6->type = instr->cat6.type;
cat6->opc = instr->opc;
cat6->jmp_tgt = !!(instr->flags & IR3_INSTR_JP);
cat6->sync = !!(instr->flags & IR3_INSTR_SY);
cat6->opc_cat = 6;
 
return 0;
}
 
static int (*emit[])(struct ir3_instruction *instr, void *ptr,
struct ir3_info *info) = {
emit_cat0, emit_cat1, emit_cat2, emit_cat3, emit_cat4, emit_cat5, emit_cat6,
};
 
void * ir3_assemble(struct ir3 *shader, struct ir3_info *info,
uint32_t gpu_id)
{
uint32_t *ptr, *dwords;
uint32_t i;
 
info->max_reg = -1;
info->max_half_reg = -1;
info->max_const = -1;
info->instrs_count = 0;
 
/* need an integer number of instruction "groups" (sets of 16
* instructions on a4xx or sets of 4 instructions on a3xx),
* so pad out w/ NOPs if needed: (NOTE each instruction is 64bits)
*/
if (gpu_id >= 400) {
info->sizedwords = 2 * align(shader->instrs_count, 16);
} else {
info->sizedwords = 2 * align(shader->instrs_count, 4);
}
 
ptr = dwords = calloc(4, info->sizedwords);
 
for (i = 0; i < shader->instrs_count; i++) {
struct ir3_instruction *instr = shader->instrs[i];
int ret = emit[instr->category](instr, dwords, info);
if (ret)
goto fail;
info->instrs_count += 1 + instr->repeat;
dwords += 2;
}
 
return ptr;
 
fail:
free(ptr);
return NULL;
}
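/* Worked example (illustrative): on a3xx (gpu_id < 400) a shader with 10
 * instructions pads to align(10, 4) = 12 instruction slots, so
 * info->sizedwords = 2 * 12 = 24; on a4xx the same shader pads to
 * align(10, 16) = 16 slots, i.e. 32 dwords.
 */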
 
static struct ir3_register * reg_create(struct ir3 *shader,
int num, int flags)
{
struct ir3_register *reg =
ir3_alloc(shader, sizeof(struct ir3_register));
reg->wrmask = 1;
reg->flags = flags;
reg->num = num;
return reg;
}
 
static void insert_instr(struct ir3 *shader,
struct ir3_instruction *instr)
{
#ifdef DEBUG
static uint32_t serialno = 0;
instr->serialno = ++serialno;
#endif
array_insert(shader->instrs, instr);
 
if (is_input(instr))
array_insert(shader->baryfs, instr);
}
 
struct ir3_block * ir3_block_create(struct ir3 *shader,
unsigned ntmp, unsigned nin, unsigned nout)
{
struct ir3_block *block;
unsigned size;
char *ptr;
 
size = sizeof(*block);
size += sizeof(block->temporaries[0]) * ntmp;
size += sizeof(block->inputs[0]) * nin;
size += sizeof(block->outputs[0]) * nout;
 
ptr = ir3_alloc(shader, size);
 
block = (void *)ptr;
ptr += sizeof(*block);
 
block->temporaries = (void *)ptr;
block->ntemporaries = ntmp;
ptr += sizeof(block->temporaries[0]) * ntmp;
 
block->inputs = (void *)ptr;
block->ninputs = nin;
ptr += sizeof(block->inputs[0]) * nin;
 
block->outputs = (void *)ptr;
block->noutputs = nout;
ptr += sizeof(block->outputs[0]) * nout;
 
block->shader = shader;
 
return block;
}
 
static struct ir3_instruction *instr_create(struct ir3_block *block, int nreg)
{
struct ir3_instruction *instr;
unsigned sz = sizeof(*instr) + (nreg * sizeof(instr->regs[0]));
char *ptr = ir3_alloc(block->shader, sz);
 
instr = (struct ir3_instruction *)ptr;
ptr += sizeof(*instr);
instr->regs = (struct ir3_register **)ptr;
 
#ifdef DEBUG
instr->regs_max = nreg;
#endif
 
return instr;
}
 
struct ir3_instruction * ir3_instr_create2(struct ir3_block *block,
int category, opc_t opc, int nreg)
{
struct ir3_instruction *instr = instr_create(block, nreg);
instr->block = block;
instr->category = category;
instr->opc = opc;
insert_instr(block->shader, instr);
return instr;
}
 
struct ir3_instruction * ir3_instr_create(struct ir3_block *block,
int category, opc_t opc)
{
/* NOTE: we could be slightly more clever, at least for non-meta,
* and choose # of regs based on category.
*/
return ir3_instr_create2(block, category, opc, 4);
}
 
/* only used by old compiler: */
struct ir3_instruction * ir3_instr_clone(struct ir3_instruction *instr)
{
struct ir3_instruction *new_instr = instr_create(instr->block,
instr->regs_count);
struct ir3_register **regs;
unsigned i;
 
regs = new_instr->regs;
*new_instr = *instr;
new_instr->regs = regs;
 
insert_instr(instr->block->shader, new_instr);
 
/* clone registers: */
new_instr->regs_count = 0;
for (i = 0; i < instr->regs_count; i++) {
struct ir3_register *reg = instr->regs[i];
struct ir3_register *new_reg =
ir3_reg_create(new_instr, reg->num, reg->flags);
*new_reg = *reg;
}
 
return new_instr;
}
 
struct ir3_register * ir3_reg_create(struct ir3_instruction *instr,
int num, int flags)
{
struct ir3_register *reg = reg_create(instr->block->shader, num, flags);
#ifdef DEBUG
debug_assert(instr->regs_count < instr->regs_max);
#endif
instr->regs[instr->regs_count++] = reg;
return reg;
}
/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/freedreno/ir3/ir3.h
0,0 → 1,1053
/*
* Copyright (c) 2013 Rob Clark <robdclark@gmail.com>
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
 
#ifndef IR3_H_
#define IR3_H_
 
#include <stdint.h>
#include <stdbool.h>
 
#include "util/u_debug.h"
 
#include "instr-a3xx.h"
#include "disasm.h" /* TODO move 'enum shader_t' somewhere else.. */
 
/* low level intermediate representation of an adreno shader program */
 
struct ir3;
struct ir3_instruction;
struct ir3_block;
 
struct ir3_info {
uint16_t sizedwords;
uint16_t instrs_count; /* expanded to account for rpt's */
/* NOTE: max_reg, etc, does not include registers not touched
* by the shader (ie. vertex fetched via VFD_DECODE but not
* touched by shader)
*/
int8_t max_reg; /* highest GPR # used by shader */
int8_t max_half_reg;
int16_t max_const;
};
 
struct ir3_register {
enum {
IR3_REG_CONST = 0x001,
IR3_REG_IMMED = 0x002,
IR3_REG_HALF = 0x004,
IR3_REG_RELATIV= 0x008,
IR3_REG_R = 0x010,
/* Most instructions, it seems, can do float abs/neg but not
* integer. The CP pass needs to know what is intended (int or
* float) in order to do the right thing. For this reason the
* abs/neg flags are split out into float and int variants. In
* addition, .b (bitwise) operations, the negate is actually a
* bitwise not, so split that out into a new flag to make it
* more clear.
*/
IR3_REG_FNEG = 0x020,
IR3_REG_FABS = 0x040,
IR3_REG_SNEG = 0x080,
IR3_REG_SABS = 0x100,
IR3_REG_BNOT = 0x200,
IR3_REG_EVEN = 0x400,
IR3_REG_POS_INF= 0x800,
/* (ei) flag, end-input? Set on last bary, presumably to signal
* that the shader needs no more input:
*/
IR3_REG_EI = 0x1000,
/* meta-flags, for intermediate stages of IR, ie.
* before register assignment is done:
*/
IR3_REG_SSA = 0x2000, /* 'instr' is ptr to assigning instr */
IR3_REG_IA = 0x4000, /* meta-input dst is "assigned" */
IR3_REG_ADDR = 0x8000, /* register is a0.x */
} flags;
union {
/* normal registers:
* the component is in the low two bits of the reg #, so
* rN.x becomes: (N << 2) | x
*/
int num;
/* immediate: */
int32_t iim_val;
uint32_t uim_val;
float fim_val;
/* relative: */
int offset;
};
 
/* for IR3_REG_SSA, src registers contain ptr back to
* assigning instruction.
*/
struct ir3_instruction *instr;
 
union {
/* used for cat5 instructions, but also for internal/IR level
* tracking of what registers are read/written by an instruction.
* wrmask may be a bad name since it is used to represent both
* src and dst that touch multiple adjacent registers.
*/
unsigned wrmask;
/* for relative addressing, 32bits for array size is too small,
* but otoh we don't need to deal with disjoint sets, so instead
* use a simple size field (number of scalar components).
*/
unsigned size;
};
};
 
struct ir3_instruction {
struct ir3_block *block;
int category;
opc_t opc;
enum {
/* (sy) flag is set on first instruction, and after sample
* instructions (probably just on RAW hazard).
*/
IR3_INSTR_SY = 0x001,
/* (ss) flag is set on first instruction, and first instruction
* to depend on the result of "long" instructions (RAW hazard):
*
* rcp, rsq, log2, exp2, sin, cos, sqrt
*
* It seems to synchronize until all in-flight instructions are
* completed, for example:
*
* rsq hr1.w, hr1.w
* add.f hr2.z, (neg)hr2.z, hc0.y
* mul.f hr2.w, (neg)hr2.y, (neg)hr2.y
* rsq hr2.x, hr2.x
* (rpt1)nop
* mad.f16 hr2.w, hr2.z, hr2.z, hr2.w
* nop
* mad.f16 hr2.w, (neg)hr0.w, (neg)hr0.w, hr2.w
* (ss)(rpt2)mul.f hr1.x, (r)hr1.x, hr1.w
* (rpt2)mul.f hr0.x, (neg)(r)hr0.x, hr2.x
*
* The last mul.f does not have (ss) set, presumably because the
* (ss) on the previous instruction does the job.
*
* The blob driver also seems to set it on WAR hazards, although
* not really clear if this is needed or just blob compiler being
* sloppy. So far I haven't found a case where removing the (ss)
* causes problems for WAR hazard, but I could just be getting
* lucky:
*
* rcp r1.y, r3.y
* (ss)(rpt2)mad.f32 r3.y, (r)c9.x, r1.x, (r)r3.z
*
*/
IR3_INSTR_SS = 0x002,
/* (jp) flag is set on jump targets:
*/
IR3_INSTR_JP = 0x004,
IR3_INSTR_UL = 0x008,
IR3_INSTR_3D = 0x010,
IR3_INSTR_A = 0x020,
IR3_INSTR_O = 0x040,
IR3_INSTR_P = 0x080,
IR3_INSTR_S = 0x100,
IR3_INSTR_S2EN = 0x200,
/* meta-flags, for intermediate stages of IR, ie.
* before register assignment is done:
*/
IR3_INSTR_MARK = 0x1000,
} flags;
int repeat;
#ifdef DEBUG
unsigned regs_max;
#endif
unsigned regs_count;
struct ir3_register **regs;
union {
struct {
char inv;
char comp;
int immed;
} cat0;
struct {
type_t src_type, dst_type;
} cat1;
struct {
enum {
IR3_COND_LT = 0,
IR3_COND_LE = 1,
IR3_COND_GT = 2,
IR3_COND_GE = 3,
IR3_COND_EQ = 4,
IR3_COND_NE = 5,
} condition;
} cat2;
struct {
unsigned samp, tex;
type_t type;
} cat5;
struct {
type_t type;
int offset;
int iim_val;
} cat6;
/* for meta-instructions, just used to hold extra data
* before instruction scheduling, etc
*/
struct {
int off; /* component/offset */
} fo;
struct {
int aid;
} fi;
struct {
struct ir3_block *if_block, *else_block;
} flow;
struct {
struct ir3_block *block;
} inout;
 
/* XXX keep this as big as all other union members! */
uint32_t info[3];
};
 
/* transient values used during various algorithms: */
union {
/* The instruction depth is the max dependency distance to output.
*
* You can also think of it as the "cost", if we did any sort of
* optimization for register footprint. Ie. a value that is just
* result of moving a const to a reg would have a low cost, so to
* it could make sense to duplicate the instruction at various
* points where the result is needed to reduce register footprint.
*
* DEPTH_UNUSED used to mark unused instructions after depth
* calculation pass.
*/
#define DEPTH_UNUSED ~0
unsigned depth;
};
 
/* Used during CP and RA stages. For fanin and shader inputs/
* outputs where we need a sequence of consecutive registers,
* keep track of each src instruction's left (ie 'n-1') and right
* (ie 'n+1') neighbor. The front-end must insert enough mov's
* to ensure that each instruction has at most one left and at
* most one right neighbor. During the copy-propagation pass,
* we only remove mov's when we can preserve this constraint.
* And during the RA stage, we use the neighbor information to
* allocate a block of registers in one shot.
*
* TODO: maybe just add something like:
* struct ir3_instruction_ref {
* struct ir3_instruction *instr;
* unsigned cnt;
* }
*
* Or can we get away without the refcnt stuff? It seems like
* it should be overkill.. the problem is if, potentially after
* already eliminating some mov's, you have a single mov that
* needs to be grouped with its neighbors in two different
* places (ex. shader output and a fanin).
*/
struct {
struct ir3_instruction *left, *right;
uint16_t left_cnt, right_cnt;
} cp;
 
/* an instruction can reference at most one address register amongst
* its src/dst registers. Beyond that, you need to insert mov's.
*/
struct ir3_instruction *address;
 
/* in case of an instruction with a relative dst, we need to
* capture the dependency on the fanin for the previous values of
* the array elements. Since we don't know at compile time actually
* which array elements are written, this serves to preserve the
* unconditional write to array elements prior to the conditional
* write.
*
* TODO only cat1 can do indirect write.. we could maybe move this
* into instr->cat1.fanin (but would require the frontend to insert
* the extra mov)
*/
struct ir3_instruction *fanin;
 
struct ir3_instruction *next;
#ifdef DEBUG
uint32_t serialno;
#endif
};
 
static inline struct ir3_instruction *
ir3_neighbor_first(struct ir3_instruction *instr)
{
while (instr->cp.left)
instr = instr->cp.left;
return instr;
}
 
static inline int ir3_neighbor_count(struct ir3_instruction *instr)
{
int num = 1;
 
debug_assert(!instr->cp.left);
 
while (instr->cp.right) {
num++;
instr = instr->cp.right;
}
 
return num;
}
 
struct ir3_heap_chunk;
 
struct ir3 {
unsigned instrs_count, instrs_sz;
struct ir3_instruction **instrs;
 
/* Track bary.f (and ldlv) instructions.. this is needed in
* scheduling to ensure that all varying fetches happen before
* any potential kill instructions. The hw gets grumpy if all
* threads in a group are killed before the last bary.f gets
* a chance to signal end of input (ei).
*/
unsigned baryfs_count, baryfs_sz;
struct ir3_instruction **baryfs;
 
/* Track all indirect instructions (read and write). To avoid
* deadlock scenario where an address register gets scheduled,
* but other dependent src instructions cannot be scheduled due
* to dependency on a *different* address register value, the
* scheduler needs to ensure that all of an instruction's
* dependencies other than the address register are scheduled
* before the one that writes the address register. Having a
* convenient list of instructions that reference some address
* register simplifies this.
*/
unsigned indirects_count, indirects_sz;
struct ir3_instruction **indirects;
 
struct ir3_block *block;
unsigned heap_idx;
struct ir3_heap_chunk *chunk;
};
 
struct ir3_block {
struct ir3 *shader;
unsigned ntemporaries, ninputs, noutputs;
/* maps TGSI_FILE_TEMPORARY index back to the assigning instruction: */
struct ir3_instruction **temporaries;
struct ir3_instruction **inputs;
struct ir3_instruction **outputs;
/* only a single address register: */
struct ir3_instruction *address;
struct ir3_block *parent;
struct ir3_instruction *head;
};
 
struct ir3 * ir3_create(void);
void ir3_destroy(struct ir3 *shader);
void * ir3_assemble(struct ir3 *shader,
struct ir3_info *info, uint32_t gpu_id);
void * ir3_alloc(struct ir3 *shader, int sz);
 
struct ir3_block * ir3_block_create(struct ir3 *shader,
unsigned ntmp, unsigned nin, unsigned nout);
 
struct ir3_instruction * ir3_instr_create(struct ir3_block *block,
int category, opc_t opc);
struct ir3_instruction * ir3_instr_create2(struct ir3_block *block,
int category, opc_t opc, int nreg);
struct ir3_instruction * ir3_instr_clone(struct ir3_instruction *instr);
const char *ir3_instr_name(struct ir3_instruction *instr);
 
struct ir3_register * ir3_reg_create(struct ir3_instruction *instr,
int num, int flags);
 
 
static inline bool ir3_instr_check_mark(struct ir3_instruction *instr)
{
if (instr->flags & IR3_INSTR_MARK)
return true; /* already visited */
instr->flags |= IR3_INSTR_MARK;
return false;
}
 
static inline void ir3_clear_mark(struct ir3 *shader)
{
/* TODO would be nice to drop the instruction array.. for
* new compiler, _clear_mark() is all we use it for, and
* we could probably manage a linked list instead..
*
* Also, we'll probably want to mark instructions within
* a block, so tracking the list of instrs globally is
* unlikely to be what we want.
*/
unsigned i;
for (i = 0; i < shader->instrs_count; i++) {
struct ir3_instruction *instr = shader->instrs[i];
instr->flags &= ~IR3_INSTR_MARK;
}
}
 
static inline int ir3_instr_regno(struct ir3_instruction *instr,
struct ir3_register *reg)
{
unsigned i;
for (i = 0; i < instr->regs_count; i++)
if (reg == instr->regs[i])
return i;
return -1;
}
 
 
#define MAX_ARRAYS 16
 
/* comp:
* 0 - x
* 1 - y
* 2 - z
* 3 - w
*/
static inline uint32_t regid(int num, int comp)
{
return (num << 2) | (comp & 0x3);
}
 
static inline uint32_t reg_num(struct ir3_register *reg)
{
return reg->num >> 2;
}
 
static inline uint32_t reg_comp(struct ir3_register *reg)
{
return reg->num & 0x3;
}
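/* Worked example (illustrative): regid() packs the component into the low
 * two bits, so r3.w is regid(3, 3) = (3 << 2) | 3 = 0xf; with that value
 * stored in ir3_register::num, reg_num() returns 3 and reg_comp() returns 3.
 */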
 
static inline bool is_flow(struct ir3_instruction *instr)
{
return (instr->category == 0);
}
 
static inline bool is_kill(struct ir3_instruction *instr)
{
return is_flow(instr) && (instr->opc == OPC_KILL);
}
 
static inline bool is_nop(struct ir3_instruction *instr)
{
return is_flow(instr) && (instr->opc == OPC_NOP);
}
 
/* Is it a non-transformative (ie. not type changing) mov? This can
* also include absneg.s/absneg.f, which for the most part can be
* treated as a mov (single src argument).
*/
static inline bool is_same_type_mov(struct ir3_instruction *instr)
{
struct ir3_register *dst = instr->regs[0];
 
/* mov's that write to a0.x or p0.x are special: */
if (dst->num == regid(REG_P0, 0))
return false;
if (dst->num == regid(REG_A0, 0))
return false;
 
if ((instr->category == 1) &&
(instr->cat1.src_type == instr->cat1.dst_type))
return true;
if ((instr->category == 2) && ((instr->opc == OPC_ABSNEG_F) ||
(instr->opc == OPC_ABSNEG_S)))
return true;
return false;
}
 
static inline bool is_alu(struct ir3_instruction *instr)
{
return (1 <= instr->category) && (instr->category <= 3);
}
 
static inline bool is_sfu(struct ir3_instruction *instr)
{
return (instr->category == 4);
}
 
static inline bool is_tex(struct ir3_instruction *instr)
{
return (instr->category == 5);
}
 
static inline bool is_mem(struct ir3_instruction *instr)
{
return (instr->category == 6);
}
 
static inline bool is_input(struct ir3_instruction *instr)
{
/* in some cases, ldlv is used to fetch varying without
* interpolation.. fortunately inloc is the first src
* register in either case
*/
if (is_mem(instr) && (instr->opc == OPC_LDLV))
return true;
return (instr->category == 2) && (instr->opc == OPC_BARY_F);
}
 
static inline bool is_meta(struct ir3_instruction *instr)
{
/* TODO how should we count PHI (and maybe fan-in/out) which
* might actually contribute some instructions to the final
* result?
*/
return (instr->category == -1);
}
 
static inline bool writes_addr(struct ir3_instruction *instr)
{
if (instr->regs_count > 0) {
struct ir3_register *dst = instr->regs[0];
return !!(dst->flags & IR3_REG_ADDR);
}
return false;
}
 
static inline bool writes_pred(struct ir3_instruction *instr)
{
if (instr->regs_count > 0) {
struct ir3_register *dst = instr->regs[0];
return reg_num(dst) == REG_P0;
}
return false;
}
 
/* returns defining instruction for reg */
/* TODO better name */
static inline struct ir3_instruction *ssa(struct ir3_register *reg)
{
if (reg->flags & IR3_REG_SSA)
return reg->instr;
return NULL;
}
 
static inline bool conflicts(struct ir3_instruction *a,
struct ir3_instruction *b)
{
return (a && b) && (a != b);
}
 
static inline bool reg_gpr(struct ir3_register *r)
{
if (r->flags & (IR3_REG_CONST | IR3_REG_IMMED | IR3_REG_ADDR))
return false;
if ((reg_num(r) == REG_A0) || (reg_num(r) == REG_P0))
return false;
return true;
}
 
/* some cat2 instructions (ie. those which are not float) can embed an
* immediate:
*/
static inline bool ir3_cat2_int(opc_t opc)
{
switch (opc) {
case OPC_ADD_U:
case OPC_ADD_S:
case OPC_SUB_U:
case OPC_SUB_S:
case OPC_CMPS_U:
case OPC_CMPS_S:
case OPC_MIN_U:
case OPC_MIN_S:
case OPC_MAX_U:
case OPC_MAX_S:
case OPC_CMPV_U:
case OPC_CMPV_S:
case OPC_MUL_U:
case OPC_MUL_S:
case OPC_MULL_U:
case OPC_CLZ_S:
case OPC_ABSNEG_S:
case OPC_AND_B:
case OPC_OR_B:
case OPC_NOT_B:
case OPC_XOR_B:
case OPC_BFREV_B:
case OPC_CLZ_B:
case OPC_SHL_B:
case OPC_SHR_B:
case OPC_ASHR_B:
case OPC_MGEN_B:
case OPC_GETBIT_B:
case OPC_CBITS_B:
case OPC_BARY_F:
return true;
 
default:
return false;
}
}
 
 
/* map cat2 instruction to valid abs/neg flags: */
static inline unsigned ir3_cat2_absneg(opc_t opc)
{
switch (opc) {
case OPC_ADD_F:
case OPC_MIN_F:
case OPC_MAX_F:
case OPC_MUL_F:
case OPC_SIGN_F:
case OPC_CMPS_F:
case OPC_ABSNEG_F:
case OPC_CMPV_F:
case OPC_FLOOR_F:
case OPC_CEIL_F:
case OPC_RNDNE_F:
case OPC_RNDAZ_F:
case OPC_TRUNC_F:
case OPC_BARY_F:
return IR3_REG_FABS | IR3_REG_FNEG;
 
case OPC_ADD_U:
case OPC_ADD_S:
case OPC_SUB_U:
case OPC_SUB_S:
case OPC_CMPS_U:
case OPC_CMPS_S:
case OPC_MIN_U:
case OPC_MIN_S:
case OPC_MAX_U:
case OPC_MAX_S:
case OPC_CMPV_U:
case OPC_CMPV_S:
case OPC_MUL_U:
case OPC_MUL_S:
case OPC_MULL_U:
case OPC_CLZ_S:
return 0;
 
case OPC_ABSNEG_S:
return IR3_REG_SABS | IR3_REG_SNEG;
 
case OPC_AND_B:
case OPC_OR_B:
case OPC_NOT_B:
case OPC_XOR_B:
case OPC_BFREV_B:
case OPC_CLZ_B:
case OPC_SHL_B:
case OPC_SHR_B:
case OPC_ASHR_B:
case OPC_MGEN_B:
case OPC_GETBIT_B:
case OPC_CBITS_B:
return IR3_REG_BNOT;
 
default:
return 0;
}
}
 
/* map cat3 instructions to valid abs/neg flags: */
static inline unsigned ir3_cat3_absneg(opc_t opc)
{
switch (opc) {
case OPC_MAD_F16:
case OPC_MAD_F32:
case OPC_SEL_F16:
case OPC_SEL_F32:
return IR3_REG_FNEG;
 
case OPC_MAD_U16:
case OPC_MADSH_U16:
case OPC_MAD_S16:
case OPC_MADSH_M16:
case OPC_MAD_U24:
case OPC_MAD_S24:
case OPC_SEL_S16:
case OPC_SEL_S32:
case OPC_SAD_S16:
case OPC_SAD_S32:
/* neg *may* work on 3rd src.. */
 
case OPC_SEL_B16:
case OPC_SEL_B32:
 
default:
return 0;
}
}
 
#define array_insert(arr, val) do { \
if (arr ## _count == arr ## _sz) { \
arr ## _sz = MAX2(2 * arr ## _sz, 16); \
arr = realloc(arr, arr ## _sz * sizeof(arr[0])); \
} \
arr[arr ##_count++] = val; \
} while (0)
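/* Illustrative usage sketch (not part of the original header): the macro
 * assumes the caller declares companion <arr>_count and <arr>_sz variables,
 * e.g. (hypothetical local names):
 *
 *   struct ir3_instruction **worklist = NULL;
 *   unsigned worklist_count = 0, worklist_sz = 0;
 *
 *   array_insert(worklist, instr);   // grows the backing store as needed
 *
 * In this driver it is used on fields such as ir->indirects, see
 * add_dst_reg_wrmask() in ir3_compiler.c.
 */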
 
/* iterator for an instruction's sources (reg), also returns src #: */
#define foreach_src_n(__srcreg, __n, __instr) \
if ((__instr)->regs_count) \
for (unsigned __cnt = (__instr)->regs_count - 1, __n = 0; __n < __cnt; __n++) \
if ((__srcreg = (__instr)->regs[__n + 1]))
 
/* iterator for an instruction's sources (reg): */
#define foreach_src(__srcreg, __instr) \
foreach_src_n(__srcreg, __i, __instr)
 
static inline unsigned __ssa_src_cnt(struct ir3_instruction *instr)
{
if (instr->fanin)
return instr->regs_count + 2;
if (instr->address)
return instr->regs_count + 1;
return instr->regs_count;
}
 
static inline struct ir3_instruction * __ssa_src_n(struct ir3_instruction *instr, unsigned n)
{
if (n == (instr->regs_count + 1))
return instr->fanin;
if (n == (instr->regs_count + 0))
return instr->address;
return ssa(instr->regs[n]);
}
 
#define __src_cnt(__instr) ((__instr)->address ? (__instr)->regs_count : (__instr)->regs_count - 1)
 
/* iterator for an instruction's SSA sources (instr), also returns src #: */
#define foreach_ssa_src_n(__srcinst, __n, __instr) \
if ((__instr)->regs_count) \
for (unsigned __cnt = __ssa_src_cnt(__instr) - 1, __n = 0; __n < __cnt; __n++) \
if ((__srcinst = __ssa_src_n(__instr, __n + 1)))
 
/* iterator for an instruction's SSA sources (instr): */
#define foreach_ssa_src(__srcinst, __instr) \
foreach_ssa_src_n(__srcinst, __i, __instr)
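/* Usage sketch (illustrative): walk the defining instructions of all SSA
 * sources, which also covers the address register and fanin when present:
 *
 *   struct ir3_instruction *src;
 *   foreach_ssa_src(src, instr) {
 *       // 'src' is the instruction that produces this source value
 *   }
 */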
 
 
/* dump: */
#include <stdio.h>
void ir3_dump(struct ir3 *shader, const char *name,
struct ir3_block *block /* XXX maybe 'block' ptr should move to ir3? */,
FILE *f);
void ir3_dump_instr_single(struct ir3_instruction *instr);
void ir3_dump_instr_list(struct ir3_instruction *instr);
 
/* flatten if/else: */
int ir3_block_flatten(struct ir3_block *block);
 
/* depth calculation: */
int ir3_delayslots(struct ir3_instruction *assigner,
struct ir3_instruction *consumer, unsigned n);
void ir3_block_depth(struct ir3_block *block);
 
/* copy-propagate: */
void ir3_block_cp(struct ir3_block *block);
 
/* group neighbors and insert mov's to resolve conflicts: */
void ir3_block_group(struct ir3_block *block);
 
/* scheduling: */
int ir3_block_sched(struct ir3_block *block);
 
/* register assignment: */
int ir3_block_ra(struct ir3_block *block, enum shader_t type,
bool frag_coord, bool frag_face);
 
/* legalize: */
void ir3_block_legalize(struct ir3_block *block,
bool *has_samp, int *max_bary);
 
/* ************************************************************************* */
/* instruction helpers */
 
static inline struct ir3_instruction *
ir3_MOV(struct ir3_block *block, struct ir3_instruction *src, type_t type)
{
struct ir3_instruction *instr =
ir3_instr_create(block, 1, 0);
ir3_reg_create(instr, 0, 0); /* dst */
ir3_reg_create(instr, 0, IR3_REG_SSA)->instr = src;
instr->cat1.src_type = type;
instr->cat1.dst_type = type;
return instr;
}
 
static inline struct ir3_instruction *
ir3_COV(struct ir3_block *block, struct ir3_instruction *src,
type_t src_type, type_t dst_type)
{
struct ir3_instruction *instr =
ir3_instr_create(block, 1, 0);
ir3_reg_create(instr, 0, 0); /* dst */
ir3_reg_create(instr, 0, IR3_REG_SSA)->instr = src;
instr->cat1.src_type = src_type;
instr->cat1.dst_type = dst_type;
return instr;
}
 
#define INSTR1(CAT, name) \
static inline struct ir3_instruction * \
ir3_##name(struct ir3_block *block, \
struct ir3_instruction *a, unsigned aflags) \
{ \
struct ir3_instruction *instr = \
ir3_instr_create(block, CAT, OPC_##name); \
ir3_reg_create(instr, 0, 0); /* dst */ \
ir3_reg_create(instr, 0, IR3_REG_SSA | aflags)->instr = a; \
return instr; \
}
 
#define INSTR2(CAT, name) \
static inline struct ir3_instruction * \
ir3_##name(struct ir3_block *block, \
struct ir3_instruction *a, unsigned aflags, \
struct ir3_instruction *b, unsigned bflags) \
{ \
struct ir3_instruction *instr = \
ir3_instr_create(block, CAT, OPC_##name); \
ir3_reg_create(instr, 0, 0); /* dst */ \
ir3_reg_create(instr, 0, IR3_REG_SSA | aflags)->instr = a; \
ir3_reg_create(instr, 0, IR3_REG_SSA | bflags)->instr = b; \
return instr; \
}
 
#define INSTR3(CAT, name) \
static inline struct ir3_instruction * \
ir3_##name(struct ir3_block *block, \
struct ir3_instruction *a, unsigned aflags, \
struct ir3_instruction *b, unsigned bflags, \
struct ir3_instruction *c, unsigned cflags) \
{ \
struct ir3_instruction *instr = \
ir3_instr_create(block, CAT, OPC_##name); \
ir3_reg_create(instr, 0, 0); /* dst */ \
ir3_reg_create(instr, 0, IR3_REG_SSA | aflags)->instr = a; \
ir3_reg_create(instr, 0, IR3_REG_SSA | bflags)->instr = b; \
ir3_reg_create(instr, 0, IR3_REG_SSA | cflags)->instr = c; \
return instr; \
}
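/* The INSTRn() expansions below generate small builder helpers; e.g.
 * INSTR2(2, ADD_F) defines ir3_ADD_F(), so adding two SSA values looks
 * roughly like this (illustrative sketch):
 *
 *   struct ir3_instruction *sum = ir3_ADD_F(block, a, 0, b, 0);
 *
 * where the 0's are the per-source IR3_REG_* modifier flags.
 */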
 
/* cat0 instructions: */
INSTR1(0, KILL);
 
/* cat2 instructions, most 2 src but some 1 src: */
INSTR2(2, ADD_F)
INSTR2(2, MIN_F)
INSTR2(2, MAX_F)
INSTR2(2, MUL_F)
INSTR1(2, SIGN_F)
INSTR2(2, CMPS_F)
INSTR1(2, ABSNEG_F)
INSTR2(2, CMPV_F)
INSTR1(2, FLOOR_F)
INSTR1(2, CEIL_F)
INSTR1(2, RNDNE_F)
INSTR1(2, RNDAZ_F)
INSTR1(2, TRUNC_F)
INSTR2(2, ADD_U)
INSTR2(2, ADD_S)
INSTR2(2, SUB_U)
INSTR2(2, SUB_S)
INSTR2(2, CMPS_U)
INSTR2(2, CMPS_S)
INSTR2(2, MIN_U)
INSTR2(2, MIN_S)
INSTR2(2, MAX_U)
INSTR2(2, MAX_S)
INSTR1(2, ABSNEG_S)
INSTR2(2, AND_B)
INSTR2(2, OR_B)
INSTR1(2, NOT_B)
INSTR2(2, XOR_B)
INSTR2(2, CMPV_U)
INSTR2(2, CMPV_S)
INSTR2(2, MUL_U)
INSTR2(2, MUL_S)
INSTR2(2, MULL_U)
INSTR1(2, BFREV_B)
INSTR1(2, CLZ_S)
INSTR1(2, CLZ_B)
INSTR2(2, SHL_B)
INSTR2(2, SHR_B)
INSTR2(2, ASHR_B)
INSTR2(2, BARY_F)
INSTR2(2, MGEN_B)
INSTR2(2, GETBIT_B)
INSTR1(2, SETRM)
INSTR1(2, CBITS_B)
INSTR2(2, SHB)
INSTR2(2, MSAD)
 
/* cat3 instructions: */
INSTR3(3, MAD_U16)
INSTR3(3, MADSH_U16)
INSTR3(3, MAD_S16)
INSTR3(3, MADSH_M16)
INSTR3(3, MAD_U24)
INSTR3(3, MAD_S24)
INSTR3(3, MAD_F16)
INSTR3(3, MAD_F32)
INSTR3(3, SEL_B16)
INSTR3(3, SEL_B32)
INSTR3(3, SEL_S16)
INSTR3(3, SEL_S32)
INSTR3(3, SEL_F16)
INSTR3(3, SEL_F32)
INSTR3(3, SAD_S16)
INSTR3(3, SAD_S32)
 
/* cat4 instructions: */
INSTR1(4, RCP)
INSTR1(4, RSQ)
INSTR1(4, LOG2)
INSTR1(4, EXP2)
INSTR1(4, SIN)
INSTR1(4, COS)
INSTR1(4, SQRT)
 
/* cat5 instructions: */
INSTR1(5, DSX)
INSTR1(5, DSY)
 
static inline struct ir3_instruction *
ir3_SAM(struct ir3_block *block, opc_t opc, type_t type,
unsigned wrmask, unsigned flags, unsigned samp, unsigned tex,
struct ir3_instruction *src0, struct ir3_instruction *src1)
{
struct ir3_instruction *sam;
struct ir3_register *reg;
 
sam = ir3_instr_create(block, 5, opc);
sam->flags |= flags;
ir3_reg_create(sam, 0, 0)->wrmask = wrmask;
if (src0) {
reg = ir3_reg_create(sam, 0, IR3_REG_SSA);
reg->wrmask = (1 << (src0->regs_count - 1)) - 1;
reg->instr = src0;
}
if (src1) {
reg = ir3_reg_create(sam, 0, IR3_REG_SSA);
reg->instr = src1;
reg->wrmask = (1 << (src1->regs_count - 1)) - 1;
}
sam->cat5.samp = samp;
sam->cat5.tex = tex;
sam->cat5.type = type;
 
return sam;
}
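/* Illustrative sketch (not from the original source): a plain 2D texture
 * sample writing a full vec4 might be built roughly as
 *
 *   sam = ir3_SAM(block, OPC_SAM, TYPE_F32, 0xf, 0,
 *                 samp_id, tex_id, coord, NULL);
 *
 * with 'coord' a fanin/collect of the coordinate components and samp_id/
 * tex_id hypothetical sampler and texture slot numbers; OPC_SAM is assumed
 * to come from instr-a3xx.h.  See trans_samp() in ir3_compiler.c for the
 * real construction.
 */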
 
/* cat6 instructions: */
INSTR2(6, LDLV)
INSTR2(6, LDG)
 
/* ************************************************************************* */
/* split this out or find some helper to use.. like main/bitset.h.. */
 
#include <string.h>
 
#define MAX_REG 256
 
typedef uint8_t regmask_t[2 * MAX_REG / 8];
 
static inline unsigned regmask_idx(struct ir3_register *reg)
{
unsigned num = reg->num;
debug_assert(num < MAX_REG);
if (reg->flags & IR3_REG_HALF)
num += MAX_REG;
return num;
}
 
static inline void regmask_init(regmask_t *regmask)
{
memset(regmask, 0, sizeof(*regmask));
}
 
static inline void regmask_set(regmask_t *regmask, struct ir3_register *reg)
{
unsigned idx = regmask_idx(reg);
if (reg->flags & IR3_REG_RELATIV) {
unsigned i;
for (i = 0; i < reg->size; i++, idx++)
(*regmask)[idx / 8] |= 1 << (idx % 8);
} else {
unsigned mask;
for (mask = reg->wrmask; mask; mask >>= 1, idx++)
if (mask & 1)
(*regmask)[idx / 8] |= 1 << (idx % 8);
}
}
 
static inline void regmask_or(regmask_t *dst, regmask_t *a, regmask_t *b)
{
unsigned i;
for (i = 0; i < ARRAY_SIZE(*dst); i++)
(*dst)[i] = (*a)[i] | (*b)[i];
}
 
/* set bits in a if not set in b, conceptually:
* a |= (reg & ~b)
*/
static inline void regmask_set_if_not(regmask_t *a,
struct ir3_register *reg, regmask_t *b)
{
unsigned idx = regmask_idx(reg);
if (reg->flags & IR3_REG_RELATIV) {
unsigned i;
for (i = 0; i < reg->size; i++, idx++)
if (!((*b)[idx / 8] & (1 << (idx % 8))))
(*a)[idx / 8] |= 1 << (idx % 8);
} else {
unsigned mask;
for (mask = reg->wrmask; mask; mask >>= 1, idx++)
if (mask & 1)
if (!((*b)[idx / 8] & (1 << (idx % 8))))
(*a)[idx / 8] |= 1 << (idx % 8);
}
}
 
static inline bool regmask_get(regmask_t *regmask,
struct ir3_register *reg)
{
unsigned idx = regmask_idx(reg);
if (reg->flags & IR3_REG_RELATIV) {
unsigned i;
for (i = 0; i < reg->size; i++, idx++)
if ((*regmask)[idx / 8] & (1 << (idx % 8)))
return true;
} else {
unsigned mask;
for (mask = reg->wrmask; mask; mask >>= 1, idx++)
if (mask & 1)
if ((*regmask)[idx / 8] & (1 << (idx % 8)))
return true;
}
return false;
}
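/* Usage sketch (illustrative): passes such as legalize track outstanding
 * writes in a regmask and test each source register against it, roughly:
 *
 *   regmask_t pending;
 *   regmask_init(&pending);
 *   regmask_set(&pending, instr->regs[0]);      // record a write
 *   if (regmask_get(&pending, src_reg)) {
 *       // source depends on a pending write -> insert a sync
 *   }
 *
 * The names here are illustrative; see ir3_block_legalize() for the real
 * consumer of these helpers.
 */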
 
/* ************************************************************************* */
 
#endif /* IR3_H_ */
/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/freedreno/ir3/ir3_cmdline.c
0,0 → 1,370
/*
* Copyright (C) 2014 Rob Clark <robclark@freedesktop.org>
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Authors:
* Rob Clark <robclark@freedesktop.org>
*/
 
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/mman.h>
#include <fcntl.h>
#include <stdint.h>
#include <stdlib.h>
#include <err.h>
 
#include "tgsi/tgsi_parse.h"
#include "tgsi/tgsi_text.h"
#include "tgsi/tgsi_dump.h"
 
#include "freedreno_util.h"
 
#include "ir3_compiler.h"
#include "instr-a3xx.h"
#include "ir3.h"
 
static void dump_reg(const char *name, uint32_t r)
{
if (r != regid(63,0))
debug_printf("; %s: r%d.%c\n", name, r >> 2, "xyzw"[r & 0x3]);
}
 
static void dump_semantic(struct ir3_shader_variant *so,
unsigned sem, const char *name)
{
uint32_t regid;
regid = ir3_find_output_regid(so, ir3_semantic_name(sem, 0));
dump_reg(name, regid);
}
 
static void dump_info(struct ir3_shader_variant *so, const char *str)
{
uint32_t *bin;
const char *type = (so->type == SHADER_VERTEX) ? "VERT" : "FRAG";
 
// for debug, dump some before/after info:
// TODO make gpu_id configurable on cmdline
bin = ir3_shader_assemble(so, 320);
if (fd_mesa_debug & FD_DBG_DISASM) {
struct ir3_block *block = so->ir->block;
struct ir3_register *reg;
uint8_t regid;
unsigned i;
 
debug_printf("; %s: %s\n", type, str);
 
for (i = 0; i < block->ninputs; i++) {
if (!block->inputs[i]) {
debug_printf("; in%d unused\n", i);
continue;
}
reg = block->inputs[i]->regs[0];
regid = reg->num;
debug_printf("@in(%sr%d.%c)\tin%d\n",
(reg->flags & IR3_REG_HALF) ? "h" : "",
(regid >> 2), "xyzw"[regid & 0x3], i);
}
 
for (i = 0; i < block->noutputs; i++) {
if (!block->outputs[i]) {
debug_printf("; out%d unused\n", i);
continue;
}
/* kill shows up as a virtual output.. skip it! */
if (is_kill(block->outputs[i]))
continue;
reg = block->outputs[i]->regs[0];
regid = reg->num;
debug_printf("@out(%sr%d.%c)\tout%d\n",
(reg->flags & IR3_REG_HALF) ? "h" : "",
(regid >> 2), "xyzw"[regid & 0x3], i);
}
 
for (i = 0; i < so->immediates_count; i++) {
debug_printf("@const(c%d.x)\t", so->first_immediate + i);
debug_printf("0x%08x, 0x%08x, 0x%08x, 0x%08x\n",
so->immediates[i].val[0],
so->immediates[i].val[1],
so->immediates[i].val[2],
so->immediates[i].val[3]);
}
 
disasm_a3xx(bin, so->info.sizedwords, 0, so->type);
 
debug_printf("; %s: outputs:", type);
for (i = 0; i < so->outputs_count; i++) {
uint8_t regid = so->outputs[i].regid;
ir3_semantic sem = so->outputs[i].semantic;
debug_printf(" r%d.%c (%u:%u)",
(regid >> 2), "xyzw"[regid & 0x3],
sem2name(sem), sem2idx(sem));
}
debug_printf("\n");
debug_printf("; %s: inputs:", type);
for (i = 0; i < so->inputs_count; i++) {
uint8_t regid = so->inputs[i].regid;
ir3_semantic sem = so->inputs[i].semantic;
debug_printf(" r%d.%c (%u:%u,cm=%x,il=%u,b=%u)",
(regid >> 2), "xyzw"[regid & 0x3],
sem2name(sem), sem2idx(sem),
so->inputs[i].compmask,
so->inputs[i].inloc,
so->inputs[i].bary);
}
debug_printf("\n");
}
 
/* print generic shader info: */
debug_printf("; %s: %u instructions, %d half, %d full\n", type,
so->info.instrs_count,
so->info.max_half_reg + 1,
so->info.max_reg + 1);
 
/* print shader type specific info: */
switch (so->type) {
case SHADER_VERTEX:
dump_semantic(so, TGSI_SEMANTIC_POSITION, "pos");
dump_semantic(so, TGSI_SEMANTIC_PSIZE, "psize");
break;
case SHADER_FRAGMENT:
dump_reg("pos (bary)", so->pos_regid);
dump_semantic(so, TGSI_SEMANTIC_POSITION, "posz");
dump_semantic(so, TGSI_SEMANTIC_COLOR, "color");
/* these two are hard-coded since we don't know how to
* program them to anything but all 0's...
*/
if (so->frag_coord)
debug_printf("; fragcoord: r0.x\n");
if (so->frag_face)
debug_printf("; fragface: hr0.x\n");
break;
case SHADER_COMPUTE:
break;
}
free(bin);
 
debug_printf("\n");
}
 
 
static int
read_file(const char *filename, void **ptr, size_t *size)
{
int fd, ret;
struct stat st;
 
*ptr = MAP_FAILED;
 
fd = open(filename, O_RDONLY);
if (fd == -1) {
warnx("couldn't open `%s'", filename);
return 1;
}
 
ret = fstat(fd, &st);
if (ret)
errx(1, "couldn't stat `%s'", filename);
 
*size = st.st_size;
*ptr = mmap(NULL, st.st_size, PROT_READ, MAP_SHARED, fd, 0);
if (*ptr == MAP_FAILED)
errx(1, "couldn't map `%s'", filename);
 
close(fd);
 
return 0;
}
 
static void reset_variant(struct ir3_shader_variant *v, const char *msg)
{
printf("; %s\n", msg);
v->inputs_count = 0;
v->outputs_count = 0;
v->total_in = 0;
v->has_samp = false;
v->immediates_count = 0;
}
 
static void print_usage(void)
{
printf("Usage: ir3_compiler [OPTIONS]... FILE\n");
printf(" --verbose - verbose compiler/debug messages\n");
printf(" --binning-pass - generate binning pass shader (VERT)\n");
printf(" --color-two-side - emulate two-sided color (FRAG)\n");
printf(" --half-precision - use half-precision\n");
printf(" --saturate-s MASK - bitmask of samplers to saturate S coord\n");
printf(" --saturate-t MASK - bitmask of samplers to saturate T coord\n");
printf(" --saturate-r MASK - bitmask of samplers to saturate R coord\n");
printf(" --nocp - disable copy propagation\n");
printf(" --nir - use NIR compiler\n");
printf(" --help - show this message\n");
}
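/* Example invocation (illustrative; "shader.tgsi" is a hypothetical file
 * containing TGSI text):
 *
 *   ir3_compiler --verbose --half-precision shader.tgsi
 *
 * The selected options are echoed in the leading "; options:" comment so
 * that ir3test can rebuild the same shader key when recompiling.
 */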
 
int main(int argc, char **argv)
{
int ret = 0, n = 1;
const char *filename;
struct tgsi_token toks[65536];
struct tgsi_parse_context parse;
struct ir3_shader_variant v;
struct ir3_shader_key key = {};
const char *info;
void *ptr;
size_t size;
int use_nir = 0;
 
fd_mesa_debug |= FD_DBG_DISASM;
 
/* cmdline args which impact shader variant get spit out in a
* comment on the first line.. a quick/dirty way to preserve
* that info so when ir3test recompiles the shader with a new
* compiler version, we use the same shader-key settings:
*/
debug_printf("; options:");
 
while (n < argc) {
if (!strcmp(argv[n], "--verbose")) {
fd_mesa_debug |= FD_DBG_OPTDUMP | FD_DBG_MSGS | FD_DBG_OPTMSGS;
n++;
continue;
}
 
if (!strcmp(argv[n], "--binning-pass")) {
debug_printf(" %s", argv[n]);
key.binning_pass = true;
n++;
continue;
}
 
if (!strcmp(argv[n], "--color-two-side")) {
debug_printf(" %s", argv[n]);
key.color_two_side = true;
n++;
continue;
}
 
if (!strcmp(argv[n], "--half-precision")) {
debug_printf(" %s", argv[n]);
key.half_precision = true;
n++;
continue;
}
 
if (!strcmp(argv[n], "--saturate-s")) {
debug_printf(" %s %s", argv[n], argv[n+1]);
key.vsaturate_s = key.fsaturate_s = strtol(argv[n+1], NULL, 0);
n += 2;
continue;
}
 
if (!strcmp(argv[n], "--saturate-t")) {
debug_printf(" %s %s", argv[n], argv[n+1]);
key.vsaturate_t = key.fsaturate_t = strtol(argv[n+1], NULL, 0);
n += 2;
continue;
}
 
if (!strcmp(argv[n], "--saturate-r")) {
debug_printf(" %s %s", argv[n], argv[n+1]);
key.vsaturate_r = key.fsaturate_r = strtol(argv[n+1], NULL, 0);
n += 2;
continue;
}
 
if (!strcmp(argv[n], "--nocp")) {
fd_mesa_debug |= FD_DBG_NOCP;
n++;
continue;
}
if (!strcmp(argv[n], "--nir")) {
use_nir = true;
n++;
continue;
}
 
if (!strcmp(argv[n], "--help")) {
print_usage();
return 0;
}
 
break;
}
debug_printf("\n");
 
filename = argv[n];
 
memset(&v, 0, sizeof(v));
v.key = key;
 
ret = read_file(filename, &ptr, &size);
if (ret) {
print_usage();
return ret;
}
 
if (fd_mesa_debug & FD_DBG_OPTMSGS)
debug_printf("%s\n", (char *)ptr);
 
if (!tgsi_text_translate(ptr, toks, Elements(toks)))
errx(1, "could not parse `%s'", filename);
 
tgsi_parse_init(&parse, toks);
switch (parse.FullHeader.Processor.Processor) {
case TGSI_PROCESSOR_FRAGMENT:
v.type = SHADER_FRAGMENT;
break;
case TGSI_PROCESSOR_VERTEX:
v.type = SHADER_VERTEX;
break;
case TGSI_PROCESSOR_COMPUTE:
v.type = SHADER_COMPUTE;
break;
}
 
if (use_nir) {
info = "NIR compiler";
ret = ir3_compile_shader_nir(&v, toks, key);
} else {
info = "TGSI compiler";
ret = ir3_compile_shader(&v, toks, key, true);
}
 
if (ret) {
reset_variant(&v, "compiler failed, trying without copy propagation!");
info = "compiler (no copy propagation)";
ret = ir3_compile_shader(&v, toks, key, false);
}
 
if (ret) {
fprintf(stderr, "compiler failed!\n");
return ret;
}
dump_info(&v, info);
}
 
void _mesa_error_no_memory(const char *caller);
 
void
_mesa_error_no_memory(const char *caller)
{
fprintf(stderr, "Mesa error: out of memory in %s", caller);
}
/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/freedreno/ir3/ir3_compiler.c
0,0 → 1,3739
/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */
 
/*
* Copyright (C) 2013 Rob Clark <robclark@freedesktop.org>
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Authors:
* Rob Clark <robclark@freedesktop.org>
*/
 
#include <stdarg.h>
 
#include "pipe/p_state.h"
#include "util/u_string.h"
#include "util/u_memory.h"
#include "util/u_inlines.h"
#include "tgsi/tgsi_lowering.h"
#include "tgsi/tgsi_parse.h"
#include "tgsi/tgsi_ureg.h"
#include "tgsi/tgsi_info.h"
#include "tgsi/tgsi_strings.h"
#include "tgsi/tgsi_dump.h"
#include "tgsi/tgsi_scan.h"
 
#include "freedreno_util.h"
 
#include "ir3_compiler.h"
#include "ir3_shader.h"
 
#include "instr-a3xx.h"
#include "ir3.h"
 
struct ir3_compile_context {
const struct tgsi_token *tokens;
bool free_tokens;
struct ir3 *ir;
struct ir3_shader_variant *so;
uint16_t integer_s;
 
struct ir3_block *block;
struct ir3_instruction *current_instr;
 
/* we need to defer updates to block->outputs[] until the end
* of an instruction (so we don't see new value until *after*
* the src registers are processed)
*/
struct {
struct ir3_instruction *instr, **instrp;
} output_updates[64];
unsigned num_output_updates;
 
/* are we in a sequence of "atomic" instructions?
*/
bool atomic;
 
/* For fragment shaders, from the hw perspective the only
* actual input is r0.xy position register passed to bary.f.
* But TGSI doesn't know that, it still declares things as
* IN[] registers. So we do all the input tracking normally
* and fix things up after compile_instructions()
*
* NOTE that frag_pos is the hardware position (possibly it
* is actually an index or tag or some such.. it is *not*
* values that can be directly used for gl_FragCoord..)
*/
struct ir3_instruction *frag_pos, *frag_face, *frag_coord[4];
 
/* For vertex shaders, keep track of the system values sources */
struct ir3_instruction *vertex_id, *basevertex, *instance_id;
 
struct tgsi_parse_context parser;
unsigned type;
 
struct tgsi_shader_info info;
 
/* hmm, would be nice if tgsi_scan_shader figured this out
* for us:
*/
struct {
unsigned first, last;
struct ir3_instruction *fanin;
} array[MAX_ARRAYS];
uint32_t array_dirty;
/* offset into array[], per file, of first array info */
uint8_t array_offsets[TGSI_FILE_COUNT];
 
/* for calculating input/output positions/linkages: */
unsigned next_inloc;
 
/* a4xx (at least patchlevel 0) cannot seem to flat-interpolate
* so we need to use ldlv.u32 to load the varying directly:
*/
bool flat_bypass;
 
unsigned num_internal_temps;
struct tgsi_src_register internal_temps[8];
 
/* for looking up which system value is which */
unsigned sysval_semantics[8];
 
/* idx/slot for last compiler generated immediate */
unsigned immediate_idx;
 
/* stack of branch instructions that mark (potentially nested)
* branch if/else/loop/etc
*/
struct {
struct ir3_instruction *instr, *cond;
bool inv; /* true iff in else leg of branch */
} branch[16];
unsigned int branch_count;
 
/* list of kill instructions: */
struct ir3_instruction *kill[16];
unsigned int kill_count;
 
/* used when dst is same as one of the src, to avoid overwriting a
* src element before the remaining scalar instructions that make
* up the vector operation
*/
struct tgsi_dst_register tmp_dst;
struct tgsi_src_register *tmp_src;
 
/* just for catching incorrect use of get_dst()/put_dst():
*/
bool using_tmp_dst;
};
 
 
static void vectorize(struct ir3_compile_context *ctx,
struct ir3_instruction *instr, struct tgsi_dst_register *dst,
int nsrcs, ...);
static void create_mov(struct ir3_compile_context *ctx,
struct tgsi_dst_register *dst, struct tgsi_src_register *src);
static type_t get_ftype(struct ir3_compile_context *ctx);
static type_t get_utype(struct ir3_compile_context *ctx);
 
static unsigned setup_arrays(struct ir3_compile_context *ctx, unsigned file, unsigned i)
{
/* ArrayID 0 for a given file is the legacy array spanning the entire file: */
ctx->array[i].first = 0;
ctx->array[i].last = ctx->info.file_max[file];
ctx->array_offsets[file] = i;
i += ctx->info.array_max[file] + 1;
return i;
}
 
static unsigned
compile_init(struct ir3_compile_context *ctx, struct ir3_shader_variant *so,
const struct tgsi_token *tokens)
{
unsigned ret, i;
struct tgsi_shader_info *info = &ctx->info;
struct tgsi_lowering_config lconfig = {
.color_two_side = so->key.color_two_side,
.lower_DST = true,
.lower_XPD = true,
.lower_SCS = true,
.lower_LRP = true,
.lower_FRC = true,
.lower_POW = true,
.lower_LIT = true,
.lower_EXP = true,
.lower_LOG = true,
.lower_DP4 = true,
.lower_DP3 = true,
.lower_DPH = true,
.lower_DP2 = true,
.lower_DP2A = true,
};
 
switch (so->type) {
case SHADER_FRAGMENT:
case SHADER_COMPUTE:
lconfig.saturate_s = so->key.fsaturate_s;
lconfig.saturate_t = so->key.fsaturate_t;
lconfig.saturate_r = so->key.fsaturate_r;
ctx->integer_s = so->key.finteger_s;
break;
case SHADER_VERTEX:
lconfig.saturate_s = so->key.vsaturate_s;
lconfig.saturate_t = so->key.vsaturate_t;
lconfig.saturate_r = so->key.vsaturate_r;
ctx->integer_s = so->key.vinteger_s;
break;
}
 
if (!so->shader) {
/* hack for standalone compiler which does not have
* screen/context:
*/
} else if (ir3_shader_gpuid(so->shader) >= 400) {
/* a4xx seems to have *no* sam.p */
lconfig.lower_TXP = ~0; /* lower all txp */
/* need special handling for "flat" */
ctx->flat_bypass = true;
} else {
/* a3xx just needs to avoid sam.p for 3d tex */
lconfig.lower_TXP = (1 << TGSI_TEXTURE_3D);
/* no special handling for "flat" */
ctx->flat_bypass = false;
}
 
ctx->tokens = tgsi_transform_lowering(&lconfig, tokens, &ctx->info);
ctx->free_tokens = !!ctx->tokens;
if (!ctx->tokens) {
/* no lowering */
ctx->tokens = tokens;
}
ctx->ir = so->ir;
ctx->so = so;
ctx->array_dirty = 0;
ctx->next_inloc = 8;
ctx->num_internal_temps = 0;
ctx->branch_count = 0;
ctx->kill_count = 0;
ctx->block = NULL;
ctx->current_instr = NULL;
ctx->num_output_updates = 0;
ctx->atomic = false;
ctx->frag_pos = NULL;
ctx->frag_face = NULL;
ctx->vertex_id = NULL;
ctx->instance_id = NULL;
ctx->tmp_src = NULL;
ctx->using_tmp_dst = false;
 
memset(ctx->frag_coord, 0, sizeof(ctx->frag_coord));
memset(ctx->array, 0, sizeof(ctx->array));
memset(ctx->array_offsets, 0, sizeof(ctx->array_offsets));
 
#define FM(x) (1 << TGSI_FILE_##x)
/* NOTE: if relative addressing is used, we set constlen in
* the compiler (to worst-case value) since we don't know in
* the assembler what the max addr reg value can be:
*/
if (info->indirect_files & FM(CONSTANT))
so->constlen = MIN2(255, ctx->info.const_file_max[0] + 1);
 
i = 0;
i += setup_arrays(ctx, TGSI_FILE_INPUT, i);
i += setup_arrays(ctx, TGSI_FILE_TEMPORARY, i);
i += setup_arrays(ctx, TGSI_FILE_OUTPUT, i);
/* any others? we don't track arrays for const..*/
 
/* Immediates go after constants: */
so->first_immediate = so->first_driver_param =
info->const_file_max[0] + 1;
/* 1 unit for the vertex id base */
if (so->type == SHADER_VERTEX)
so->first_immediate++;
/* 4 (vec4) units for ubo base addresses */
so->first_immediate += 4;
ctx->immediate_idx = 4 * (ctx->info.file_max[TGSI_FILE_IMMEDIATE] + 1);
 
ret = tgsi_parse_init(&ctx->parser, ctx->tokens);
if (ret != TGSI_PARSE_OK)
return ret;
 
ctx->type = ctx->parser.FullHeader.Processor.Processor;
 
return ret;
}
 
static void
compile_error(struct ir3_compile_context *ctx, const char *format, ...)
{
va_list ap;
va_start(ap, format);
_debug_vprintf(format, ap);
va_end(ap);
tgsi_dump(ctx->tokens, 0);
debug_assert(0);
}
 
#define compile_assert(ctx, cond) do { \
if (!(cond)) compile_error((ctx), "failed assert: "#cond"\n"); \
} while (0)
 
static void
compile_free(struct ir3_compile_context *ctx)
{
if (ctx->free_tokens)
free((void *)ctx->tokens);
tgsi_parse_free(&ctx->parser);
}
 
struct instr_translater {
void (*fxn)(const struct instr_translater *t,
struct ir3_compile_context *ctx,
struct tgsi_full_instruction *inst);
unsigned tgsi_opc;
opc_t opc;
opc_t hopc; /* opc to use for half_precision mode, if different */
unsigned arg;
};
 
static void
instr_finish(struct ir3_compile_context *ctx)
{
unsigned i;
 
if (ctx->atomic)
return;
 
for (i = 0; i < ctx->num_output_updates; i++)
*(ctx->output_updates[i].instrp) = ctx->output_updates[i].instr;
 
ctx->num_output_updates = 0;
 
while (ctx->array_dirty) {
unsigned aid = ffs(ctx->array_dirty) - 1;
ctx->array[aid].fanin = NULL;
ctx->array_dirty &= ~(1 << aid);
}
}
 
/* For "atomic" groups of instructions, for example the four scalar
* instructions to perform a vec4 operation. Basically this just
* blocks out handling of output_updates so the next scalar instruction
* still sees the result from before the start of the atomic group.
*
* NOTE: when used properly, this could probably replace get/put_dst()
* stuff.
*/
static void
instr_atomic_start(struct ir3_compile_context *ctx)
{
ctx->atomic = true;
}
 
static void
instr_atomic_end(struct ir3_compile_context *ctx)
{
ctx->atomic = false;
instr_finish(ctx);
}
 
static struct ir3_instruction *
instr_create(struct ir3_compile_context *ctx, int category, opc_t opc)
{
instr_finish(ctx);
return (ctx->current_instr = ir3_instr_create(ctx->block, category, opc));
}
 
static struct ir3_block *
push_block(struct ir3_compile_context *ctx)
{
struct ir3_block *block;
unsigned ntmp, nin, nout;
 
#define SCALAR_REGS(file) (4 * (ctx->info.file_max[TGSI_FILE_ ## file] + 1))
 
/* hmm, give ourselves room to create 8 extra temporaries (vec4):
*/
ntmp = SCALAR_REGS(TEMPORARY);
ntmp += 8 * 4;
 
nout = SCALAR_REGS(OUTPUT);
nin = SCALAR_REGS(INPUT) + SCALAR_REGS(SYSTEM_VALUE);
 
/* for outermost block, 'inputs' are the actual shader INPUT
* register file. Reads from INPUT registers always go back to
* top block. For nested blocks, 'inputs' is used to track any
* TEMPORARY file register from one of the enclosing blocks that
* is read in this block.
*/
if (!ctx->block) {
/* NOTE: fragment shaders actually have two inputs (r0.xy, the
* position)
*/
if (ctx->type == TGSI_PROCESSOR_FRAGMENT) {
int n = 2;
if (ctx->info.reads_position)
n += 4;
if (ctx->info.uses_frontface)
n += 4;
nin = MAX2(n, nin);
nout += ARRAY_SIZE(ctx->kill);
}
} else {
nin = ntmp;
}
 
block = ir3_block_create(ctx->ir, ntmp, nin, nout);
 
if ((ctx->type == TGSI_PROCESSOR_FRAGMENT) && !ctx->block)
block->noutputs -= ARRAY_SIZE(ctx->kill);
 
block->parent = ctx->block;
ctx->block = block;
 
return block;
}
 
static void
pop_block(struct ir3_compile_context *ctx)
{
ctx->block = ctx->block->parent;
compile_assert(ctx, ctx->block);
}
 
static struct ir3_instruction *
create_output(struct ir3_block *block, struct ir3_instruction *instr,
unsigned n)
{
struct ir3_instruction *out;
 
out = ir3_instr_create(block, -1, OPC_META_OUTPUT);
out->inout.block = block;
ir3_reg_create(out, n, 0);
if (instr)
ir3_reg_create(out, 0, IR3_REG_SSA)->instr = instr;
 
return out;
}
 
static struct ir3_instruction *
create_input(struct ir3_block *block, struct ir3_instruction *instr,
unsigned n)
{
struct ir3_instruction *in;
 
in = ir3_instr_create(block, -1, OPC_META_INPUT);
in->inout.block = block;
ir3_reg_create(in, n, 0);
if (instr)
ir3_reg_create(in, 0, IR3_REG_SSA)->instr = instr;
 
return in;
}
 
static struct ir3_instruction *
block_input(struct ir3_block *block, unsigned n)
{
/* references to INPUT register file always go back up to
* top level:
*/
if (block->parent)
return block_input(block->parent, n);
return block->inputs[n];
}
 
/* return temporary in scope, creating if needed meta-input node
* to track block inputs
*/
static struct ir3_instruction *
block_temporary(struct ir3_block *block, unsigned n)
{
/* references to TEMPORARY register file, find the nearest
* enclosing block which has already assigned this temporary,
* creating meta-input instructions along the way to keep
* track of block inputs
*/
if (block->parent && !block->temporaries[n]) {
/* if already have input for this block, reuse: */
if (!block->inputs[n])
block->inputs[n] = block_temporary(block->parent, n);
 
/* and create new input to return: */
return create_input(block, block->inputs[n], n);
}
return block->temporaries[n];
}
 
static struct ir3_instruction *
create_immed(struct ir3_compile_context *ctx, float val)
{
/* NOTE: *don't* use instr_create() here!
*/
struct ir3_instruction *instr;
instr = ir3_instr_create(ctx->block, 1, 0);
instr->cat1.src_type = get_ftype(ctx);
instr->cat1.dst_type = get_ftype(ctx);
ir3_reg_create(instr, 0, 0);
ir3_reg_create(instr, 0, IR3_REG_IMMED)->fim_val = val;
return instr;
}
 
static void
ssa_instr_set(struct ir3_compile_context *ctx, unsigned file, unsigned n,
struct ir3_instruction *instr)
{
struct ir3_block *block = ctx->block;
unsigned idx = ctx->num_output_updates;
 
compile_assert(ctx, idx < ARRAY_SIZE(ctx->output_updates));
 
/* NOTE: defer update of temporaries[idx] or output[idx]
* until instr_finish(), so that if the current instruction
* reads the same TEMP/OUT[] it gets the old value:
*
* bleh.. this might be a bit easier to just figure out
* in instr_finish(). But at that point we've already
* lost information about OUTPUT vs TEMPORARY register
* file..
*/
 
switch (file) {
case TGSI_FILE_OUTPUT:
compile_assert(ctx, n < block->noutputs);
ctx->output_updates[idx].instrp = &block->outputs[n];
ctx->output_updates[idx].instr = instr;
ctx->num_output_updates++;
break;
case TGSI_FILE_TEMPORARY:
compile_assert(ctx, n < block->ntemporaries);
ctx->output_updates[idx].instrp = &block->temporaries[n];
ctx->output_updates[idx].instr = instr;
ctx->num_output_updates++;
break;
case TGSI_FILE_ADDRESS:
compile_assert(ctx, n < 1);
ctx->output_updates[idx].instrp = &block->address;
ctx->output_updates[idx].instr = instr;
ctx->num_output_updates++;
break;
}
}
 
static struct ir3_instruction *
ssa_instr_get(struct ir3_compile_context *ctx, unsigned file, unsigned n)
{
struct ir3_block *block = ctx->block;
struct ir3_instruction *instr = NULL;
 
switch (file) {
case TGSI_FILE_INPUT:
instr = block_input(ctx->block, n);
break;
case TGSI_FILE_OUTPUT:
/* really this should just happen in case of 'MOV_SAT OUT[n], ..',
* for the following clamp instructions:
*/
instr = block->outputs[n];
/* we don't have to worry about read from an OUTPUT that was
* assigned outside of the current block, because the _SAT
* clamp instructions will always be in the same block as
* the original instruction which wrote the OUTPUT
*/
compile_assert(ctx, instr);
break;
case TGSI_FILE_TEMPORARY:
instr = block_temporary(ctx->block, n);
if (!instr) {
/* this can happen when registers (or components of a TGSI
* register) are used as src before they have been assigned
* (undefined contents). To avoid confusing the rest of the
* compiler, and to generally keep things peachy, substitute
* an instruction that sets the src to 0.0. Or to keep
* things undefined, I could plug in a random number? :-P
*
* NOTE: *don't* use instr_create() here!
*/
instr = create_immed(ctx, 0.0);
/* no need to recreate the immed for every access: */
block->temporaries[n] = instr;
}
break;
case TGSI_FILE_SYSTEM_VALUE:
switch (ctx->sysval_semantics[n >> 2]) {
case TGSI_SEMANTIC_VERTEXID_NOBASE:
instr = ctx->vertex_id;
break;
case TGSI_SEMANTIC_BASEVERTEX:
instr = ctx->basevertex;
break;
case TGSI_SEMANTIC_INSTANCEID:
instr = ctx->instance_id;
break;
}
break;
}
 
return instr;
}
 
static int dst_array_id(struct ir3_compile_context *ctx,
const struct tgsi_dst_register *dst)
{
// XXX complete hack to recover tgsi_full_dst_register...
// nothing that isn't wrapped in a tgsi_full_dst_register
// should be indirect
const struct tgsi_full_dst_register *fdst = (const void *)dst;
return fdst->Indirect.ArrayID + ctx->array_offsets[dst->File];
}
 
static int src_array_id(struct ir3_compile_context *ctx,
const struct tgsi_src_register *src)
{
// XXX complete hack to recover tgsi_full_src_register...
// nothing that isn't wrapped in a tgsi_full_src_register
// should be indirect
const struct tgsi_full_src_register *fsrc = (const void *)src;
debug_assert(src->File != TGSI_FILE_CONSTANT);
return fsrc->Indirect.ArrayID + ctx->array_offsets[src->File];
}
 
static struct ir3_instruction *
array_fanin(struct ir3_compile_context *ctx, unsigned aid, unsigned file)
{
struct ir3_instruction *instr;
 
if (ctx->array[aid].fanin) {
instr = ctx->array[aid].fanin;
} else {
unsigned first = ctx->array[aid].first;
unsigned last = ctx->array[aid].last;
unsigned i, j;
 
instr = ir3_instr_create2(ctx->block, -1, OPC_META_FI,
1 + (4 * (last + 1 - first)));
ir3_reg_create(instr, 0, 0);
for (i = first; i <= last; i++) {
for (j = 0; j < 4; j++) {
unsigned n = regid(i, j);
ir3_reg_create(instr, 0, IR3_REG_SSA)->instr =
ssa_instr_get(ctx, file, n);
}
}
ctx->array[aid].fanin = instr;
ctx->array_dirty |= (1 << aid);
}
 
return instr;
}
 
static void
ssa_dst(struct ir3_compile_context *ctx, struct ir3_instruction *instr,
const struct tgsi_dst_register *dst, unsigned chan)
{
if (dst->Indirect) {
struct ir3_register *reg = instr->regs[0];
unsigned i, aid = dst_array_id(ctx, dst);
unsigned first = ctx->array[aid].first;
unsigned last = ctx->array[aid].last;
unsigned off = dst->Index - first; /* vec4 offset */
 
reg->size = 4 * (1 + last - first);
reg->offset = regid(off, chan);
 
instr->fanin = array_fanin(ctx, aid, dst->File);
 
/* annotate with the array-id, to help out the register-
* assignment stage. At least for the case of indirect
* writes, we should capture enough dependencies to
* preserve the order of reads/writes of the array, so
* the multiple "names" for the array should end up all
* assigned to the same registers.
*/
instr->fanin->fi.aid = aid;
 
/* Since we are scalarizing vec4 tgsi instructions/regs, we
* run into a slight complication here. To do the naive thing
* and setup a fanout for each scalar array element would end
* up with the result that the instructions generated for each
* component of the vec4 would end up clobbering each other.
* So we take advantage here of knowing that the array index
* (after the shl.b) will be a multiple of four, and only set
* every fourth scalar component in the array. See also
* fixup_ssa_dst_array()
*/
for (i = first; i <= last; i++) {
struct ir3_instruction *split;
unsigned n = regid(i, chan);
int off = (4 * (i - first)) + chan;
 
if (is_meta(instr) && (instr->opc == OPC_META_FO))
off -= instr->fo.off;
 
split = ir3_instr_create(ctx->block, -1, OPC_META_FO);
split->fo.off = off;
ir3_reg_create(split, 0, 0);
ir3_reg_create(split, 0, IR3_REG_SSA)->instr = instr;
 
ssa_instr_set(ctx, dst->File, n, split);
}
} else {
/* normal case (not relative addressed GPR) */
ssa_instr_set(ctx, dst->File, regid(dst->Index, chan), instr);
}
}
 
static void
ssa_src(struct ir3_compile_context *ctx, struct ir3_register *reg,
const struct tgsi_src_register *src, unsigned chan)
{
struct ir3_instruction *instr;
 
if (src->Indirect && (src->File != TGSI_FILE_CONSTANT)) {
/* for relative addressing of gpr's (due to register assignment)
* we must generate a fanin instruction to collect all possible
* array elements that the instruction could address together:
*/
unsigned aid = src_array_id(ctx, src);
unsigned first = ctx->array[aid].first;
unsigned last = ctx->array[aid].last;
unsigned off = src->Index - first; /* vec4 offset */
 
reg->size = 4 * (1 + last - first);
reg->offset = regid(off, chan);
 
instr = array_fanin(ctx, aid, src->File);
} else if (src->File == TGSI_FILE_CONSTANT && src->Dimension) {
const struct tgsi_full_src_register *fsrc = (const void *)src;
struct ir3_instruction *temp = NULL;
int ubo_regid = regid(ctx->so->first_driver_param, 0) +
fsrc->Dimension.Index - 1;
int offset = 0;
 
/* We don't handle indirect UBO array accesses... yet. */
compile_assert(ctx, !fsrc->Dimension.Indirect);
/* UBOs start at index 1. */
compile_assert(ctx, fsrc->Dimension.Index > 0);
 
if (src->Indirect) {
/* In case of an indirect index, it will have been loaded into an
* address register. There will be a sequence of
*
* shl.b x, val, 2
* mova a0, x
*
* We rely on this sequence to get the original val out and shift
* it by 4, since we're dealing in vec4 units.
*/
compile_assert(ctx, ctx->block->address);
compile_assert(ctx, ctx->block->address->regs[1]->instr->opc ==
OPC_SHL_B);
 
temp = instr = instr_create(ctx, 2, OPC_SHL_B);
ir3_reg_create(instr, 0, 0);
ir3_reg_create(instr, 0, IR3_REG_HALF | IR3_REG_SSA)->instr =
ctx->block->address->regs[1]->instr->regs[1]->instr;
ir3_reg_create(instr, 0, IR3_REG_IMMED)->iim_val = 4;
} else if (src->Index >= 64) {
/* Otherwise it's a plain index (in vec4 units). Move it into a
* register.
*/
temp = instr = instr_create(ctx, 1, 0);
instr->cat1.src_type = get_utype(ctx);
instr->cat1.dst_type = get_utype(ctx);
ir3_reg_create(instr, 0, 0);
ir3_reg_create(instr, 0, IR3_REG_IMMED)->iim_val = src->Index * 16;
} else {
/* The offset is small enough to fit into the ldg instruction
* directly.
*/
offset = src->Index * 16;
}
 
if (temp) {
/* If there was an offset (most common), add it to the buffer
* address.
*/
instr = instr_create(ctx, 2, OPC_ADD_S);
ir3_reg_create(instr, 0, 0);
ir3_reg_create(instr, 0, IR3_REG_SSA)->instr = temp;
ir3_reg_create(instr, ubo_regid, IR3_REG_CONST);
} else {
/* Otherwise just load the buffer address directly */
instr = instr_create(ctx, 1, 0);
instr->cat1.src_type = get_utype(ctx);
instr->cat1.dst_type = get_utype(ctx);
ir3_reg_create(instr, 0, 0);
ir3_reg_create(instr, ubo_regid, IR3_REG_CONST);
}
 
temp = instr;
 
instr = instr_create(ctx, 6, OPC_LDG);
instr->cat6.type = TYPE_U32;
instr->cat6.offset = offset + chan * 4;
ir3_reg_create(instr, 0, 0);
ir3_reg_create(instr, 0, IR3_REG_SSA)->instr = temp;
ir3_reg_create(instr, 0, IR3_REG_IMMED)->iim_val = 1;
 
reg->flags &= ~(IR3_REG_RELATIV | IR3_REG_CONST);
} else {
/* normal case (not relative addressed GPR) */
instr = ssa_instr_get(ctx, src->File, regid(src->Index, chan));
}
 
if (instr) {
reg->flags |= IR3_REG_SSA;
reg->instr = instr;
} else if (reg->flags & IR3_REG_SSA) {
/* special hack for trans_samp() which calls ssa_src() directly
* to build up the collect (fanin) for const src.. (so SSA flag
* set but no src instr). It basically gets lucky because we
* default to 0.0 for "undefined" src instructions, which is
* what it wants. We probably need to give it a better way to
* do this, but for now this hack:
*/
reg->instr = create_immed(ctx, 0.0);
}
}
 
static struct ir3_register *
add_dst_reg_wrmask(struct ir3_compile_context *ctx,
struct ir3_instruction *instr, const struct tgsi_dst_register *dst,
unsigned chan, unsigned wrmask)
{
unsigned flags = 0, num = 0;
struct ir3_register *reg;
 
switch (dst->File) {
case TGSI_FILE_OUTPUT:
case TGSI_FILE_TEMPORARY:
/* uses SSA */
break;
case TGSI_FILE_ADDRESS:
flags |= IR3_REG_ADDR;
/* uses SSA */
break;
default:
compile_error(ctx, "unsupported dst register file: %s\n",
tgsi_file_name(dst->File));
break;
}
 
if (dst->Indirect) {
flags |= IR3_REG_RELATIV;
 
/* shouldn't happen, and we can't cope with it below: */
compile_assert(ctx, wrmask == 0x1);
 
compile_assert(ctx, ctx->block->address);
if (instr->address)
compile_assert(ctx, ctx->block->address == instr->address);
 
instr->address = ctx->block->address;
array_insert(ctx->ir->indirects, instr);
}
 
reg = ir3_reg_create(instr, regid(num, chan), flags);
reg->wrmask = wrmask;
 
if (wrmask == 0x1) {
/* normal case */
ssa_dst(ctx, instr, dst, chan);
} else if ((dst->File == TGSI_FILE_TEMPORARY) ||
(dst->File == TGSI_FILE_OUTPUT) ||
(dst->File == TGSI_FILE_ADDRESS)) {
struct ir3_instruction *prev = NULL;
unsigned i;
 
compile_assert(ctx, !dst->Indirect);
 
/* if instruction writes multiple, we need to create
* some place-holder to collect the registers:
*/
for (i = 0; i < 4; i++) {
/* NOTE: slightly ugly that we setup neighbor ptrs
* for FO here, but handle FI in CP pass.. we should
* probably just always setup neighbor ptrs in the
* frontend?
*/
struct ir3_instruction *split =
ir3_instr_create(ctx->block, -1, OPC_META_FO);
split->fo.off = i;
/* unused dst reg: */
/* NOTE: set SSA flag on dst here, because unused FO's
* which don't get scheduled will end up not in the
* instruction list when RA sets SSA flag on each dst.
* Slight hack. We really should set SSA flag on
* every dst register in the frontend.
*/
ir3_reg_create(split, 0, IR3_REG_SSA);
/* and src reg used to hold original instr */
ir3_reg_create(split, 0, IR3_REG_SSA)->instr = instr;
if (prev) {
split->cp.left = prev;
split->cp.left_cnt++;
prev->cp.right = split;
prev->cp.right_cnt++;
}
if ((wrmask & (1 << i)) && !ctx->atomic)
ssa_dst(ctx, split, dst, chan+i);
prev = split;
}
}
 
return reg;
}
 
static struct ir3_register *
add_dst_reg(struct ir3_compile_context *ctx, struct ir3_instruction *instr,
const struct tgsi_dst_register *dst, unsigned chan)
{
return add_dst_reg_wrmask(ctx, instr, dst, chan, 0x1);
}
 
static struct ir3_register *
add_src_reg_wrmask(struct ir3_compile_context *ctx,
struct ir3_instruction *instr, const struct tgsi_src_register *src,
unsigned chan, unsigned wrmask)
{
unsigned flags = 0, num = 0;
struct ir3_register *reg;
 
switch (src->File) {
case TGSI_FILE_IMMEDIATE:
/* TODO if possible, use actual immediate instead of const.. but
* TGSI has vec4 immediates, we can only embed scalar (of limited
* size, depending on instruction..)
*/
flags |= IR3_REG_CONST;
num = src->Index + ctx->so->first_immediate;
break;
case TGSI_FILE_CONSTANT:
flags |= IR3_REG_CONST;
num = src->Index;
break;
case TGSI_FILE_OUTPUT:
/* NOTE: we should only end up w/ OUTPUT file for things like
* clamp()'ing saturated dst instructions
*/
case TGSI_FILE_INPUT:
case TGSI_FILE_TEMPORARY:
case TGSI_FILE_SYSTEM_VALUE:
/* uses SSA */
break;
default:
compile_error(ctx, "unsupported src register file: %s\n",
tgsi_file_name(src->File));
break;
}
 
/* We seem to have 8 bits (6.2) for dst register always, so I think
* it is safe to assume GPR cannot be >=64
*
* cat3 instructions only have 8 bits for src2, but cannot take a
* const for src2
*
* cat5 and cat6 in some cases only has 8 bits, but cannot take a
* const for any src.
*
* Other than that we seem to have 12 bits to encode const src,
* except for cat1 which may only have 11 bits (but that seems like
* a bug)
*/
if (flags & IR3_REG_CONST)
compile_assert(ctx, src->Index < (1 << 9));
else
compile_assert(ctx, src->Index < (1 << 6));
 
/* NOTE: abs/neg modifiers in tgsi only apply to float */
if (src->Absolute)
flags |= IR3_REG_FABS;
if (src->Negate)
flags |= IR3_REG_FNEG;
 
if (src->Indirect) {
flags |= IR3_REG_RELATIV;
 
/* shouldn't happen, and we can't cope with it below: */
compile_assert(ctx, wrmask == 0x1);
 
compile_assert(ctx, ctx->block->address);
if (instr->address)
compile_assert(ctx, ctx->block->address == instr->address);
 
instr->address = ctx->block->address;
array_insert(ctx->ir->indirects, instr);
}
 
reg = ir3_reg_create(instr, regid(num, chan), flags);
reg->wrmask = wrmask;
 
if (wrmask == 0x1) {
/* normal case */
ssa_src(ctx, reg, src, chan);
} else if ((src->File == TGSI_FILE_TEMPORARY) ||
(src->File == TGSI_FILE_OUTPUT) ||
(src->File == TGSI_FILE_INPUT)) {
struct ir3_instruction *collect;
unsigned i;
 
compile_assert(ctx, !src->Indirect);
 
/* if instruction reads multiple, we need to create
* some place-holder to collect the registers:
*/
collect = ir3_instr_create(ctx->block, -1, OPC_META_FI);
ir3_reg_create(collect, 0, 0); /* unused dst reg */
 
for (i = 0; i < 4; i++) {
if (wrmask & (1 << i)) {
/* and src reg used to point to the original instr */
ssa_src(ctx, ir3_reg_create(collect, 0, IR3_REG_SSA),
src, chan + i);
} else if (wrmask & ~((1 << i) - 1)) {
/* if any remaining components, then dummy
* placeholder src reg to fill in the blanks:
*/
ir3_reg_create(collect, 0, 0);
}
}
 
reg->flags |= IR3_REG_SSA;
reg->instr = collect;
}
 
return reg;
}
 
static struct ir3_register *
add_src_reg(struct ir3_compile_context *ctx, struct ir3_instruction *instr,
const struct tgsi_src_register *src, unsigned chan)
{
return add_src_reg_wrmask(ctx, instr, src, chan, 0x1);
}
 
static void
src_from_dst(struct tgsi_src_register *src, struct tgsi_dst_register *dst)
{
src->File = dst->File;
src->Indirect = dst->Indirect;
src->Dimension = dst->Dimension;
src->Index = dst->Index;
src->Absolute = 0;
src->Negate = 0;
src->SwizzleX = TGSI_SWIZZLE_X;
src->SwizzleY = TGSI_SWIZZLE_Y;
src->SwizzleZ = TGSI_SWIZZLE_Z;
src->SwizzleW = TGSI_SWIZZLE_W;
}
 
/* Get internal-temp src/dst to use for a sequence of instructions
* generated by a single TGSI op.
*/
static struct tgsi_src_register *
get_internal_temp(struct ir3_compile_context *ctx,
struct tgsi_dst_register *tmp_dst)
{
struct tgsi_src_register *tmp_src;
int n;
 
tmp_dst->File = TGSI_FILE_TEMPORARY;
tmp_dst->WriteMask = TGSI_WRITEMASK_XYZW;
tmp_dst->Indirect = 0;
tmp_dst->Dimension = 0;
 
/* assign next temporary: */
n = ctx->num_internal_temps++;
compile_assert(ctx, n < ARRAY_SIZE(ctx->internal_temps));
tmp_src = &ctx->internal_temps[n];
 
tmp_dst->Index = ctx->info.file_max[TGSI_FILE_TEMPORARY] + n + 1;
 
src_from_dst(tmp_src, tmp_dst);
 
return tmp_src;
}
 
static inline bool
is_const(struct tgsi_src_register *src)
{
return (src->File == TGSI_FILE_CONSTANT) ||
(src->File == TGSI_FILE_IMMEDIATE);
}
 
static inline bool
is_relative(struct tgsi_src_register *src)
{
return src->Indirect;
}
 
static inline bool
is_rel_or_const(struct tgsi_src_register *src)
{
return is_relative(src) || is_const(src);
}
 
static type_t
get_ftype(struct ir3_compile_context *ctx)
{
return TYPE_F32;
}
 
static type_t
get_utype(struct ir3_compile_context *ctx)
{
return TYPE_U32;
}
 
static type_t
get_stype(struct ir3_compile_context *ctx)
{
return TYPE_S32;
}
 
static unsigned
src_swiz(struct tgsi_src_register *src, int chan)
{
switch (chan) {
case 0: return src->SwizzleX;
case 1: return src->SwizzleY;
case 2: return src->SwizzleZ;
case 3: return src->SwizzleW;
}
assert(0);
return 0;
}
 
/* for instructions that cannot take a const register as src, if needed
* generate a move to temporary gpr:
*/
static struct tgsi_src_register *
get_unconst(struct ir3_compile_context *ctx, struct tgsi_src_register *src)
{
struct tgsi_dst_register tmp_dst;
struct tgsi_src_register *tmp_src;
 
compile_assert(ctx, is_rel_or_const(src));
 
tmp_src = get_internal_temp(ctx, &tmp_dst);
 
create_mov(ctx, &tmp_dst, src);
 
return tmp_src;
}
 
static void
get_immediate(struct ir3_compile_context *ctx,
struct tgsi_src_register *reg, uint32_t val)
{
unsigned neg, swiz, idx, i;
/* actually maps 1:1 currently.. not sure if that is safe to rely on: */
static const unsigned swiz2tgsi[] = {
TGSI_SWIZZLE_X, TGSI_SWIZZLE_Y, TGSI_SWIZZLE_Z, TGSI_SWIZZLE_W,
};
 
for (i = 0; i < ctx->immediate_idx; i++) {
swiz = i % 4;
idx = i / 4;
 
if (ctx->so->immediates[idx].val[swiz] == val) {
neg = 0;
break;
}
 
if (ctx->so->immediates[idx].val[swiz] == -val) {
neg = 1;
break;
}
}
 
if (i == ctx->immediate_idx) {
/* need to generate a new immediate: */
swiz = i % 4;
idx = i / 4;
neg = 0;
ctx->so->immediates[idx].val[swiz] = val;
ctx->so->immediates_count = idx + 1;
ctx->immediate_idx++;
}
 
reg->File = TGSI_FILE_IMMEDIATE;
reg->Indirect = 0;
reg->Dimension = 0;
reg->Index = idx;
reg->Absolute = 0;
reg->Negate = neg;
reg->SwizzleX = swiz2tgsi[swiz];
reg->SwizzleY = swiz2tgsi[swiz];
reg->SwizzleZ = swiz2tgsi[swiz];
reg->SwizzleW = swiz2tgsi[swiz];
}
 
static void
create_mov(struct ir3_compile_context *ctx, struct tgsi_dst_register *dst,
struct tgsi_src_register *src)
{
type_t type_mov = get_ftype(ctx);
unsigned i;
 
for (i = 0; i < 4; i++) {
/* move to destination: */
if (dst->WriteMask & (1 << i)) {
struct ir3_instruction *instr;
 
if (src->Absolute || src->Negate) {
/* can't have abs or neg on a mov instr, so use
* absneg.f instead to handle these cases:
*/
instr = instr_create(ctx, 2, OPC_ABSNEG_F);
} else {
instr = instr_create(ctx, 1, 0);
instr->cat1.src_type = type_mov;
instr->cat1.dst_type = type_mov;
}
 
add_dst_reg(ctx, instr, dst, i);
add_src_reg(ctx, instr, src, src_swiz(src, i));
}
}
}
 
static void
create_clamp(struct ir3_compile_context *ctx,
struct tgsi_dst_register *dst, struct tgsi_src_register *val,
struct tgsi_src_register *minval, struct tgsi_src_register *maxval)
{
struct ir3_instruction *instr;
 
instr = instr_create(ctx, 2, OPC_MAX_F);
vectorize(ctx, instr, dst, 2, val, 0, minval, 0);
 
instr = instr_create(ctx, 2, OPC_MIN_F);
vectorize(ctx, instr, dst, 2, val, 0, maxval, 0);
}
 
static void
create_clamp_imm(struct ir3_compile_context *ctx,
struct tgsi_dst_register *dst,
uint32_t minval, uint32_t maxval)
{
struct tgsi_src_register minconst, maxconst;
struct tgsi_src_register src;
 
src_from_dst(&src, dst);
 
get_immediate(ctx, &minconst, minval);
get_immediate(ctx, &maxconst, maxval);
 
create_clamp(ctx, dst, &src, &minconst, &maxconst);
}
 
static struct tgsi_dst_register *
get_dst(struct ir3_compile_context *ctx, struct tgsi_full_instruction *inst)
{
struct tgsi_dst_register *dst = &inst->Dst[0].Register;
unsigned i;
 
compile_assert(ctx, !ctx->using_tmp_dst);
ctx->using_tmp_dst = true;
 
for (i = 0; i < inst->Instruction.NumSrcRegs; i++) {
struct tgsi_src_register *src = &inst->Src[i].Register;
if ((src->File == dst->File) && (src->Index == dst->Index)) {
if ((dst->WriteMask == TGSI_WRITEMASK_XYZW) &&
(src->SwizzleX == TGSI_SWIZZLE_X) &&
(src->SwizzleY == TGSI_SWIZZLE_Y) &&
(src->SwizzleZ == TGSI_SWIZZLE_Z) &&
(src->SwizzleW == TGSI_SWIZZLE_W))
continue;
ctx->tmp_src = get_internal_temp(ctx, &ctx->tmp_dst);
ctx->tmp_dst.WriteMask = dst->WriteMask;
dst = &ctx->tmp_dst;
break;
}
}
return dst;
}
 
static void
put_dst(struct ir3_compile_context *ctx, struct tgsi_full_instruction *inst,
struct tgsi_dst_register *dst)
{
compile_assert(ctx, ctx->using_tmp_dst);
ctx->using_tmp_dst = false;
 
/* if necessary, add mov back into original dst: */
if (dst != &inst->Dst[0].Register) {
create_mov(ctx, &inst->Dst[0].Register, ctx->tmp_src);
}
}
 
/* helper to generate the necessary repeat and/or additional instructions
* to turn a scalar instruction into a vector operation:
*/
static void
vectorize(struct ir3_compile_context *ctx, struct ir3_instruction *instr,
struct tgsi_dst_register *dst, int nsrcs, ...)
{
va_list ap;
int i, j, n = 0;
 
instr_atomic_start(ctx);
 
for (i = 0; i < 4; i++) {
if (dst->WriteMask & (1 << i)) {
struct ir3_instruction *cur;
 
if (n++ == 0) {
cur = instr;
} else {
cur = instr_create(ctx, instr->category, instr->opc);
memcpy(cur->info, instr->info, sizeof(cur->info));
}
 
add_dst_reg(ctx, cur, dst, i);
 
va_start(ap, nsrcs);
for (j = 0; j < nsrcs; j++) {
struct tgsi_src_register *src =
va_arg(ap, struct tgsi_src_register *);
unsigned flags = va_arg(ap, unsigned);
struct ir3_register *reg;
if (flags & IR3_REG_IMMED) {
reg = ir3_reg_create(cur, 0, IR3_REG_IMMED);
/* this is an ugly cast.. should have put flags first! */
reg->iim_val = *(int *)&src;
} else {
reg = add_src_reg(ctx, cur, src, src_swiz(src, i));
}
reg->flags |= flags & ~(IR3_REG_FNEG | IR3_REG_SNEG);
if (flags & IR3_REG_FNEG)
reg->flags ^= IR3_REG_FNEG;
if (flags & IR3_REG_SNEG)
reg->flags ^= IR3_REG_SNEG;
}
va_end(ap);
}
}
 
instr_atomic_end(ctx);
}
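/* Worked example (illustrative): a TGSI "ADD dst.xyz, a, b" reaching
 * vectorize() with nsrcs=2 is expanded into one scalar cat2 instruction
 * per enabled writemask channel, each with per-channel swizzled sources,
 * roughly:
 *
 *   add.f dst.x, a.x, b.x
 *   add.f dst.y, a.y, b.y
 *   add.f dst.z, a.z, b.z
 *
 * The group is bracketed by instr_atomic_start()/instr_atomic_end() so
 * that reads of dst within the group still see its pre-instruction value.
 */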
 
/*
* Handlers for TGSI instructions which do not have a 1:1 mapping to
* native instructions:
*/
 
static void
trans_clamp(const struct instr_translater *t,
struct ir3_compile_context *ctx,
struct tgsi_full_instruction *inst)
{
struct tgsi_dst_register *dst = get_dst(ctx, inst);
struct tgsi_src_register *src0 = &inst->Src[0].Register;
struct tgsi_src_register *src1 = &inst->Src[1].Register;
struct tgsi_src_register *src2 = &inst->Src[2].Register;
 
create_clamp(ctx, dst, src0, src1, src2);
 
put_dst(ctx, inst, dst);
}
 
/* ARL(x) = x, but mova from hrN.x to a0.. */
static void
trans_arl(const struct instr_translater *t,
struct ir3_compile_context *ctx,
struct tgsi_full_instruction *inst)
{
struct ir3_instruction *instr;
struct tgsi_dst_register tmp_dst;
struct tgsi_src_register *tmp_src;
struct tgsi_dst_register *dst = &inst->Dst[0].Register;
struct tgsi_src_register *src = &inst->Src[0].Register;
unsigned chan = src->SwizzleX;
 
compile_assert(ctx, dst->File == TGSI_FILE_ADDRESS);
 
/* NOTE: we allocate a temporary from a flat register
* namespace (ignoring half vs full). It turns out
* not to really matter since registers get reassigned
* later in ir3_ra which (hopefully!) can deal a bit
* better with mixed half and full precision.
*/
tmp_src = get_internal_temp(ctx, &tmp_dst);
 
/* cov.{u,f}{32,16}s16 Rtmp, Rsrc */
instr = instr_create(ctx, 1, 0);
instr->cat1.src_type = (t->tgsi_opc == TGSI_OPCODE_ARL) ?
get_ftype(ctx) : get_utype(ctx);
instr->cat1.dst_type = TYPE_S16;
add_dst_reg(ctx, instr, &tmp_dst, chan)->flags |= IR3_REG_HALF;
add_src_reg(ctx, instr, src, chan);
 
/* shl.b Rtmp, Rtmp, 2 */
instr = instr_create(ctx, 2, OPC_SHL_B);
add_dst_reg(ctx, instr, &tmp_dst, chan)->flags |= IR3_REG_HALF;
add_src_reg(ctx, instr, tmp_src, chan)->flags |= IR3_REG_HALF;
ir3_reg_create(instr, 0, IR3_REG_IMMED)->iim_val = 2;
 
/* mova a0, Rtmp */
instr = instr_create(ctx, 1, 0);
instr->cat1.src_type = TYPE_S16;
instr->cat1.dst_type = TYPE_S16;
add_dst_reg(ctx, instr, dst, 0)->flags |= IR3_REG_HALF;
add_src_reg(ctx, instr, tmp_src, chan)->flags |= IR3_REG_HALF;
}
 
/*
* texture fetch/sample instructions:
*/
 
struct tex_info {
int8_t order[4];
int8_t args;
unsigned src_wrmask, flags;
};
 
struct target_info {
uint8_t dims;
uint8_t cube;
uint8_t array;
uint8_t shadow;
};
 
static const struct target_info tex_targets[] = {
[TGSI_TEXTURE_1D] = { 1, 0, 0, 0 },
[TGSI_TEXTURE_2D] = { 2, 0, 0, 0 },
[TGSI_TEXTURE_3D] = { 3, 0, 0, 0 },
[TGSI_TEXTURE_CUBE] = { 3, 1, 0, 0 },
[TGSI_TEXTURE_RECT] = { 2, 0, 0, 0 },
[TGSI_TEXTURE_SHADOW1D] = { 1, 0, 0, 1 },
[TGSI_TEXTURE_SHADOW2D] = { 2, 0, 0, 1 },
[TGSI_TEXTURE_SHADOWRECT] = { 2, 0, 0, 1 },
[TGSI_TEXTURE_1D_ARRAY] = { 1, 0, 1, 0 },
[TGSI_TEXTURE_2D_ARRAY] = { 2, 0, 1, 0 },
[TGSI_TEXTURE_SHADOW1D_ARRAY] = { 1, 0, 1, 1 },
[TGSI_TEXTURE_SHADOW2D_ARRAY] = { 2, 0, 1, 1 },
[TGSI_TEXTURE_SHADOWCUBE] = { 3, 1, 0, 1 },
[TGSI_TEXTURE_2D_MSAA] = { 2, 0, 0, 0 },
[TGSI_TEXTURE_2D_ARRAY_MSAA] = { 2, 0, 1, 0 },
[TGSI_TEXTURE_CUBE_ARRAY] = { 3, 1, 1, 0 },
[TGSI_TEXTURE_SHADOWCUBE_ARRAY] = { 3, 1, 1, 1 },
};
 
static void
fill_tex_info(struct ir3_compile_context *ctx,
struct tgsi_full_instruction *inst,
struct tex_info *info)
{
const struct target_info *tgt = &tex_targets[inst->Texture.Texture];
 
if (tgt->dims == 3)
info->flags |= IR3_INSTR_3D;
if (tgt->array)
info->flags |= IR3_INSTR_A;
if (tgt->shadow)
info->flags |= IR3_INSTR_S;
 
switch (inst->Instruction.Opcode) {
case TGSI_OPCODE_TXB:
case TGSI_OPCODE_TXB2:
case TGSI_OPCODE_TXL:
case TGSI_OPCODE_TXF:
info->args = 2;
break;
case TGSI_OPCODE_TXP:
info->flags |= IR3_INSTR_P;
/* fallthrough */
case TGSI_OPCODE_TEX:
case TGSI_OPCODE_TXD:
info->args = 1;
break;
}
 
/*
* lay out the first argument in the proper order:
* - actual coordinates first
* - shadow reference
* - array index
* - projection w
*
* bias/lod go into the second arg
*/
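/* e.g. for TGSI_TEXTURE_SHADOW2D_ARRAY (dims=2, array, shadow) the TGSI
 * coordinate (s, t, layer, ref) gets reordered to (s, t, ref, layer),
 * i.e. order = {0, 1, 3, 2} and src_wrmask = 0xf.
 */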
int arg, pos = 0;
for (arg = 0; arg < tgt->dims; arg++)
info->order[arg] = pos++;
if (tgt->dims == 1)
info->order[pos++] = -1;
if (tgt->shadow)
info->order[pos++] = MAX2(arg + tgt->array, 2);
if (tgt->array)
info->order[pos++] = arg++;
if (info->flags & IR3_INSTR_P)
info->order[pos++] = 3;
 
info->src_wrmask = (1 << pos) - 1;
 
for (; pos < 4; pos++)
info->order[pos] = -1;
 
assert(pos <= 4);
}
 
static bool check_swiz(struct tgsi_src_register *src, const int8_t order[4])
{
unsigned i;
for (i = 1; (i < 4) && order[i] >= 0; i++)
if (src_swiz(src, i) != (src_swiz(src, 0) + order[i]))
return false;
return true;
}
 
static bool is_1d(unsigned tex)
{
return tex_targets[tex].dims == 1;
}
 
static struct tgsi_src_register *
get_tex_coord(struct ir3_compile_context *ctx,
struct tgsi_full_instruction *inst,
const struct tex_info *tinf)
{
struct tgsi_src_register *coord = &inst->Src[0].Register;
struct ir3_instruction *instr;
unsigned tex = inst->Texture.Texture;
struct tgsi_dst_register tmp_dst;
struct tgsi_src_register *tmp_src;
type_t type_mov = get_ftype(ctx);
unsigned j;
 
/* need to move things around: */
tmp_src = get_internal_temp(ctx, &tmp_dst);
 
for (j = 0; j < 4; j++) {
if (tinf->order[j] < 0)
continue;
instr = instr_create(ctx, 1, 0); /* mov */
instr->cat1.src_type = type_mov;
instr->cat1.dst_type = type_mov;
add_dst_reg(ctx, instr, &tmp_dst, j);
add_src_reg(ctx, instr, coord,
src_swiz(coord, tinf->order[j]));
}
 
/* fix up .y coord: */
if (is_1d(tex)) {
struct ir3_register *imm;
instr = instr_create(ctx, 1, 0); /* mov */
instr->cat1.src_type = type_mov;
instr->cat1.dst_type = type_mov;
add_dst_reg(ctx, instr, &tmp_dst, 1); /* .y */
imm = ir3_reg_create(instr, 0, IR3_REG_IMMED);
if (inst->Instruction.Opcode == TGSI_OPCODE_TXF)
imm->iim_val = 0;
else
imm->fim_val = 0.5;
}
 
return tmp_src;
}
 
static void
trans_samp(const struct instr_translater *t,
struct ir3_compile_context *ctx,
struct tgsi_full_instruction *inst)
{
struct ir3_instruction *instr, *collect;
struct ir3_register *reg;
struct tgsi_dst_register *dst = &inst->Dst[0].Register;
struct tgsi_src_register *orig, *coord, *samp, *offset, *dpdx, *dpdy;
struct tgsi_src_register zero;
const struct target_info *tgt = &tex_targets[inst->Texture.Texture];
struct tex_info tinf;
int i;
 
memset(&tinf, 0, sizeof(tinf));
fill_tex_info(ctx, inst, &tinf);
coord = get_tex_coord(ctx, inst, &tinf);
get_immediate(ctx, &zero, 0);
 
switch (inst->Instruction.Opcode) {
case TGSI_OPCODE_TXB2:
orig = &inst->Src[1].Register;
samp = &inst->Src[2].Register;
break;
case TGSI_OPCODE_TXD:
orig = &inst->Src[0].Register;
dpdx = &inst->Src[1].Register;
dpdy = &inst->Src[2].Register;
samp = &inst->Src[3].Register;
if (is_rel_or_const(dpdx))
dpdx = get_unconst(ctx, dpdx);
if (is_rel_or_const(dpdy))
dpdy = get_unconst(ctx, dpdy);
break;
default:
orig = &inst->Src[0].Register;
samp = &inst->Src[1].Register;
break;
}
if (tinf.args > 1 && is_rel_or_const(orig))
orig = get_unconst(ctx, orig);
 
/* scale up integer coords for TXF based on the LOD */
if (inst->Instruction.Opcode == TGSI_OPCODE_TXF) {
struct tgsi_dst_register tmp_dst;
struct tgsi_src_register *tmp_src;
type_t type_mov = get_utype(ctx);
 
tmp_src = get_internal_temp(ctx, &tmp_dst);
for (i = 0; i < tgt->dims; i++) {
instr = instr_create(ctx, 2, OPC_SHL_B);
add_dst_reg(ctx, instr, &tmp_dst, i);
add_src_reg(ctx, instr, coord, src_swiz(coord, i));
add_src_reg(ctx, instr, orig, orig->SwizzleW);
}
if (tgt->dims < 2) {
instr = instr_create(ctx, 1, 0);
instr->cat1.src_type = type_mov;
instr->cat1.dst_type = type_mov;
add_dst_reg(ctx, instr, &tmp_dst, i);
add_src_reg(ctx, instr, &zero, zero.SwizzleX);
i++;
}
if (tgt->array) {
instr = instr_create(ctx, 1, 0);
instr->cat1.src_type = type_mov;
instr->cat1.dst_type = type_mov;
add_dst_reg(ctx, instr, &tmp_dst, i);
add_src_reg(ctx, instr, coord, src_swiz(coord, i));
}
coord = tmp_src;
}
 
if (inst->Texture.NumOffsets) {
struct tgsi_texture_offset *tex_offset = &inst->TexOffsets[0];
struct tgsi_src_register offset_src = {0};
 
offset_src.File = tex_offset->File;
offset_src.Index = tex_offset->Index;
offset_src.SwizzleX = tex_offset->SwizzleX;
offset_src.SwizzleY = tex_offset->SwizzleY;
offset_src.SwizzleZ = tex_offset->SwizzleZ;
offset = get_unconst(ctx, &offset_src);
tinf.flags |= IR3_INSTR_O;
}
 
instr = instr_create(ctx, 5, t->opc);
if (ctx->integer_s & (1 << samp->Index))
instr->cat5.type = get_utype(ctx);
else
instr->cat5.type = get_ftype(ctx);
instr->cat5.samp = samp->Index;
instr->cat5.tex = samp->Index;
instr->flags |= tinf.flags;
 
add_dst_reg_wrmask(ctx, instr, dst, 0, dst->WriteMask);
 
reg = ir3_reg_create(instr, 0, IR3_REG_SSA);
 
collect = ir3_instr_create2(ctx->block, -1, OPC_META_FI, 12);
ir3_reg_create(collect, 0, 0);
for (i = 0; i < 4; i++) {
if (tinf.src_wrmask & (1 << i))
ssa_src(ctx, ir3_reg_create(collect, 0, IR3_REG_SSA),
coord, src_swiz(coord, i));
else if (tinf.src_wrmask & ~((1 << i) - 1))
ir3_reg_create(collect, 0, 0);
}
 
/* Attach derivatives onto the end of the fan-in. Derivatives start after
* the 4th argument, so make sure that fi is padded up to 4 first.
*/
if (inst->Instruction.Opcode == TGSI_OPCODE_TXD) {
while (collect->regs_count < 5)
ssa_src(ctx, ir3_reg_create(collect, 0, IR3_REG_SSA),
&zero, zero.SwizzleX);
for (i = 0; i < tgt->dims; i++)
ssa_src(ctx, ir3_reg_create(collect, 0, IR3_REG_SSA), dpdx, i);
if (tgt->dims < 2)
ssa_src(ctx, ir3_reg_create(collect, 0, IR3_REG_SSA),
&zero, zero.SwizzleX);
for (i = 0; i < tgt->dims; i++)
ssa_src(ctx, ir3_reg_create(collect, 0, IR3_REG_SSA), dpdy, i);
if (tgt->dims < 2)
ssa_src(ctx, ir3_reg_create(collect, 0, IR3_REG_SSA),
&zero, zero.SwizzleX);
tinf.src_wrmask |= ((1 << (2 * MAX2(tgt->dims, 2))) - 1) << 4;
}
 
reg->instr = collect;
reg->wrmask = tinf.src_wrmask;
 
/* The second argument contains the offsets, followed by the lod/bias
* argument. This is constructed more manually due to the dynamic nature.
*/
if (inst->Texture.NumOffsets == 0 && tinf.args == 1)
return;
 
reg = ir3_reg_create(instr, 0, IR3_REG_SSA);
 
collect = ir3_instr_create2(ctx->block, -1, OPC_META_FI, 5);
ir3_reg_create(collect, 0, 0);
 
if (inst->Texture.NumOffsets) {
for (i = 0; i < tgt->dims; i++)
ssa_src(ctx, ir3_reg_create(collect, 0, IR3_REG_SSA),
offset, i);
if (tgt->dims < 2)
ssa_src(ctx, ir3_reg_create(collect, 0, IR3_REG_SSA),
&zero, zero.SwizzleX);
}
if (inst->Instruction.Opcode == TGSI_OPCODE_TXB2)
ssa_src(ctx, ir3_reg_create(collect, 0, IR3_REG_SSA),
orig, orig->SwizzleX);
else if (tinf.args > 1)
ssa_src(ctx, ir3_reg_create(collect, 0, IR3_REG_SSA),
orig, orig->SwizzleW);
 
reg->instr = collect;
reg->wrmask = (1 << (collect->regs_count - 1)) - 1;
}
 
static void
trans_txq(const struct instr_translater *t,
struct ir3_compile_context *ctx,
struct tgsi_full_instruction *inst)
{
struct ir3_instruction *instr;
struct tgsi_dst_register *dst = &inst->Dst[0].Register;
struct tgsi_src_register *level = &inst->Src[0].Register;
struct tgsi_src_register *samp = &inst->Src[1].Register;
const struct target_info *tgt = &tex_targets[inst->Texture.Texture];
struct tex_info tinf;
 
memset(&tinf, 0, sizeof(tinf));
fill_tex_info(ctx, inst, &tinf);
if (is_rel_or_const(level))
level = get_unconst(ctx, level);
 
instr = instr_create(ctx, 5, OPC_GETSIZE);
instr->cat5.type = get_utype(ctx);
instr->cat5.samp = samp->Index;
instr->cat5.tex = samp->Index;
instr->flags |= tinf.flags;
 
if (tgt->array && (dst->WriteMask & (1 << tgt->dims))) {
/* Array size actually ends up in .w rather than .z. This doesn't
* matter for miplevel 0, but for higher mips the value in z is
* minified whereas w stays. Also, the value in TEX_CONST_3_DEPTH is
* returned, which means that we have to add 1 to it for arrays.
*/
struct tgsi_dst_register tmp_dst;
struct tgsi_src_register *tmp_src;
type_t type_mov = get_utype(ctx);
 
tmp_src = get_internal_temp(ctx, &tmp_dst);
add_dst_reg_wrmask(ctx, instr, &tmp_dst, 0,
dst->WriteMask | TGSI_WRITEMASK_W);
add_src_reg_wrmask(ctx, instr, level, level->SwizzleX, 0x1);
 
if (dst->WriteMask & TGSI_WRITEMASK_X) {
instr = instr_create(ctx, 1, 0);
instr->cat1.src_type = type_mov;
instr->cat1.dst_type = type_mov;
add_dst_reg(ctx, instr, dst, 0);
add_src_reg(ctx, instr, tmp_src, src_swiz(tmp_src, 0));
}
 
if (tgt->dims == 2) {
if (dst->WriteMask & TGSI_WRITEMASK_Y) {
instr = instr_create(ctx, 1, 0);
instr->cat1.src_type = type_mov;
instr->cat1.dst_type = type_mov;
add_dst_reg(ctx, instr, dst, 1);
add_src_reg(ctx, instr, tmp_src, src_swiz(tmp_src, 1));
}
}
 
instr = instr_create(ctx, 2, OPC_ADD_U);
add_dst_reg(ctx, instr, dst, tgt->dims);
add_src_reg(ctx, instr, tmp_src, src_swiz(tmp_src, 3));
ir3_reg_create(instr, 0, IR3_REG_IMMED)->iim_val = 1;
} else {
add_dst_reg_wrmask(ctx, instr, dst, 0, dst->WriteMask);
add_src_reg_wrmask(ctx, instr, level, level->SwizzleX, 0x1);
}
 
if (dst->WriteMask & TGSI_WRITEMASK_W) {
/* The # of levels comes from getinfo.z. We need to add 1 to it, since
* the value in TEX_CONST_0 is zero-based.
*/
struct tgsi_dst_register tmp_dst;
struct tgsi_src_register *tmp_src;
 
tmp_src = get_internal_temp(ctx, &tmp_dst);
instr = instr_create(ctx, 5, OPC_GETINFO);
instr->cat5.type = get_utype(ctx);
instr->cat5.samp = samp->Index;
instr->cat5.tex = samp->Index;
add_dst_reg_wrmask(ctx, instr, &tmp_dst, 0, TGSI_WRITEMASK_Z);
 
instr = instr_create(ctx, 2, OPC_ADD_U);
add_dst_reg(ctx, instr, dst, 3);
add_src_reg(ctx, instr, tmp_src, src_swiz(tmp_src, 2));
ir3_reg_create(instr, 0, IR3_REG_IMMED)->iim_val = 1;
}
}
 
/* DDX/DDY */
static void
trans_deriv(const struct instr_translater *t,
struct ir3_compile_context *ctx,
struct tgsi_full_instruction *inst)
{
struct ir3_instruction *instr;
struct tgsi_dst_register *dst = &inst->Dst[0].Register;
struct tgsi_src_register *src = &inst->Src[0].Register;
static const int8_t order[4] = {0, 1, 2, 3};
 
if (!check_swiz(src, order)) {
struct tgsi_dst_register tmp_dst;
struct tgsi_src_register *tmp_src;
 
tmp_src = get_internal_temp(ctx, &tmp_dst);
create_mov(ctx, &tmp_dst, src);
 
src = tmp_src;
}
 
/* This might be a workaround for a hw bug? The blob compiler always
* seems to work two components at a time for dsy/dsx. It does
* actually seem to work in some cases (or at least some piglit
* tests) for four components at a time. But seems more reliable
* to split this into two instructions like the blob compiler
* does:
*/
 
instr = instr_create(ctx, 5, t->opc);
instr->cat5.type = get_ftype(ctx);
add_dst_reg_wrmask(ctx, instr, dst, 0, dst->WriteMask & 0x3);
add_src_reg_wrmask(ctx, instr, src, 0, dst->WriteMask & 0x3);
 
instr = instr_create(ctx, 5, t->opc);
instr->cat5.type = get_ftype(ctx);
add_dst_reg_wrmask(ctx, instr, dst, 2, (dst->WriteMask >> 2) & 0x3);
add_src_reg_wrmask(ctx, instr, src, 2, (dst->WriteMask >> 2) & 0x3);
}
 
/*
* SEQ(a,b) = (a == b) ? 1.0 : 0.0
* cmps.f.eq tmp0, a, b
* cov.u16f16 dst, tmp0
*
* SNE(a,b) = (a != b) ? 1.0 : 0.0
* cmps.f.ne tmp0, a, b
* cov.u16f16 dst, tmp0
*
* SGE(a,b) = (a >= b) ? 1.0 : 0.0
* cmps.f.ge tmp0, a, b
* cov.u16f16 dst, tmp0
*
* SLE(a,b) = (a <= b) ? 1.0 : 0.0
* cmps.f.le tmp0, a, b
* cov.u16f16 dst, tmp0
*
* SGT(a,b) = (a > b) ? 1.0 : 0.0
* cmps.f.gt tmp0, a, b
* cov.u16f16 dst, tmp0
*
* SLT(a,b) = (a < b) ? 1.0 : 0.0
* cmps.f.lt tmp0, a, b
* cov.u16f16 dst, tmp0
*
* CMP(a,b,c) = (a < 0.0) ? b : c
* cmps.f.lt tmp0, a, {0.0}
* sel.b16 dst, b, tmp0, c
*/
static void
trans_cmp(const struct instr_translater *t,
struct ir3_compile_context *ctx,
struct tgsi_full_instruction *inst)
{
struct ir3_instruction *instr;
struct tgsi_dst_register tmp_dst;
struct tgsi_src_register *tmp_src;
struct tgsi_src_register constval0;
/* final instruction for CMP() uses orig src1 and src2: */
struct tgsi_dst_register *dst = get_dst(ctx, inst);
struct tgsi_src_register *a0, *a1, *a2;
unsigned condition;
 
tmp_src = get_internal_temp(ctx, &tmp_dst);
 
a0 = &inst->Src[0].Register; /* a */
a1 = &inst->Src[1].Register; /* b */
 
switch (t->tgsi_opc) {
case TGSI_OPCODE_SEQ:
case TGSI_OPCODE_FSEQ:
condition = IR3_COND_EQ;
break;
case TGSI_OPCODE_SNE:
case TGSI_OPCODE_FSNE:
condition = IR3_COND_NE;
break;
case TGSI_OPCODE_SGE:
case TGSI_OPCODE_FSGE:
condition = IR3_COND_GE;
break;
case TGSI_OPCODE_SLT:
case TGSI_OPCODE_FSLT:
condition = IR3_COND_LT;
break;
case TGSI_OPCODE_SLE:
condition = IR3_COND_LE;
break;
case TGSI_OPCODE_SGT:
condition = IR3_COND_GT;
break;
case TGSI_OPCODE_CMP:
get_immediate(ctx, &constval0, fui(0.0));
a0 = &inst->Src[0].Register; /* a */
a1 = &constval0; /* {0.0} */
condition = IR3_COND_LT;
break;
default:
compile_assert(ctx, 0);
return;
}
 
if (is_const(a0) && is_const(a1))
a0 = get_unconst(ctx, a0);
 
/* cmps.f.<cond> tmp, a0, a1 */
instr = instr_create(ctx, 2, OPC_CMPS_F);
instr->cat2.condition = condition;
vectorize(ctx, instr, &tmp_dst, 2, a0, 0, a1, 0);
 
switch (t->tgsi_opc) {
case TGSI_OPCODE_SEQ:
case TGSI_OPCODE_SGE:
case TGSI_OPCODE_SLE:
case TGSI_OPCODE_SNE:
case TGSI_OPCODE_SGT:
case TGSI_OPCODE_SLT:
/* cov.u16f16 dst, tmp0 */
instr = instr_create(ctx, 1, 0);
instr->cat1.src_type = get_utype(ctx);
instr->cat1.dst_type = get_ftype(ctx);
vectorize(ctx, instr, dst, 1, tmp_src, 0);
break;
case TGSI_OPCODE_FSEQ:
case TGSI_OPCODE_FSGE:
case TGSI_OPCODE_FSNE:
case TGSI_OPCODE_FSLT:
/* absneg.s dst, (neg)tmp0 */
instr = instr_create(ctx, 2, OPC_ABSNEG_S);
vectorize(ctx, instr, dst, 1, tmp_src, IR3_REG_SNEG);
break;
case TGSI_OPCODE_CMP:
a1 = &inst->Src[1].Register;
a2 = &inst->Src[2].Register;
/* sel.{b32,b16} dst, src1, tmp, src2 */
instr = instr_create(ctx, 3, OPC_SEL_B32);
vectorize(ctx, instr, dst, 3, a1, 0, tmp_src, 0, a2, 0);
 
break;
}
 
put_dst(ctx, inst, dst);
}
 
/*
* USNE(a,b) = (a != b) ? ~0 : 0
* cmps.u32.ne dst, a, b
*
* USEQ(a,b) = (a == b) ? ~0 : 0
* cmps.u32.eq dst, a, b
*
* ISGE(a,b) = (a >= b) ? ~0 : 0
* cmps.s32.ge dst, a, b
*
* USGE(a,b) = (a >= b) ? ~0 : 0
* cmps.u32.ge dst, a, b
*
* ISLT(a,b) = (a < b) ? ~0 : 0
* cmps.s32.lt dst, a, b
*
* USLT(a,b) = (a < b) ? ~0 : 0
* cmps.u32.lt dst, a, b
*
*/
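/* Note that cmps itself produces 0/1; the absneg.s (neg) that follows in
 * trans_icmp() turns the 1 into ~0, which is what the TGSI integer
 * comparisons above are specified to return.
 */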
static void
trans_icmp(const struct instr_translater *t,
struct ir3_compile_context *ctx,
struct tgsi_full_instruction *inst)
{
struct ir3_instruction *instr;
struct tgsi_dst_register *dst = get_dst(ctx, inst);
struct tgsi_dst_register tmp_dst;
struct tgsi_src_register *tmp_src;
struct tgsi_src_register *a0, *a1;
unsigned condition;
 
a0 = &inst->Src[0].Register; /* a */
a1 = &inst->Src[1].Register; /* b */
 
switch (t->tgsi_opc) {
case TGSI_OPCODE_USNE:
condition = IR3_COND_NE;
break;
case TGSI_OPCODE_USEQ:
condition = IR3_COND_EQ;
break;
case TGSI_OPCODE_ISGE:
case TGSI_OPCODE_USGE:
condition = IR3_COND_GE;
break;
case TGSI_OPCODE_ISLT:
case TGSI_OPCODE_USLT:
condition = IR3_COND_LT;
break;
 
default:
compile_assert(ctx, 0);
return;
}
 
if (is_const(a0) && is_const(a1))
a0 = get_unconst(ctx, a0);
 
tmp_src = get_internal_temp(ctx, &tmp_dst);
/* cmps.{u32,s32}.<cond> tmp, a0, a1 */
instr = instr_create(ctx, 2, t->opc);
instr->cat2.condition = condition;
vectorize(ctx, instr, &tmp_dst, 2, a0, 0, a1, 0);
 
/* absneg.s dst, (neg)tmp */
instr = instr_create(ctx, 2, OPC_ABSNEG_S);
vectorize(ctx, instr, dst, 1, tmp_src, IR3_REG_SNEG);
 
put_dst(ctx, inst, dst);
}
 
/*
* UCMP(a,b,c) = a ? b : c
* sel.b16 dst, b, a, c
*/
static void
trans_ucmp(const struct instr_translater *t,
struct ir3_compile_context *ctx,
struct tgsi_full_instruction *inst)
{
struct ir3_instruction *instr;
struct tgsi_dst_register *dst = get_dst(ctx, inst);
struct tgsi_src_register *a0, *a1, *a2;
 
a0 = &inst->Src[0].Register; /* a */
a1 = &inst->Src[1].Register; /* b */
a2 = &inst->Src[2].Register; /* c */
 
if (is_rel_or_const(a0))
a0 = get_unconst(ctx, a0);
 
/* sel.{b32,b16} dst, b, a, c */
instr = instr_create(ctx, 3, OPC_SEL_B32);
vectorize(ctx, instr, dst, 3, a1, 0, a0, 0, a2, 0);
put_dst(ctx, inst, dst);
}
 
/*
* ISSG(a) = a < 0 ? -1 : a > 0 ? 1 : 0
* cmps.s.lt tmp_neg, a, 0 # 1 if a is negative
* cmps.s.gt tmp_pos, a, 0 # 1 if a is positive
* sub.u dst, tmp_pos, tmp_neg
*/
static void
trans_issg(const struct instr_translater *t,
struct ir3_compile_context *ctx,
struct tgsi_full_instruction *inst)
{
struct ir3_instruction *instr;
struct tgsi_dst_register *dst = get_dst(ctx, inst);
struct tgsi_src_register *a = &inst->Src[0].Register;
struct tgsi_dst_register neg_dst, pos_dst;
struct tgsi_src_register *neg_src, *pos_src;
 
neg_src = get_internal_temp(ctx, &neg_dst);
pos_src = get_internal_temp(ctx, &pos_dst);
 
/* cmps.s.lt neg, a, 0 */
instr = instr_create(ctx, 2, OPC_CMPS_S);
instr->cat2.condition = IR3_COND_LT;
vectorize(ctx, instr, &neg_dst, 2, a, 0, 0, IR3_REG_IMMED);
 
/* cmps.s.gt pos, a, 0 */
instr = instr_create(ctx, 2, OPC_CMPS_S);
instr->cat2.condition = IR3_COND_GT;
vectorize(ctx, instr, &pos_dst, 2, a, 0, 0, IR3_REG_IMMED);
 
/* sub.u dst, pos, neg */
instr = instr_create(ctx, 2, OPC_SUB_U);
vectorize(ctx, instr, dst, 2, pos_src, 0, neg_src, 0);
 
put_dst(ctx, inst, dst);
}
 
 
 
/*
* Conditional / Flow control
*/
 
static void
push_branch(struct ir3_compile_context *ctx, bool inv,
struct ir3_instruction *instr, struct ir3_instruction *cond)
{
unsigned int idx = ctx->branch_count++;
compile_assert(ctx, idx < ARRAY_SIZE(ctx->branch));
ctx->branch[idx].instr = instr;
ctx->branch[idx].inv = inv;
/* else side of branch has same condition: */
if (!inv)
ctx->branch[idx].cond = cond;
}
 
static struct ir3_instruction *
pop_branch(struct ir3_compile_context *ctx)
{
unsigned int idx = --ctx->branch_count;
return ctx->branch[idx].instr;
}
 
static void
trans_if(const struct instr_translater *t,
struct ir3_compile_context *ctx,
struct tgsi_full_instruction *inst)
{
struct ir3_instruction *instr, *cond;
struct tgsi_src_register *src = &inst->Src[0].Register;
struct tgsi_dst_register tmp_dst;
struct tgsi_src_register *tmp_src;
struct tgsi_src_register constval;
 
get_immediate(ctx, &constval, fui(0.0));
tmp_src = get_internal_temp(ctx, &tmp_dst);
 
if (is_const(src))
src = get_unconst(ctx, src);
 
/* cmps.{f,u}.ne tmp0, b, {0.0} */
instr = instr_create(ctx, 2, t->opc);
add_dst_reg(ctx, instr, &tmp_dst, 0);
add_src_reg(ctx, instr, src, src->SwizzleX);
add_src_reg(ctx, instr, &constval, constval.SwizzleX);
instr->cat2.condition = IR3_COND_NE;
 
compile_assert(ctx, instr->regs[1]->flags & IR3_REG_SSA); /* because get_unconst() */
cond = instr->regs[1]->instr;
 
/* meta:flow tmp0 */
instr = instr_create(ctx, -1, OPC_META_FLOW);
ir3_reg_create(instr, 0, 0); /* dummy dst */
add_src_reg(ctx, instr, tmp_src, TGSI_SWIZZLE_X);
 
push_branch(ctx, false, instr, cond);
instr->flow.if_block = push_block(ctx);
}
 
static void
trans_else(const struct instr_translater *t,
struct ir3_compile_context *ctx,
struct tgsi_full_instruction *inst)
{
struct ir3_instruction *instr;
 
pop_block(ctx);
 
instr = pop_branch(ctx);
 
compile_assert(ctx, (instr->category == -1) &&
(instr->opc == OPC_META_FLOW));
 
push_branch(ctx, true, instr, NULL);
instr->flow.else_block = push_block(ctx);
}
 
static struct ir3_instruction *
find_temporary(struct ir3_block *block, unsigned n)
{
if (block->parent && !block->temporaries[n])
return find_temporary(block->parent, n);
return block->temporaries[n];
}
 
static struct ir3_instruction *
find_output(struct ir3_block *block, unsigned n)
{
if (block->parent && !block->outputs[n])
return find_output(block->parent, n);
return block->outputs[n];
}
 
static struct ir3_instruction *
create_phi(struct ir3_compile_context *ctx, struct ir3_instruction *cond,
struct ir3_instruction *a, struct ir3_instruction *b)
{
struct ir3_instruction *phi;
 
compile_assert(ctx, cond);
 
/* Either side of the condition could be null.. which
* indicates a variable written on only one side of the
* branch. Normally this should only be variables not
* used outside of that side of the branch. So we could
* just 'return a ? a : b;' in that case. But for better
* defined undefined behavior we just stick in imm{0.0}.
* In the common case of a value only used within the
* one side of the branch, the PHI instruction will not
* get scheduled.
*/
if (!a)
a = create_immed(ctx, 0.0);
if (!b)
b = create_immed(ctx, 0.0);
 
phi = instr_create(ctx, -1, OPC_META_PHI);
ir3_reg_create(phi, 0, 0); /* dummy dst */
ir3_reg_create(phi, 0, IR3_REG_SSA)->instr = cond;
ir3_reg_create(phi, 0, IR3_REG_SSA)->instr = a;
ir3_reg_create(phi, 0, IR3_REG_SSA)->instr = b;
 
return phi;
}
 
static void
trans_endif(const struct instr_translater *t,
struct ir3_compile_context *ctx,
struct tgsi_full_instruction *inst)
{
struct ir3_instruction *instr;
struct ir3_block *ifb, *elseb;
struct ir3_instruction **ifout, **elseout;
unsigned i, ifnout = 0, elsenout = 0;
 
pop_block(ctx);
 
instr = pop_branch(ctx);
 
compile_assert(ctx, (instr->category == -1) &&
(instr->opc == OPC_META_FLOW));
 
ifb = instr->flow.if_block;
elseb = instr->flow.else_block;
/* if there is no else block, the parent block is used for the
* branch-not-taken src of the PHI instructions:
*/
if (!elseb)
elseb = ifb->parent;
 
/* worst case sizes: */
ifnout = ifb->ntemporaries + ifb->noutputs;
elsenout = elseb->ntemporaries + elseb->noutputs;
 
ifout = ir3_alloc(ctx->ir, sizeof(ifb->outputs[0]) * ifnout);
if (elseb != ifb->parent)
elseout = ir3_alloc(ctx->ir, sizeof(ifb->outputs[0]) * elsenout);
 
ifnout = 0;
elsenout = 0;
 
/* generate PHI instructions for any temporaries written: */
for (i = 0; i < ifb->ntemporaries; i++) {
struct ir3_instruction *a = ifb->temporaries[i];
struct ir3_instruction *b = elseb->temporaries[i];
 
/* if temporary written in if-block, or if else block
* is present and temporary written in else-block:
*/
if (a || ((elseb != ifb->parent) && b)) {
struct ir3_instruction *phi;
 
/* if only written on one side, find the closest
* enclosing update on other side:
*/
if (!a)
a = find_temporary(ifb, i);
if (!b)
b = find_temporary(elseb, i);
 
ifout[ifnout] = a;
a = create_output(ifb, a, ifnout++);
 
if (elseb != ifb->parent) {
elseout[elsenout] = b;
b = create_output(elseb, b, elsenout++);
}
 
phi = create_phi(ctx, instr, a, b);
ctx->block->temporaries[i] = phi;
}
}
 
compile_assert(ctx, ifb->noutputs == elseb->noutputs);
 
/* .. and any outputs written: */
for (i = 0; i < ifb->noutputs; i++) {
struct ir3_instruction *a = ifb->outputs[i];
struct ir3_instruction *b = elseb->outputs[i];
 
/* if output written in if-block, or if else block
* is present and output written in else-block:
*/
if (a || ((elseb != ifb->parent) && b)) {
struct ir3_instruction *phi;
 
/* if only written on one side, find the closest
* enclosing update on other side:
*/
if (!a)
a = find_output(ifb, i);
if (!b)
b = find_output(elseb, i);
 
ifout[ifnout] = a;
a = create_output(ifb, a, ifnout++);
 
if (elseb != ifb->parent) {
elseout[elsenout] = b;
b = create_output(elseb, b, elsenout++);
}
 
phi = create_phi(ctx, instr, a, b);
ctx->block->outputs[i] = phi;
}
}
 
ifb->noutputs = ifnout;
ifb->outputs = ifout;
 
if (elseb != ifb->parent) {
elseb->noutputs = elsenout;
elseb->outputs = elseout;
}
 
// TODO maybe we want to compact block->inputs?
}
 
/*
* Kill
*/
 
static void
trans_kill(const struct instr_translater *t,
struct ir3_compile_context *ctx,
struct tgsi_full_instruction *inst)
{
struct ir3_instruction *instr, *immed, *cond = NULL;
bool inv = false;
 
/* unconditional kill, use enclosing if condition: */
if (ctx->branch_count > 0) {
unsigned int idx = ctx->branch_count - 1;
cond = ctx->branch[idx].cond;
inv = ctx->branch[idx].inv;
} else {
cond = create_immed(ctx, 1.0);
}
 
compile_assert(ctx, cond);
 
immed = create_immed(ctx, 0.0);
 
/* cmps.f.ne p0.x, cond, {0.0} */
instr = instr_create(ctx, 2, OPC_CMPS_F);
instr->cat2.condition = IR3_COND_NE;
ir3_reg_create(instr, regid(REG_P0, 0), 0);
ir3_reg_create(instr, 0, IR3_REG_SSA)->instr = cond;
ir3_reg_create(instr, 0, IR3_REG_SSA)->instr = immed;
cond = instr;
 
/* kill p0.x */
instr = instr_create(ctx, 0, OPC_KILL);
instr->cat0.inv = inv;
ir3_reg_create(instr, 0, 0); /* dummy dst */
ir3_reg_create(instr, 0, IR3_REG_SSA)->instr = cond;
 
ctx->kill[ctx->kill_count++] = instr;
 
ctx->so->has_kill = true;
}
 
/*
* Kill-If
*/
 
static void
trans_killif(const struct instr_translater *t,
struct ir3_compile_context *ctx,
struct tgsi_full_instruction *inst)
{
struct tgsi_src_register *src = &inst->Src[0].Register;
struct ir3_instruction *instr, *immed, *cond = NULL;
bool inv = false;
 
immed = create_immed(ctx, 0.0);
 
/* cmps.f.ne p0.x, cond, {0.0} */
instr = instr_create(ctx, 2, OPC_CMPS_F);
instr->cat2.condition = IR3_COND_NE;
ir3_reg_create(instr, regid(REG_P0, 0), 0);
ir3_reg_create(instr, 0, IR3_REG_SSA)->instr = immed;
add_src_reg(ctx, instr, src, src->SwizzleX);
 
cond = instr;
 
/* kill p0.x */
instr = instr_create(ctx, 0, OPC_KILL);
instr->cat0.inv = inv;
ir3_reg_create(instr, 0, 0); /* dummy dst */
ir3_reg_create(instr, 0, IR3_REG_SSA)->instr = cond;
 
ctx->kill[ctx->kill_count++] = instr;
 
ctx->so->has_kill = true;
 
}

/*
* I2F / U2F / F2I / F2U
*/
 
static void
trans_cov(const struct instr_translater *t,
struct ir3_compile_context *ctx,
struct tgsi_full_instruction *inst)
{
struct ir3_instruction *instr;
struct tgsi_dst_register *dst = get_dst(ctx, inst);
struct tgsi_src_register *src = &inst->Src[0].Register;
 
/* cov.f32s32 dst, tmp0 */
instr = instr_create(ctx, 1, 0);
switch (t->tgsi_opc) {
case TGSI_OPCODE_U2F:
instr->cat1.src_type = TYPE_U32;
instr->cat1.dst_type = TYPE_F32;
break;
case TGSI_OPCODE_I2F:
instr->cat1.src_type = TYPE_S32;
instr->cat1.dst_type = TYPE_F32;
break;
case TGSI_OPCODE_F2U:
instr->cat1.src_type = TYPE_F32;
instr->cat1.dst_type = TYPE_U32;
break;
case TGSI_OPCODE_F2I:
instr->cat1.src_type = TYPE_F32;
instr->cat1.dst_type = TYPE_S32;
break;
 
}
vectorize(ctx, instr, dst, 1, src, 0);
put_dst(ctx, inst, dst);
}
 
/*
* UMUL / UMAD
*
* There is no 32-bit multiply instruction, so splitting a and b into high and
* low components, we get that
*
* dst = (al * bl) + ((ah * bl) << 16) + ((al * bh) << 16)
*
* mull.u tmp0, a, b (mul low, i.e. al * bl)
* madsh.m16 tmp1, a, b, tmp0 (mul-add shift high mix, i.e. ah * bl << 16)
* madsh.m16 dst, b, a, tmp1 (i.e. al * bh << 16)
*
* For UMAD, add in the extra argument after mull.u.
*/
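/* Worked example (all arithmetic mod 2^32):
 *   a = 0x00020003 (ah=2, al=3), b = 0x00040005 (bh=4, bl=5)
 *     al*bl         = 0x0000000f
 *     (ah*bl) << 16 = 0x000a0000
 *     (al*bh) << 16 = 0x000c0000
 *     sum           = 0x0016000f == (a * b) & 0xffffffff
 *
 * Purely illustrative sketch of the same identity in C (the helper name is
 * made up, and the block is compiled out):
 */
#if 0
static inline uint32_t
umul32_via_16bit_parts(uint32_t a, uint32_t b)
{
uint32_t al = a & 0xffff, ah = a >> 16;
uint32_t bl = b & 0xffff, bh = b >> 16;
/* mull.u term, then the two madsh.m16 terms: */
return (al * bl) + ((ah * bl) << 16) + ((al * bh) << 16);
}
#endif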
static void
trans_umul(const struct instr_translater *t,
struct ir3_compile_context *ctx,
struct tgsi_full_instruction *inst)
{
struct ir3_instruction *instr;
struct tgsi_dst_register *dst = get_dst(ctx, inst);
struct tgsi_src_register *a = &inst->Src[0].Register;
struct tgsi_src_register *b = &inst->Src[1].Register;
 
struct tgsi_dst_register tmp0_dst, tmp1_dst;
struct tgsi_src_register *tmp0_src, *tmp1_src;
 
tmp0_src = get_internal_temp(ctx, &tmp0_dst);
tmp1_src = get_internal_temp(ctx, &tmp1_dst);
 
if (is_rel_or_const(a))
a = get_unconst(ctx, a);
if (is_rel_or_const(b))
b = get_unconst(ctx, b);
 
/* mull.u tmp0, a, b */
instr = instr_create(ctx, 2, OPC_MULL_U);
vectorize(ctx, instr, &tmp0_dst, 2, a, 0, b, 0);
 
if (t->tgsi_opc == TGSI_OPCODE_UMAD) {
struct tgsi_src_register *c = &inst->Src[2].Register;
 
/* add.u tmp0, tmp0, c */
instr = instr_create(ctx, 2, OPC_ADD_U);
vectorize(ctx, instr, &tmp0_dst, 2, tmp0_src, 0, c, 0);
}
 
/* madsh.m16 tmp1, a, b, tmp0 */
instr = instr_create(ctx, 3, OPC_MADSH_M16);
vectorize(ctx, instr, &tmp1_dst, 3, a, 0, b, 0, tmp0_src, 0);
 
/* madsh.m16 dst, b, a, tmp1 */
instr = instr_create(ctx, 3, OPC_MADSH_M16);
vectorize(ctx, instr, dst, 3, b, 0, a, 0, tmp1_src, 0);
put_dst(ctx, inst, dst);
}
 
/*
* IDIV / UDIV / MOD / UMOD
*
* See NV50LegalizeSSA::handleDIV for the origin of this implementation. For
* MOD/UMOD, it becomes a - [IU]DIV(a, modulus) * modulus.
*/
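/* Rough outline of the sequence below (per component): convert |a| and |b|
 * to float, take rcp(bf) and subtract 2 from its bit pattern so the estimate
 * never overshoots, compute q = trunc(af * bf), then correct in two rounds:
 * first add back trunc((a - q*b) * bf), then add 1 more if the remainder
 * a - q*b is still >= b. For IDIV the sign is patched at the end from the
 * sign bit of a ^ b.
 */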
static void
trans_idiv(const struct instr_translater *t,
struct ir3_compile_context *ctx,
struct tgsi_full_instruction *inst)
{
struct ir3_instruction *instr;
struct tgsi_dst_register *dst = get_dst(ctx, inst), *premod_dst = dst;
struct tgsi_src_register *a = &inst->Src[0].Register;
struct tgsi_src_register *b = &inst->Src[1].Register;
 
struct tgsi_dst_register af_dst, bf_dst, q_dst, r_dst, a_dst, b_dst;
struct tgsi_src_register *af_src, *bf_src, *q_src, *r_src, *a_src, *b_src;
 
struct tgsi_src_register negative_2, thirty_one;
type_t src_type;
 
if (t->tgsi_opc == TGSI_OPCODE_IDIV || t->tgsi_opc == TGSI_OPCODE_MOD)
src_type = get_stype(ctx);
else
src_type = get_utype(ctx);
 
af_src = get_internal_temp(ctx, &af_dst);
bf_src = get_internal_temp(ctx, &bf_dst);
q_src = get_internal_temp(ctx, &q_dst);
r_src = get_internal_temp(ctx, &r_dst);
a_src = get_internal_temp(ctx, &a_dst);
b_src = get_internal_temp(ctx, &b_dst);
 
get_immediate(ctx, &negative_2, -2);
get_immediate(ctx, &thirty_one, 31);
 
if (t->tgsi_opc == TGSI_OPCODE_MOD || t->tgsi_opc == TGSI_OPCODE_UMOD)
premod_dst = &q_dst;
 
/* cov.[us]32f32 af, numerator */
instr = instr_create(ctx, 1, 0);
instr->cat1.src_type = src_type;
instr->cat1.dst_type = get_ftype(ctx);
vectorize(ctx, instr, &af_dst, 1, a, 0);
 
/* cov.[us]32f32 bf, denominator */
instr = instr_create(ctx, 1, 0);
instr->cat1.src_type = src_type;
instr->cat1.dst_type = get_ftype(ctx);
vectorize(ctx, instr, &bf_dst, 1, b, 0);
 
/* Get the absolute values for IDIV */
if (type_sint(src_type)) {
/* absneg.f af, (abs)af */
instr = instr_create(ctx, 2, OPC_ABSNEG_F);
vectorize(ctx, instr, &af_dst, 1, af_src, IR3_REG_FABS);
 
/* absneg.f bf, (abs)bf */
instr = instr_create(ctx, 2, OPC_ABSNEG_F);
vectorize(ctx, instr, &bf_dst, 1, bf_src, IR3_REG_FABS);
 
/* absneg.s a, (abs)numerator */
instr = instr_create(ctx, 2, OPC_ABSNEG_S);
vectorize(ctx, instr, &a_dst, 1, a, IR3_REG_SABS);
 
/* absneg.s b, (abs)denominator */
instr = instr_create(ctx, 2, OPC_ABSNEG_S);
vectorize(ctx, instr, &b_dst, 1, b, IR3_REG_SABS);
} else {
/* mov.u32u32 a, numerator */
instr = instr_create(ctx, 1, 0);
instr->cat1.src_type = src_type;
instr->cat1.dst_type = src_type;
vectorize(ctx, instr, &a_dst, 1, a, 0);
 
/* mov.u32u32 b, denominator */
instr = instr_create(ctx, 1, 0);
instr->cat1.src_type = src_type;
instr->cat1.dst_type = src_type;
vectorize(ctx, instr, &b_dst, 1, b, 0);
}
 
/* rcp.f bf, bf */
instr = instr_create(ctx, 4, OPC_RCP);
vectorize(ctx, instr, &bf_dst, 1, bf_src, 0);
 
/* That's right, subtract 2 as an integer from the float */
/* add.u bf, bf, -2 */
instr = instr_create(ctx, 2, OPC_ADD_U);
vectorize(ctx, instr, &bf_dst, 2, bf_src, 0, &negative_2, 0);
 
/* mul.f q, af, bf */
instr = instr_create(ctx, 2, OPC_MUL_F);
vectorize(ctx, instr, &q_dst, 2, af_src, 0, bf_src, 0);
 
/* cov.f32[us]32 q, q */
instr = instr_create(ctx, 1, 0);
instr->cat1.src_type = get_ftype(ctx);
instr->cat1.dst_type = src_type;
vectorize(ctx, instr, &q_dst, 1, q_src, 0);
 
/* integer multiply q by b */
/* mull.u r, q, b */
instr = instr_create(ctx, 2, OPC_MULL_U);
vectorize(ctx, instr, &r_dst, 2, q_src, 0, b_src, 0);
 
/* madsh.m16 r, q, b, r */
instr = instr_create(ctx, 3, OPC_MADSH_M16);
vectorize(ctx, instr, &r_dst, 3, q_src, 0, b_src, 0, r_src, 0);
 
/* madsh.m16 r, b, q, r */
instr = instr_create(ctx, 3, OPC_MADSH_M16);
vectorize(ctx, instr, &r_dst, 3, b_src, 0, q_src, 0, r_src, 0);
 
/* sub.u r, a, r */
instr = instr_create(ctx, 2, OPC_SUB_U);
vectorize(ctx, instr, &r_dst, 2, a_src, 0, r_src, 0);
 
/* cov.u32f32, r, r */
instr = instr_create(ctx, 1, 0);
instr->cat1.src_type = get_utype(ctx);
instr->cat1.dst_type = get_ftype(ctx);
vectorize(ctx, instr, &r_dst, 1, r_src, 0);
 
/* mul.f r, r, bf */
instr = instr_create(ctx, 2, OPC_MUL_F);
vectorize(ctx, instr, &r_dst, 2, r_src, 0, bf_src, 0);
 
/* cov.f32u32 r, r */
instr = instr_create(ctx, 1, 0);
instr->cat1.src_type = get_ftype(ctx);
instr->cat1.dst_type = get_utype(ctx);
vectorize(ctx, instr, &r_dst, 1, r_src, 0);
 
/* add.u q, q, r */
instr = instr_create(ctx, 2, OPC_ADD_U);
vectorize(ctx, instr, &q_dst, 2, q_src, 0, r_src, 0);
 
/* mull.u r, q, b */
instr = instr_create(ctx, 2, OPC_MULL_U);
vectorize(ctx, instr, &r_dst, 2, q_src, 0, b_src, 0);
 
/* madsh.m16 r, q, b, r */
instr = instr_create(ctx, 3, OPC_MADSH_M16);
vectorize(ctx, instr, &r_dst, 3, q_src, 0, b_src, 0, r_src, 0);
 
/* madsh.m16 r, b, q, r */
instr = instr_create(ctx, 3, OPC_MADSH_M16);
vectorize(ctx, instr, &r_dst, 3, b_src, 0, q_src, 0, r_src, 0);
 
/* sub.u r, a, r */
instr = instr_create(ctx, 2, OPC_SUB_U);
vectorize(ctx, instr, &r_dst, 2, a_src, 0, r_src, 0);
 
/* cmps.u.ge r, r, b */
instr = instr_create(ctx, 2, OPC_CMPS_U);
instr->cat2.condition = IR3_COND_GE;
vectorize(ctx, instr, &r_dst, 2, r_src, 0, b_src, 0);
 
if (type_uint(src_type)) {
/* add.u dst, q, r */
instr = instr_create(ctx, 2, OPC_ADD_U);
vectorize(ctx, instr, premod_dst, 2, q_src, 0, r_src, 0);
} else {
/* add.u q, q, r */
instr = instr_create(ctx, 2, OPC_ADD_U);
vectorize(ctx, instr, &q_dst, 2, q_src, 0, r_src, 0);
 
/* negate result based on the original arguments */
if (is_const(a) && is_const(b))
a = get_unconst(ctx, a);
 
/* xor.b r, numerator, denominator */
instr = instr_create(ctx, 2, OPC_XOR_B);
vectorize(ctx, instr, &r_dst, 2, a, 0, b, 0);
 
/* shr.b r, r, 31 */
instr = instr_create(ctx, 2, OPC_SHR_B);
vectorize(ctx, instr, &r_dst, 2, r_src, 0, &thirty_one, 0);
 
/* absneg.s b, (neg)q */
instr = instr_create(ctx, 2, OPC_ABSNEG_S);
vectorize(ctx, instr, &b_dst, 1, q_src, IR3_REG_SNEG);
 
/* sel.b dst, b, r, q */
instr = instr_create(ctx, 3, OPC_SEL_B32);
vectorize(ctx, instr, premod_dst, 3, b_src, 0, r_src, 0, q_src, 0);
}
 
if (t->tgsi_opc == TGSI_OPCODE_MOD || t->tgsi_opc == TGSI_OPCODE_UMOD) {
/* The division result will have ended up in q. */
 
if (is_rel_or_const(b))
b = get_unconst(ctx, b);
 
/* mull.u r, q, b */
instr = instr_create(ctx, 2, OPC_MULL_U);
vectorize(ctx, instr, &r_dst, 2, q_src, 0, b, 0);
 
/* madsh.m16 r, q, b, r */
instr = instr_create(ctx, 3, OPC_MADSH_M16);
vectorize(ctx, instr, &r_dst, 3, q_src, 0, b, 0, r_src, 0);
 
/* madsh.m16 r, b, q, r */
instr = instr_create(ctx, 3, OPC_MADSH_M16);
vectorize(ctx, instr, &r_dst, 3, b, 0, q_src, 0, r_src, 0);
 
/* sub.u dst, a, r */
instr = instr_create(ctx, 2, OPC_SUB_U);
vectorize(ctx, instr, dst, 2, a, 0, r_src, 0);
}
 
put_dst(ctx, inst, dst);
}
 
/*
* Handlers for TGSI instructions which do have a 1:1 mapping to native
* instructions:
*/
 
static void
instr_cat0(const struct instr_translater *t,
struct ir3_compile_context *ctx,
struct tgsi_full_instruction *inst)
{
instr_create(ctx, 0, t->opc);
}
 
static void
instr_cat1(const struct instr_translater *t,
struct ir3_compile_context *ctx,
struct tgsi_full_instruction *inst)
{
struct tgsi_dst_register *dst = &inst->Dst[0].Register;
struct tgsi_src_register *src = &inst->Src[0].Register;
 
/* NOTE: atomic start/end, rather than in create_mov() since
* create_mov() is used already w/in atomic sequences (and
* we aren't clever enough to deal with the nesting)
*/
instr_atomic_start(ctx);
create_mov(ctx, dst, src);
instr_atomic_end(ctx);
}
 
static void
instr_cat2(const struct instr_translater *t,
struct ir3_compile_context *ctx,
struct tgsi_full_instruction *inst)
{
struct tgsi_dst_register *dst = get_dst(ctx, inst);
struct tgsi_src_register *src0 = &inst->Src[0].Register;
struct tgsi_src_register *src1 = &inst->Src[1].Register;
struct ir3_instruction *instr;
unsigned src0_flags = 0, src1_flags = 0;
 
switch (t->tgsi_opc) {
case TGSI_OPCODE_ABS:
src0_flags = IR3_REG_FABS;
break;
case TGSI_OPCODE_IABS:
src0_flags = IR3_REG_SABS;
break;
case TGSI_OPCODE_INEG:
src0_flags = IR3_REG_SNEG;
break;
case TGSI_OPCODE_SUB:
src1_flags = IR3_REG_FNEG;
break;
}
 
switch (t->opc) {
case OPC_ABSNEG_F:
case OPC_ABSNEG_S:
case OPC_CLZ_B:
case OPC_CLZ_S:
case OPC_SIGN_F:
case OPC_FLOOR_F:
case OPC_CEIL_F:
case OPC_RNDNE_F:
case OPC_RNDAZ_F:
case OPC_TRUNC_F:
case OPC_NOT_B:
case OPC_BFREV_B:
case OPC_SETRM:
case OPC_CBITS_B:
/* these only have one src reg */
instr = instr_create(ctx, 2, t->opc);
vectorize(ctx, instr, dst, 1, src0, src0_flags);
break;
default:
if (is_const(src0) && is_const(src1))
src0 = get_unconst(ctx, src0);
 
instr = instr_create(ctx, 2, t->opc);
vectorize(ctx, instr, dst, 2, src0, src0_flags,
src1, src1_flags);
break;
}
 
put_dst(ctx, inst, dst);
}
 
static void
instr_cat3(const struct instr_translater *t,
struct ir3_compile_context *ctx,
struct tgsi_full_instruction *inst)
{
struct tgsi_dst_register *dst = get_dst(ctx, inst);
struct tgsi_src_register *src0 = &inst->Src[0].Register;
struct tgsi_src_register *src1 = &inst->Src[1].Register;
struct ir3_instruction *instr;
 
/* in particular, can't handle const for src1 for cat3..
* for mad, we can swap first two src's if needed:
*/
if (is_rel_or_const(src1)) {
if (is_mad(t->opc) && !is_rel_or_const(src0)) {
struct tgsi_src_register *tmp;
tmp = src0;
src0 = src1;
src1 = tmp;
} else {
src1 = get_unconst(ctx, src1);
}
}
 
instr = instr_create(ctx, 3, t->opc);
vectorize(ctx, instr, dst, 3, src0, 0, src1, 0,
&inst->Src[2].Register, 0);
put_dst(ctx, inst, dst);
}
 
static void
instr_cat4(const struct instr_translater *t,
struct ir3_compile_context *ctx,
struct tgsi_full_instruction *inst)
{
struct tgsi_dst_register *dst = get_dst(ctx, inst);
struct tgsi_src_register *src = &inst->Src[0].Register;
struct ir3_instruction *instr;
unsigned i;
 
/* seems like blob compiler avoids const as src.. */
if (is_const(src))
src = get_unconst(ctx, src);
 
/* we need to replicate into each component: */
for (i = 0; i < 4; i++) {
if (dst->WriteMask & (1 << i)) {
instr = instr_create(ctx, 4, t->opc);
add_dst_reg(ctx, instr, dst, i);
add_src_reg(ctx, instr, src, src->SwizzleX);
}
}
 
put_dst(ctx, inst, dst);
}
 
static const struct instr_translater translaters[TGSI_OPCODE_LAST] = {
#define INSTR(n, f, ...) \
[TGSI_OPCODE_ ## n] = { .fxn = (f), .tgsi_opc = TGSI_OPCODE_ ## n, ##__VA_ARGS__ }
 
INSTR(MOV, instr_cat1),
INSTR(RCP, instr_cat4, .opc = OPC_RCP),
INSTR(RSQ, instr_cat4, .opc = OPC_RSQ),
INSTR(SQRT, instr_cat4, .opc = OPC_SQRT),
INSTR(MUL, instr_cat2, .opc = OPC_MUL_F),
INSTR(ADD, instr_cat2, .opc = OPC_ADD_F),
INSTR(SUB, instr_cat2, .opc = OPC_ADD_F),
INSTR(MIN, instr_cat2, .opc = OPC_MIN_F),
INSTR(MAX, instr_cat2, .opc = OPC_MAX_F),
INSTR(UADD, instr_cat2, .opc = OPC_ADD_U),
INSTR(IMIN, instr_cat2, .opc = OPC_MIN_S),
INSTR(UMIN, instr_cat2, .opc = OPC_MIN_U),
INSTR(IMAX, instr_cat2, .opc = OPC_MAX_S),
INSTR(UMAX, instr_cat2, .opc = OPC_MAX_U),
INSTR(AND, instr_cat2, .opc = OPC_AND_B),
INSTR(OR, instr_cat2, .opc = OPC_OR_B),
INSTR(NOT, instr_cat2, .opc = OPC_NOT_B),
INSTR(XOR, instr_cat2, .opc = OPC_XOR_B),
INSTR(UMUL, trans_umul),
INSTR(UMAD, trans_umul),
INSTR(UDIV, trans_idiv),
INSTR(IDIV, trans_idiv),
INSTR(MOD, trans_idiv),
INSTR(UMOD, trans_idiv),
INSTR(SHL, instr_cat2, .opc = OPC_SHL_B),
INSTR(USHR, instr_cat2, .opc = OPC_SHR_B),
INSTR(ISHR, instr_cat2, .opc = OPC_ASHR_B),
INSTR(IABS, instr_cat2, .opc = OPC_ABSNEG_S),
INSTR(INEG, instr_cat2, .opc = OPC_ABSNEG_S),
INSTR(AND, instr_cat2, .opc = OPC_AND_B),
INSTR(MAD, instr_cat3, .opc = OPC_MAD_F32, .hopc = OPC_MAD_F16),
INSTR(TRUNC, instr_cat2, .opc = OPC_TRUNC_F),
INSTR(CLAMP, trans_clamp),
INSTR(FLR, instr_cat2, .opc = OPC_FLOOR_F),
INSTR(ROUND, instr_cat2, .opc = OPC_RNDNE_F),
INSTR(SSG, instr_cat2, .opc = OPC_SIGN_F),
INSTR(CEIL, instr_cat2, .opc = OPC_CEIL_F),
INSTR(ARL, trans_arl),
INSTR(UARL, trans_arl),
INSTR(EX2, instr_cat4, .opc = OPC_EXP2),
INSTR(LG2, instr_cat4, .opc = OPC_LOG2),
INSTR(ABS, instr_cat2, .opc = OPC_ABSNEG_F),
INSTR(COS, instr_cat4, .opc = OPC_COS),
INSTR(SIN, instr_cat4, .opc = OPC_SIN),
INSTR(TEX, trans_samp, .opc = OPC_SAM),
INSTR(TXP, trans_samp, .opc = OPC_SAM),
INSTR(TXB, trans_samp, .opc = OPC_SAMB),
INSTR(TXB2, trans_samp, .opc = OPC_SAMB),
INSTR(TXL, trans_samp, .opc = OPC_SAML),
INSTR(TXD, trans_samp, .opc = OPC_SAMGQ),
INSTR(TXF, trans_samp, .opc = OPC_ISAML),
INSTR(TXQ, trans_txq),
INSTR(DDX, trans_deriv, .opc = OPC_DSX),
INSTR(DDY, trans_deriv, .opc = OPC_DSY),
INSTR(SGT, trans_cmp),
INSTR(SLT, trans_cmp),
INSTR(FSLT, trans_cmp),
INSTR(SGE, trans_cmp),
INSTR(FSGE, trans_cmp),
INSTR(SLE, trans_cmp),
INSTR(SNE, trans_cmp),
INSTR(FSNE, trans_cmp),
INSTR(SEQ, trans_cmp),
INSTR(FSEQ, trans_cmp),
INSTR(CMP, trans_cmp),
INSTR(USNE, trans_icmp, .opc = OPC_CMPS_U),
INSTR(USEQ, trans_icmp, .opc = OPC_CMPS_U),
INSTR(ISGE, trans_icmp, .opc = OPC_CMPS_S),
INSTR(USGE, trans_icmp, .opc = OPC_CMPS_U),
INSTR(ISLT, trans_icmp, .opc = OPC_CMPS_S),
INSTR(USLT, trans_icmp, .opc = OPC_CMPS_U),
INSTR(UCMP, trans_ucmp),
INSTR(ISSG, trans_issg),
INSTR(IF, trans_if, .opc = OPC_CMPS_F),
INSTR(UIF, trans_if, .opc = OPC_CMPS_U),
INSTR(ELSE, trans_else),
INSTR(ENDIF, trans_endif),
INSTR(END, instr_cat0, .opc = OPC_END),
INSTR(KILL, trans_kill, .opc = OPC_KILL),
INSTR(KILL_IF, trans_killif, .opc = OPC_KILL),
INSTR(I2F, trans_cov),
INSTR(U2F, trans_cov),
INSTR(F2I, trans_cov),
INSTR(F2U, trans_cov),
};
 
static ir3_semantic
decl_semantic(const struct tgsi_declaration_semantic *sem)
{
return ir3_semantic_name(sem->Name, sem->Index);
}
 
static struct ir3_instruction *
decl_in_frag_bary(struct ir3_compile_context *ctx, unsigned regid,
unsigned j, unsigned inloc, bool use_ldlv)
{
struct ir3_instruction *instr;
struct ir3_register *src;
 
if (use_ldlv) {
/* ldlv.u32 dst, l[#inloc], 1 */
instr = instr_create(ctx, 6, OPC_LDLV);
instr->cat6.type = TYPE_U32;
instr->cat6.iim_val = 1;
ir3_reg_create(instr, regid, 0); /* dummy dst */
ir3_reg_create(instr, 0, IR3_REG_IMMED)->iim_val = inloc;
ir3_reg_create(instr, 0, IR3_REG_IMMED)->iim_val = 1;
 
return instr;
}
 
/* bary.f dst, #inloc, r0.x */
instr = instr_create(ctx, 2, OPC_BARY_F);
ir3_reg_create(instr, regid, 0); /* dummy dst */
ir3_reg_create(instr, 0, IR3_REG_IMMED)->iim_val = inloc;
src = ir3_reg_create(instr, 0, IR3_REG_SSA);
src->wrmask = 0x3;
src->instr = ctx->frag_pos;
 
return instr;
}
 
/* TGSI_SEMANTIC_POSITION
* """"""""""""""""""""""
*
* For fragment shaders, TGSI_SEMANTIC_POSITION is used to indicate that
* fragment shader input contains the fragment's window position. The X
* component starts at zero and always increases from left to right.
* The Y component starts at zero and always increases but Y=0 may either
* indicate the top of the window or the bottom depending on the fragment
* coordinate origin convention (see TGSI_PROPERTY_FS_COORD_ORIGIN).
* The Z coordinate ranges from 0 to 1 to represent depth from the front
* to the back of the Z buffer. The W component contains the reciprocal
* of the interpolated vertex position W component.
*/
static struct ir3_instruction *
decl_in_frag_coord(struct ir3_compile_context *ctx, unsigned regid,
unsigned j)
{
struct ir3_instruction *instr, *src;
 
compile_assert(ctx, !ctx->frag_coord[j]);
 
ctx->frag_coord[j] = create_input(ctx->block, NULL, 0);
 
 
switch (j) {
case 0: /* .x */
case 1: /* .y */
/* for frag_coord, we get unsigned values.. we need
* to subtract (integer) 8 and divide by 16 (right-
* shift by 4) then convert to float:
*/
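/* (this suggests the raw value is .4 fixed-point centred on the half-pixel,
 * e.g. raw 56 for pixel x=3 at 3.5 -> (56 - 8) >> 4 = 3)
 */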
 
/* add.s tmp, src, -8 */
instr = instr_create(ctx, 2, OPC_ADD_S);
ir3_reg_create(instr, regid, 0); /* dummy dst */
ir3_reg_create(instr, 0, IR3_REG_SSA)->instr = ctx->frag_coord[j];
ir3_reg_create(instr, 0, IR3_REG_IMMED)->iim_val = -8;
src = instr;
 
/* shr.b tmp, tmp, 4 */
instr = instr_create(ctx, 2, OPC_SHR_B);
ir3_reg_create(instr, regid, 0); /* dummy dst */
ir3_reg_create(instr, 0, IR3_REG_SSA)->instr = src;
ir3_reg_create(instr, 0, IR3_REG_IMMED)->iim_val = 4;
src = instr;
 
/* mov.u32f32 dst, tmp */
instr = instr_create(ctx, 1, 0);
instr->cat1.src_type = TYPE_U32;
instr->cat1.dst_type = TYPE_F32;
ir3_reg_create(instr, regid, 0); /* dummy dst */
ir3_reg_create(instr, 0, IR3_REG_SSA)->instr = src;
 
break;
case 2: /* .z */
case 3: /* .w */
/* seems that we can use these as-is: */
instr = ctx->frag_coord[j];
break;
default:
compile_error(ctx, "invalid channel\n");
instr = create_immed(ctx, 0.0);
break;
}
 
return instr;
}
 
/* TGSI_SEMANTIC_FACE
* """"""""""""""""""
*
* This label applies to fragment shader inputs only and indicates that
* the register contains front/back-face information of the form (F, 0,
* 0, 1). The first component will be positive when the fragment belongs
* to a front-facing polygon, and negative when the fragment belongs to a
* back-facing polygon.
*/
static struct ir3_instruction *
decl_in_frag_face(struct ir3_compile_context *ctx, unsigned regid,
unsigned j)
{
struct ir3_instruction *instr, *src;
 
switch (j) {
case 0: /* .x */
compile_assert(ctx, !ctx->frag_face);
 
ctx->frag_face = create_input(ctx->block, NULL, 0);
 
/* for faceness, we always get -1 or 0 (int).. but TGSI expects
* positive vs negative float.. and piglit further seems to
* expect -1.0 or 1.0:
*
* mul.s tmp, hr0.x, 2
* add.s tmp, tmp, 1
* mov.s16f32 dst, tmp
*
*/
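/* i.e. a raw 0 becomes 0*2 + 1 = 1.0 and a raw -1 becomes -1*2 + 1 = -1.0
 * after the final int->float conversion, so (presumably) front faces come
 * out positive as TGSI expects.
 */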
 
instr = instr_create(ctx, 2, OPC_MUL_S);
ir3_reg_create(instr, regid, 0); /* dummy dst */
ir3_reg_create(instr, 0, IR3_REG_SSA)->instr = ctx->frag_face;
ir3_reg_create(instr, 0, IR3_REG_IMMED)->iim_val = 2;
src = instr;
 
instr = instr_create(ctx, 2, OPC_ADD_S);
ir3_reg_create(instr, regid, 0); /* dummy dst */
ir3_reg_create(instr, 0, IR3_REG_SSA)->instr = src;
ir3_reg_create(instr, 0, IR3_REG_IMMED)->iim_val = 1;
src = instr;
 
instr = instr_create(ctx, 1, 0); /* mov */
instr->cat1.src_type = TYPE_S32;
instr->cat1.dst_type = TYPE_F32;
ir3_reg_create(instr, regid, 0); /* dummy dst */
ir3_reg_create(instr, 0, IR3_REG_SSA)->instr = src;
 
break;
case 1: /* .y */
case 2: /* .z */
instr = create_immed(ctx, 0.0);
break;
case 3: /* .w */
instr = create_immed(ctx, 1.0);
break;
default:
compile_error(ctx, "invalid channel\n");
instr = create_immed(ctx, 0.0);
break;
}
 
return instr;
}
 
static void
decl_in(struct ir3_compile_context *ctx, struct tgsi_full_declaration *decl)
{
struct ir3_shader_variant *so = ctx->so;
unsigned name = decl->Semantic.Name;
unsigned i;
 
/* I don't think we should get frag shader input without
* semantic info? Otherwise how do inputs get linked to
* vert outputs?
*/
compile_assert(ctx, (ctx->type == TGSI_PROCESSOR_VERTEX) ||
decl->Declaration.Semantic);
 
for (i = decl->Range.First; i <= decl->Range.Last; i++) {
unsigned n = so->inputs_count++;
unsigned r = regid(i, 0);
unsigned ncomp, j;
 
/* we'll figure out the actual components used after scheduling */
ncomp = 4;
 
DBG("decl in -> r%d", i);
 
compile_assert(ctx, n < ARRAY_SIZE(so->inputs));
 
so->inputs[n].semantic = decl_semantic(&decl->Semantic);
so->inputs[n].compmask = (1 << ncomp) - 1;
so->inputs[n].regid = r;
so->inputs[n].inloc = ctx->next_inloc;
so->inputs[n].interpolate = decl->Interp.Interpolate;
 
for (j = 0; j < ncomp; j++) {
struct ir3_instruction *instr = NULL;
 
if (ctx->type == TGSI_PROCESSOR_FRAGMENT) {
/* for fragment shaders, POSITION and FACE are handled
* specially, not using normal varying / bary.f
*/
if (name == TGSI_SEMANTIC_POSITION) {
so->inputs[n].bary = false;
so->frag_coord = true;
instr = decl_in_frag_coord(ctx, r + j, j);
} else if (name == TGSI_SEMANTIC_FACE) {
so->inputs[n].bary = false;
so->frag_face = true;
instr = decl_in_frag_face(ctx, r + j, j);
} else {
bool use_ldlv = false;
 
/* if no interpolation given, pick based on
* semantic:
*/
if (!decl->Declaration.Interpolate) {
switch (decl->Semantic.Name) {
case TGSI_SEMANTIC_COLOR:
so->inputs[n].interpolate =
TGSI_INTERPOLATE_COLOR;
break;
default:
so->inputs[n].interpolate =
TGSI_INTERPOLATE_LINEAR;
}
}
 
if (ctx->flat_bypass) {
switch (so->inputs[n].interpolate) {
case TGSI_INTERPOLATE_COLOR:
if (!ctx->so->key.rasterflat)
break;
/* fallthrough */
case TGSI_INTERPOLATE_CONSTANT:
use_ldlv = true;
break;
}
}
 
so->inputs[n].bary = true;
 
instr = decl_in_frag_bary(ctx, r + j, j,
so->inputs[n].inloc + j - 8, use_ldlv);
}
} else {
instr = create_input(ctx->block, NULL, (i * 4) + j);
}
 
ctx->block->inputs[(i * 4) + j] = instr;
}
 
if (so->inputs[n].bary || (ctx->type == TGSI_PROCESSOR_VERTEX)) {
ctx->next_inloc += ncomp;
so->total_in += ncomp;
}
}
}
 
static void
decl_sv(struct ir3_compile_context *ctx, struct tgsi_full_declaration *decl)
{
struct ir3_shader_variant *so = ctx->so;
unsigned r = regid(so->inputs_count, 0);
unsigned n = so->inputs_count++;
 
DBG("decl sv -> r%d", n);
 
compile_assert(ctx, n < ARRAY_SIZE(so->inputs));
compile_assert(ctx, decl->Range.First < ARRAY_SIZE(ctx->sysval_semantics));
 
ctx->sysval_semantics[decl->Range.First] = decl->Semantic.Name;
so->inputs[n].semantic = decl_semantic(&decl->Semantic);
so->inputs[n].compmask = 1;
so->inputs[n].regid = r;
so->inputs[n].inloc = ctx->next_inloc;
so->inputs[n].interpolate = TGSI_INTERPOLATE_CONSTANT;
 
struct ir3_instruction *instr = NULL;
 
switch (decl->Semantic.Name) {
case TGSI_SEMANTIC_VERTEXID_NOBASE:
ctx->vertex_id = instr = create_input(ctx->block, NULL, r);
break;
case TGSI_SEMANTIC_BASEVERTEX:
ctx->basevertex = instr = instr_create(ctx, 1, 0);
instr->cat1.src_type = get_stype(ctx);
instr->cat1.dst_type = get_stype(ctx);
ir3_reg_create(instr, 0, 0);
ir3_reg_create(instr, regid(so->first_driver_param + 4, 0),
IR3_REG_CONST);
break;
case TGSI_SEMANTIC_INSTANCEID:
ctx->instance_id = instr = create_input(ctx->block, NULL, r);
break;
default:
compile_error(ctx, "Unknown semantic: %s\n",
tgsi_semantic_names[decl->Semantic.Name]);
}
 
ctx->block->inputs[r] = instr;
ctx->next_inloc++;
so->total_in++;
}
 
static void
decl_out(struct ir3_compile_context *ctx, struct tgsi_full_declaration *decl)
{
struct ir3_shader_variant *so = ctx->so;
unsigned comp = 0;
unsigned name = decl->Semantic.Name;
unsigned i;
 
compile_assert(ctx, decl->Declaration.Semantic);
 
DBG("decl out[%d] -> r%d", name, decl->Range.First);
 
if (ctx->type == TGSI_PROCESSOR_VERTEX) {
switch (name) {
case TGSI_SEMANTIC_POSITION:
so->writes_pos = true;
break;
case TGSI_SEMANTIC_PSIZE:
so->writes_psize = true;
break;
case TGSI_SEMANTIC_COLOR:
case TGSI_SEMANTIC_BCOLOR:
case TGSI_SEMANTIC_GENERIC:
case TGSI_SEMANTIC_FOG:
case TGSI_SEMANTIC_TEXCOORD:
break;
default:
compile_error(ctx, "unknown VS semantic name: %s\n",
tgsi_semantic_names[name]);
}
} else {
switch (name) {
case TGSI_SEMANTIC_POSITION:
comp = 2; /* tgsi will write to .z component */
so->writes_pos = true;
break;
case TGSI_SEMANTIC_COLOR:
break;
default:
compile_error(ctx, "unknown FS semantic name: %s\n",
tgsi_semantic_names[name]);
}
}
 
for (i = decl->Range.First; i <= decl->Range.Last; i++) {
unsigned n = so->outputs_count++;
unsigned ncomp, j;
 
ncomp = 4;
 
compile_assert(ctx, n < ARRAY_SIZE(so->outputs));
 
so->outputs[n].semantic = decl_semantic(&decl->Semantic);
so->outputs[n].regid = regid(i, comp);
 
/* avoid undefined outputs, stick a dummy mov from imm{0.0},
* which if the output is actually assigned will be over-
* written
*/
for (j = 0; j < ncomp; j++)
ctx->block->outputs[(i * 4) + j] = create_immed(ctx, 0.0);
}
}
 
/* from TGSI perspective, we actually have inputs. But most of the "inputs"
* for a fragment shader are just bary.f instructions. The *actual* inputs
* from the hw perspective are the frag_pos and optionally frag_coord and
* frag_face.
*/
static void
fixup_frag_inputs(struct ir3_compile_context *ctx)
{
struct ir3_shader_variant *so = ctx->so;
struct ir3_block *block = ctx->block;
struct ir3_instruction **inputs;
struct ir3_instruction *instr;
int n, regid = 0;
 
block->ninputs = 0;
 
n = 4; /* always have frag_pos */
n += COND(so->frag_face, 4);
n += COND(so->frag_coord, 4);
 
inputs = ir3_alloc(ctx->ir, n * (sizeof(struct ir3_instruction *)));
 
if (so->frag_face) {
/* this ultimately gets assigned to hr0.x so doesn't conflict
* with frag_coord/frag_pos..
*/
inputs[block->ninputs++] = ctx->frag_face;
ctx->frag_face->regs[0]->num = 0;
 
/* remaining channels not used, but let's avoid confusing
* other parts that expect inputs to come in groups of vec4
*/
inputs[block->ninputs++] = NULL;
inputs[block->ninputs++] = NULL;
inputs[block->ninputs++] = NULL;
}
 
/* since we don't know where to set the regid for frag_coord,
* we have to use r0.x for it. But we don't want to *always*
* use r1.x for frag_pos as that could increase the register
* footprint on simple shaders:
*/
if (so->frag_coord) {
ctx->frag_coord[0]->regs[0]->num = regid++;
ctx->frag_coord[1]->regs[0]->num = regid++;
ctx->frag_coord[2]->regs[0]->num = regid++;
ctx->frag_coord[3]->regs[0]->num = regid++;
 
inputs[block->ninputs++] = ctx->frag_coord[0];
inputs[block->ninputs++] = ctx->frag_coord[1];
inputs[block->ninputs++] = ctx->frag_coord[2];
inputs[block->ninputs++] = ctx->frag_coord[3];
}
 
/* we always have frag_pos: */
so->pos_regid = regid;
 
/* r0.x */
instr = create_input(block, NULL, block->ninputs);
instr->regs[0]->num = regid++;
inputs[block->ninputs++] = instr;
ctx->frag_pos->regs[1]->instr = instr;
 
/* r0.y */
instr = create_input(block, NULL, block->ninputs);
instr->regs[0]->num = regid++;
inputs[block->ninputs++] = instr;
ctx->frag_pos->regs[2]->instr = instr;
 
block->inputs = inputs;
}
 
static void
compile_instructions(struct ir3_compile_context *ctx)
{
push_block(ctx);
 
/* for fragment shader, we have a single input register (usually
* r0.xy) which is used as the base for bary.f varying fetch instrs:
*/
if (ctx->type == TGSI_PROCESSOR_FRAGMENT) {
struct ir3_instruction *instr;
instr = ir3_instr_create(ctx->block, -1, OPC_META_FI);
ir3_reg_create(instr, 0, 0);
ir3_reg_create(instr, 0, IR3_REG_SSA); /* r0.x */
ir3_reg_create(instr, 0, IR3_REG_SSA); /* r0.y */
ctx->frag_pos = instr;
}
 
while (!tgsi_parse_end_of_tokens(&ctx->parser)) {
tgsi_parse_token(&ctx->parser);
 
switch (ctx->parser.FullToken.Token.Type) {
case TGSI_TOKEN_TYPE_DECLARATION: {
struct tgsi_full_declaration *decl =
&ctx->parser.FullToken.FullDeclaration;
unsigned file = decl->Declaration.File;
if (file == TGSI_FILE_OUTPUT) {
decl_out(ctx, decl);
} else if (file == TGSI_FILE_INPUT) {
decl_in(ctx, decl);
} else if (decl->Declaration.File == TGSI_FILE_SYSTEM_VALUE) {
decl_sv(ctx, decl);
}
 
if ((file != TGSI_FILE_CONSTANT) && decl->Declaration.Array) {
int aid = decl->Array.ArrayID + ctx->array_offsets[file];
 
compile_assert(ctx, aid < ARRAY_SIZE(ctx->array));
 
/* legacy ArrayID==0 stuff probably isn't going to work
* well (and is at least untested).. let's just scream:
*/
compile_assert(ctx, aid != 0);
 
ctx->array[aid].first = decl->Range.First;
ctx->array[aid].last = decl->Range.Last;
}
break;
}
case TGSI_TOKEN_TYPE_IMMEDIATE: {
/* TODO: if we know the immediate is small enough, and only
* used with instructions that can embed an immediate, we
* can skip this:
*/
struct tgsi_full_immediate *imm =
&ctx->parser.FullToken.FullImmediate;
unsigned n = ctx->so->immediates_count++;
compile_assert(ctx, n < ARRAY_SIZE(ctx->so->immediates));
memcpy(ctx->so->immediates[n].val, imm->u, 16);
break;
}
case TGSI_TOKEN_TYPE_INSTRUCTION: {
struct tgsi_full_instruction *inst =
&ctx->parser.FullToken.FullInstruction;
unsigned opc = inst->Instruction.Opcode;
const struct instr_translater *t = &translaters[opc];
 
if (t->fxn) {
t->fxn(t, ctx, inst);
ctx->num_internal_temps = 0;
 
compile_assert(ctx, !ctx->using_tmp_dst);
} else {
compile_error(ctx, "unknown TGSI opc: %s\n",
tgsi_get_opcode_name(opc));
}
 
switch (inst->Instruction.Saturate) {
case TGSI_SAT_ZERO_ONE:
create_clamp_imm(ctx, &inst->Dst[0].Register,
fui(0.0), fui(1.0));
break;
case TGSI_SAT_MINUS_PLUS_ONE:
create_clamp_imm(ctx, &inst->Dst[0].Register,
fui(-1.0), fui(1.0));
break;
}
 
instr_finish(ctx);
 
break;
}
case TGSI_TOKEN_TYPE_PROPERTY: {
struct tgsi_full_property *prop =
&ctx->parser.FullToken.FullProperty;
switch (prop->Property.PropertyName) {
case TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS:
ctx->so->color0_mrt = !!prop->u[0].Data;
break;
}
}
default:
break;
}
}
}
 
static void
compile_dump(struct ir3_compile_context *ctx)
{
const char *name = (ctx->so->type == SHADER_VERTEX) ? "vert" : "frag";
static unsigned n = 0;
char fname[16];
FILE *f;
snprintf(fname, sizeof(fname), "%s-%04u.dot", name, n++);
f = fopen(fname, "w");
if (!f)
return;
ir3_block_depth(ctx->block);
ir3_dump(ctx->ir, name, ctx->block, f);
fclose(f);
}
 
int
ir3_compile_shader(struct ir3_shader_variant *so,
const struct tgsi_token *tokens, struct ir3_shader_key key,
bool cp)
{
struct ir3_compile_context ctx;
struct ir3_block *block;
struct ir3_instruction **inputs;
unsigned i, j, actual_in;
int ret = 0, max_bary;
 
assert(!so->ir);
 
so->ir = ir3_create();
 
assert(so->ir);
 
if (compile_init(&ctx, so, tokens) != TGSI_PARSE_OK) {
DBG("INIT failed!");
ret = -1;
goto out;
}
 
/* for now, until the edge cases are worked out: */
if (ctx.info.indirect_files_written & (FM(TEMPORARY) | FM(INPUT) | FM(OUTPUT)))
cp = false;
 
compile_instructions(&ctx);
 
block = ctx.block;
so->ir->block = block;
 
/* keep track of the inputs from TGSI perspective.. */
inputs = block->inputs;
 
/* but fixup actual inputs for frag shader: */
if (ctx.type == TGSI_PROCESSOR_FRAGMENT)
fixup_frag_inputs(&ctx);
 
/* at this point, for binning pass, throw away unneeded outputs: */
if (key.binning_pass) {
for (i = 0, j = 0; i < so->outputs_count; i++) {
unsigned name = sem2name(so->outputs[i].semantic);
unsigned idx = sem2idx(so->outputs[i].semantic);
 
/* throw away everything but first position/psize */
if ((idx == 0) && ((name == TGSI_SEMANTIC_POSITION) ||
(name == TGSI_SEMANTIC_PSIZE))) {
if (i != j) {
so->outputs[j] = so->outputs[i];
block->outputs[(j*4)+0] = block->outputs[(i*4)+0];
block->outputs[(j*4)+1] = block->outputs[(i*4)+1];
block->outputs[(j*4)+2] = block->outputs[(i*4)+2];
block->outputs[(j*4)+3] = block->outputs[(i*4)+3];
}
j++;
}
}
so->outputs_count = j;
block->noutputs = j * 4;
}
 
/* if we want half-precision outputs, mark the output registers
* as half:
*/
if (key.half_precision) {
for (i = 0; i < block->noutputs; i++) {
if (!block->outputs[i])
continue;
block->outputs[i]->regs[0]->flags |= IR3_REG_HALF;
}
}
 
/* at this point, we want the kill's in the outputs array too,
* so that they get scheduled (since they have no dst).. we've
* already ensured that the array is big enough in push_block():
*/
if (ctx.type == TGSI_PROCESSOR_FRAGMENT) {
for (i = 0; i < ctx.kill_count; i++)
block->outputs[block->noutputs++] = ctx.kill[i];
}
 
if (fd_mesa_debug & FD_DBG_OPTDUMP)
compile_dump(&ctx);
 
ret = ir3_block_flatten(block);
if (ret < 0) {
DBG("FLATTEN failed!");
goto out;
}
if ((ret > 0) && (fd_mesa_debug & FD_DBG_OPTDUMP))
compile_dump(&ctx);
 
if (fd_mesa_debug & FD_DBG_OPTMSGS) {
printf("BEFORE CP:\n");
ir3_dump_instr_list(block->head);
}
 
ir3_block_depth(block);
 
/* First remove all the extra mov's (which we could skip if the
* front-end was clever enough not to insert them in the first
* place). Then figure out left/right neighbors, re-inserting
* extra mov's when needed to avoid conflicts.
*/
if (cp && !(fd_mesa_debug & FD_DBG_NOCP))
ir3_block_cp(block);
 
if (fd_mesa_debug & FD_DBG_OPTMSGS) {
printf("BEFORE GROUPING:\n");
ir3_dump_instr_list(block->head);
}
 
/* Group left/right neighbors, inserting mov's where needed to
* solve conflicts:
*/
ir3_block_group(block);
 
if (fd_mesa_debug & FD_DBG_OPTDUMP)
compile_dump(&ctx);
 
ir3_block_depth(block);
 
if (fd_mesa_debug & FD_DBG_OPTMSGS) {
printf("AFTER DEPTH:\n");
ir3_dump_instr_list(block->head);
}
 
ret = ir3_block_sched(block);
if (ret) {
DBG("SCHED failed!");
goto out;
}
 
if (fd_mesa_debug & FD_DBG_OPTMSGS) {
printf("AFTER SCHED:\n");
ir3_dump_instr_list(block->head);
}
 
ret = ir3_block_ra(block, so->type, so->frag_coord, so->frag_face);
if (ret) {
DBG("RA failed!");
goto out;
}
 
if (fd_mesa_debug & FD_DBG_OPTMSGS) {
printf("AFTER RA:\n");
ir3_dump_instr_list(block->head);
}
 
ir3_block_legalize(block, &so->has_samp, &max_bary);
 
/* fixup input/outputs: */
for (i = 0; i < so->outputs_count; i++) {
so->outputs[i].regid = block->outputs[i*4]->regs[0]->num;
/* preserve hack for depth output.. tgsi writes depth to .z,
* but what we give the hw is the scalar register:
*/
if ((ctx.type == TGSI_PROCESSOR_FRAGMENT) &&
(sem2name(so->outputs[i].semantic) == TGSI_SEMANTIC_POSITION))
so->outputs[i].regid += 2;
}
/* Note that some or all channels of an input may be unused: */
actual_in = 0;
for (i = 0; i < so->inputs_count; i++) {
unsigned j, regid = ~0, compmask = 0;
so->inputs[i].ncomp = 0;
for (j = 0; j < 4; j++) {
struct ir3_instruction *in = inputs[(i*4) + j];
if (in) {
compmask |= (1 << j);
regid = in->regs[0]->num - j;
actual_in++;
so->inputs[i].ncomp++;
}
}
so->inputs[i].regid = regid;
so->inputs[i].compmask = compmask;
}
 
/* fragment shader always gets full vec4's even if it doesn't
* fetch all components, but for the vertex shader we need to update
* with the actual number of components fetched, otherwise things
* will hang due to a mismatch between the VFD_DECODE's and
* TOTALATTRTOVS
*/
if (so->type == SHADER_VERTEX)
so->total_in = actual_in;
else
so->total_in = align(max_bary + 1, 4);
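 
/* Illustrative numbers for the assignment above (assuming a simple
* shader): if three varying components are fetched via bary.f then
* max_bary == 2 and a fragment shader gets total_in = align(2 + 1, 4)
* = 4, i.e. inputs are counted in whole vec4 slots; a vertex shader
* that reads only .xyz of a single attribute would instead report
* total_in = 3 (actual_in).
*/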
 
out:
if (ret) {
ir3_destroy(so->ir);
so->ir = NULL;
}
compile_free(&ctx);
 
return ret;
}
/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/freedreno/ir3/ir3_compiler.h
0,0 → 1,42
/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */
 
/*
* Copyright (C) 2013 Rob Clark <robclark@freedesktop.org>
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Authors:
* Rob Clark <robclark@freedesktop.org>
*/
 
#ifndef IR3_COMPILER_H_
#define IR3_COMPILER_H_
 
#include "ir3_shader.h"
 
 
int ir3_compile_shader_nir(struct ir3_shader_variant *so,
const struct tgsi_token *tokens, struct ir3_shader_key key);
 
int ir3_compile_shader(struct ir3_shader_variant *so,
const struct tgsi_token *tokens,
struct ir3_shader_key key, bool cp);
 
#endif /* IR3_COMPILER_H_ */
/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c
0,0 → 1,2120
/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */
 
/*
* Copyright (C) 2015 Rob Clark <robclark@freedesktop.org>
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Authors:
* Rob Clark <robclark@freedesktop.org>
*/
 
#include <stdarg.h>
 
#include "pipe/p_state.h"
#include "util/u_string.h"
#include "util/u_memory.h"
#include "util/u_inlines.h"
#include "tgsi/tgsi_lowering.h"
#include "tgsi/tgsi_strings.h"
 
#include "nir/tgsi_to_nir.h"
#include "glsl/shader_enums.h"
 
#include "freedreno_util.h"
 
#include "ir3_compiler.h"
#include "ir3_shader.h"
#include "ir3_nir.h"
 
#include "instr-a3xx.h"
#include "ir3.h"
 
 
static struct ir3_instruction * create_immed(struct ir3_block *block, uint32_t val);
 
struct ir3_compile {
const struct tgsi_token *tokens;
struct nir_shader *s;
 
struct ir3 *ir;
struct ir3_shader_variant *so;
 
/* bitmask of which samplers are integer: */
uint16_t integer_s;
 
struct ir3_block *block;
 
/* For fragment shaders, from the hw perspective the only
* actual input is r0.xy position register passed to bary.f.
* But TGSI doesn't know that, it still declares things as
* IN[] registers. So we do all the input tracking normally
* and fix things up after compile_instructions()
*
* NOTE that frag_pos is the hardware position (possibly it
* is actually an index or tag or some such.. it is *not* a
* value that can be directly used for gl_FragCoord..)
*/
struct ir3_instruction *frag_pos, *frag_face, *frag_coord[4];
 
/* For vertex shaders, keep track of the system values sources */
struct ir3_instruction *vertex_id, *basevertex, *instance_id;
 
/* mapping from nir_register to defining instruction: */
struct hash_table *def_ht;
 
/* mapping from nir_variable to ir3_array: */
struct hash_table *var_ht;
unsigned num_arrays;
 
/* a common pattern for indirect addressing is to request the
* same address register multiple times. To avoid generating
* duplicate instruction sequences (which our backend does not
* try to clean up, since that should be done at the NIR stage)
* we cache the address value generated for a given src value:
*/
struct hash_table *addr_ht;
 
/* for calculating input/output positions/linkages: */
unsigned next_inloc;
 
/* a4xx (at least patchlevel 0) cannot seem to flat-interpolate
* so we need to use ldlv.u32 to load the varying directly:
*/
bool flat_bypass;
 
/* on a3xx, we need to add one to # of array levels:
*/
bool levels_add_one;
 
/* for looking up which system value is which */
unsigned sysval_semantics[8];
 
/* list of kill instructions: */
struct ir3_instruction *kill[16];
unsigned int kill_count;
 
/* set if we encounter something we can't handle yet, so we
* can bail cleanly and fall back to the TGSI compiler f/e
*/
bool error;
};
 
 
static struct nir_shader *to_nir(const struct tgsi_token *tokens)
{
struct nir_shader_compiler_options options = {
.lower_fpow = true,
.lower_fsat = true,
.lower_scmp = true,
.lower_flrp = true,
.native_integers = true,
};
bool progress;
 
struct nir_shader *s = tgsi_to_nir(tokens, &options);
 
if (fd_mesa_debug & FD_DBG_OPTMSGS) {
debug_printf("----------------------\n");
nir_print_shader(s, stdout);
debug_printf("----------------------\n");
}
 
nir_opt_global_to_local(s);
nir_convert_to_ssa(s);
nir_lower_idiv(s);
 
do {
progress = false;
 
nir_lower_vars_to_ssa(s);
nir_lower_alu_to_scalar(s);
 
progress |= nir_copy_prop(s);
progress |= nir_opt_dce(s);
progress |= nir_opt_cse(s);
progress |= ir3_nir_lower_if_else(s);
progress |= nir_opt_algebraic(s);
progress |= nir_opt_constant_folding(s);
 
} while (progress);
 
nir_remove_dead_variables(s);
nir_validate_shader(s);
 
if (fd_mesa_debug & FD_DBG_OPTMSGS) {
debug_printf("----------------------\n");
nir_print_shader(s, stdout);
debug_printf("----------------------\n");
}
 
return s;
}
 
/* TODO nir doesn't lower everything for us yet, but ideally it would: */
static const struct tgsi_token *
lower_tgsi(const struct tgsi_token *tokens, struct ir3_shader_variant *so)
{
struct tgsi_shader_info info;
struct tgsi_lowering_config lconfig = {
.color_two_side = so->key.color_two_side,
.lower_FRC = true,
};
 
switch (so->type) {
case SHADER_FRAGMENT:
case SHADER_COMPUTE:
lconfig.saturate_s = so->key.fsaturate_s;
lconfig.saturate_t = so->key.fsaturate_t;
lconfig.saturate_r = so->key.fsaturate_r;
break;
case SHADER_VERTEX:
lconfig.saturate_s = so->key.vsaturate_s;
lconfig.saturate_t = so->key.vsaturate_t;
lconfig.saturate_r = so->key.vsaturate_r;
break;
}
 
if (!so->shader) {
/* hack for standalone compiler which does not have
* screen/context:
*/
} else if (ir3_shader_gpuid(so->shader) >= 400) {
/* a4xx seems to have *no* sam.p */
lconfig.lower_TXP = ~0; /* lower all txp */
} else {
/* a3xx just needs to avoid sam.p for 3d tex */
lconfig.lower_TXP = (1 << TGSI_TEXTURE_3D);
}
 
return tgsi_transform_lowering(&lconfig, tokens, &info);
}
 
static struct ir3_compile *
compile_init(struct ir3_shader_variant *so,
const struct tgsi_token *tokens)
{
struct ir3_compile *ctx = rzalloc(NULL, struct ir3_compile);
const struct tgsi_token *lowered_tokens;
 
if (!so->shader) {
/* hack for standalone compiler which does not have
* screen/context:
*/
} else if (ir3_shader_gpuid(so->shader) >= 400) {
/* need special handling for "flat" */
ctx->flat_bypass = true;
ctx->levels_add_one = false;
} else {
/* no special handling for "flat" */
ctx->flat_bypass = false;
ctx->levels_add_one = true;
}
 
switch (so->type) {
case SHADER_FRAGMENT:
case SHADER_COMPUTE:
ctx->integer_s = so->key.finteger_s;
break;
case SHADER_VERTEX:
ctx->integer_s = so->key.vinteger_s;
break;
}
 
ctx->ir = so->ir;
ctx->so = so;
ctx->next_inloc = 8;
ctx->def_ht = _mesa_hash_table_create(ctx,
_mesa_hash_pointer, _mesa_key_pointer_equal);
ctx->var_ht = _mesa_hash_table_create(ctx,
_mesa_hash_pointer, _mesa_key_pointer_equal);
ctx->addr_ht = _mesa_hash_table_create(ctx,
_mesa_hash_pointer, _mesa_key_pointer_equal);
 
lowered_tokens = lower_tgsi(tokens, so);
if (!lowered_tokens)
lowered_tokens = tokens;
ctx->s = to_nir(lowered_tokens);
 
if (lowered_tokens != tokens)
free((void *)lowered_tokens);
 
so->first_driver_param = so->first_immediate = ctx->s->num_uniforms;
 
/* one (vec4) slot for vertex id base: */
if (so->type == SHADER_VERTEX)
so->first_immediate++;
 
/* reserve 4 (vec4) slots for ubo base addresses: */
so->first_immediate += 4;
 
return ctx;
}
 
static void
compile_error(struct ir3_compile *ctx, const char *format, ...)
{
va_list ap;
va_start(ap, format);
_debug_vprintf(format, ap);
va_end(ap);
nir_print_shader(ctx->s, stdout);
ctx->error = true;
debug_assert(0);
}
 
#define compile_assert(ctx, cond) do { \
if (!(cond)) compile_error((ctx), "failed assert: "#cond"\n"); \
} while (0)
 
static void
compile_free(struct ir3_compile *ctx)
{
ralloc_free(ctx);
}
 
 
struct ir3_array {
unsigned length, aid;
struct ir3_instruction *arr[];
};
 
static void
declare_var(struct ir3_compile *ctx, nir_variable *var)
{
unsigned length = glsl_get_length(var->type) * 4; /* always vec4, at least with ttn */
struct ir3_array *arr = ralloc_size(ctx, sizeof(*arr) +
(length * sizeof(arr->arr[0])));
arr->length = length;
arr->aid = ++ctx->num_arrays;
/* Some shaders end up reading array elements without first writing..
* so initialize things to prevent null instr ptrs later:
*/
for (unsigned i = 0; i < length; i++)
arr->arr[i] = create_immed(ctx->block, 0);
_mesa_hash_table_insert(ctx->var_ht, var, arr);
}
 
static struct ir3_array *
get_var(struct ir3_compile *ctx, nir_variable *var)
{
struct hash_entry *entry = _mesa_hash_table_search(ctx->var_ht, var);
return entry->data;
}
 
/* allocate an n element value array (to be populated by caller) and
* insert in def_ht
*/
static struct ir3_instruction **
__get_dst(struct ir3_compile *ctx, void *key, unsigned n)
{
struct ir3_instruction **value =
ralloc_array(ctx->def_ht, struct ir3_instruction *, n);
_mesa_hash_table_insert(ctx->def_ht, key, value);
return value;
}
 
static struct ir3_instruction **
get_dst(struct ir3_compile *ctx, nir_dest *dst, unsigned n)
{
if (dst->is_ssa) {
return __get_dst(ctx, &dst->ssa, n);
} else {
return __get_dst(ctx, dst->reg.reg, n);
}
}
 
static struct ir3_instruction **
get_dst_ssa(struct ir3_compile *ctx, nir_ssa_def *dst, unsigned n)
{
return __get_dst(ctx, dst, n);
}
 
static struct ir3_instruction **
get_src(struct ir3_compile *ctx, nir_src *src)
{
struct hash_entry *entry;
if (src->is_ssa) {
entry = _mesa_hash_table_search(ctx->def_ht, src->ssa);
} else {
entry = _mesa_hash_table_search(ctx->def_ht, src->reg.reg);
}
compile_assert(ctx, entry);
return entry->data;
}
 
static struct ir3_instruction *
create_immed(struct ir3_block *block, uint32_t val)
{
struct ir3_instruction *mov;
 
mov = ir3_instr_create(block, 1, 0);
mov->cat1.src_type = TYPE_U32;
mov->cat1.dst_type = TYPE_U32;
ir3_reg_create(mov, 0, 0);
ir3_reg_create(mov, 0, IR3_REG_IMMED)->uim_val = val;
 
return mov;
}
 
static struct ir3_instruction *
create_addr(struct ir3_block *block, struct ir3_instruction *src)
{
struct ir3_instruction *instr, *immed;
 
/* TODO in at least some cases, the backend could probably be
* made clever enough to propagate IR3_REG_HALF..
*/
instr = ir3_COV(block, src, TYPE_U32, TYPE_S16);
instr->regs[0]->flags |= IR3_REG_HALF;
 
immed = create_immed(block, 2);
immed->regs[0]->flags |= IR3_REG_HALF;
 
instr = ir3_SHL_B(block, instr, 0, immed, 0);
instr->regs[0]->flags |= IR3_REG_HALF;
instr->regs[1]->flags |= IR3_REG_HALF;
 
instr = ir3_MOV(block, instr, TYPE_S16);
instr->regs[0]->flags |= IR3_REG_ADDR | IR3_REG_HALF;
instr->regs[1]->flags |= IR3_REG_HALF;
 
return instr;
}
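 
/* Rough sketch of the sequence create_addr() generates, assuming the
* NIR-level src holds a vec4 array index i (mnemonic spellings here
* are approximate, not taken from the disassembler):
*
*   cov.u32s16 tmp, src        ; narrow to a half register
*   shl.b      tmp, tmp, 2     ; i * 4, one scalar slot per vec4 component
*   mov        a0.x, tmp       ; move into the address register
*
* so relative accesses end up addressed in scalar components.
*/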
 
/* caches addr values to avoid generating multiple cov/shl/mova
* sequences for each use of a given NIR level src as address
*/
static struct ir3_instruction *
get_addr(struct ir3_compile *ctx, struct ir3_instruction *src)
{
struct ir3_instruction *addr;
struct hash_entry *entry;
entry = _mesa_hash_table_search(ctx->addr_ht, src);
if (entry)
return entry->data;
 
/* TODO do we need to cache per block? */
addr = create_addr(ctx->block, src);
_mesa_hash_table_insert(ctx->addr_ht, src, addr);
 
return addr;
}
 
static struct ir3_instruction *
create_uniform(struct ir3_compile *ctx, unsigned n)
{
struct ir3_instruction *mov;
 
mov = ir3_instr_create(ctx->block, 1, 0);
/* TODO get types right? */
mov->cat1.src_type = TYPE_F32;
mov->cat1.dst_type = TYPE_F32;
ir3_reg_create(mov, 0, 0);
ir3_reg_create(mov, n, IR3_REG_CONST);
 
return mov;
}
 
static struct ir3_instruction *
create_uniform_indirect(struct ir3_compile *ctx, unsigned n,
struct ir3_instruction *address)
{
struct ir3_instruction *mov;
 
mov = ir3_instr_create(ctx->block, 1, 0);
mov->cat1.src_type = TYPE_U32;
mov->cat1.dst_type = TYPE_U32;
ir3_reg_create(mov, 0, 0);
ir3_reg_create(mov, n, IR3_REG_CONST | IR3_REG_RELATIV);
mov->address = address;
 
array_insert(ctx->ir->indirects, mov);
 
return mov;
}
 
static struct ir3_instruction *
create_collect(struct ir3_block *block, struct ir3_instruction **arr,
unsigned arrsz)
{
struct ir3_instruction *collect;
 
if (arrsz == 0)
return NULL;
 
collect = ir3_instr_create2(block, -1, OPC_META_FI, 1 + arrsz);
ir3_reg_create(collect, 0, 0);
for (unsigned i = 0; i < arrsz; i++)
ir3_reg_create(collect, 0, IR3_REG_SSA)->instr = arr[i];
 
return collect;
}
 
static struct ir3_instruction *
create_indirect_load(struct ir3_compile *ctx, unsigned arrsz, unsigned n,
struct ir3_instruction *address, struct ir3_instruction *collect)
{
struct ir3_block *block = ctx->block;
struct ir3_instruction *mov;
struct ir3_register *src;
 
mov = ir3_instr_create(block, 1, 0);
mov->cat1.src_type = TYPE_U32;
mov->cat1.dst_type = TYPE_U32;
ir3_reg_create(mov, 0, 0);
src = ir3_reg_create(mov, 0, IR3_REG_SSA | IR3_REG_RELATIV);
src->instr = collect;
src->size = arrsz;
src->offset = n;
mov->address = address;
 
array_insert(ctx->ir->indirects, mov);
 
return mov;
}
 
static struct ir3_instruction *
create_indirect_store(struct ir3_compile *ctx, unsigned arrsz, unsigned n,
struct ir3_instruction *src, struct ir3_instruction *address,
struct ir3_instruction *collect)
{
struct ir3_block *block = ctx->block;
struct ir3_instruction *mov;
struct ir3_register *dst;
 
mov = ir3_instr_create(block, 1, 0);
mov->cat1.src_type = TYPE_U32;
mov->cat1.dst_type = TYPE_U32;
dst = ir3_reg_create(mov, 0, IR3_REG_RELATIV);
dst->size = arrsz;
dst->offset = n;
ir3_reg_create(mov, 0, IR3_REG_SSA)->instr = src;
mov->address = address;
mov->fanin = collect;
 
array_insert(ctx->ir->indirects, mov);
 
return mov;
}
 
static struct ir3_instruction *
create_input(struct ir3_block *block, struct ir3_instruction *instr,
unsigned n)
{
struct ir3_instruction *in;
 
in = ir3_instr_create(block, -1, OPC_META_INPUT);
in->inout.block = block;
ir3_reg_create(in, n, 0);
if (instr)
ir3_reg_create(in, 0, IR3_REG_SSA)->instr = instr;
 
return in;
}
 
static struct ir3_instruction *
create_frag_input(struct ir3_compile *ctx, unsigned n, bool use_ldlv)
{
struct ir3_block *block = ctx->block;
struct ir3_instruction *instr;
struct ir3_instruction *inloc = create_immed(block, n);
 
if (use_ldlv) {
instr = ir3_LDLV(block, inloc, 0, create_immed(block, 1), 0);
instr->cat6.type = TYPE_U32;
instr->cat6.iim_val = 1;
} else {
instr = ir3_BARY_F(block, inloc, 0, ctx->frag_pos, 0);
instr->regs[2]->wrmask = 0x3;
}
 
return instr;
}
 
static struct ir3_instruction *
create_frag_coord(struct ir3_compile *ctx, unsigned comp)
{
struct ir3_block *block = ctx->block;
struct ir3_instruction *instr;
 
compile_assert(ctx, !ctx->frag_coord[comp]);
 
ctx->frag_coord[comp] = create_input(ctx->block, NULL, 0);
 
switch (comp) {
case 0: /* .x */
case 1: /* .y */
/* for frag_coord, we get unsigned values.. we need
* to subtract (integer) 8 and divide by 16 (right-
* shift by 4) then convert to float:
*
* sub.s tmp, src, 8
* shr.b tmp, tmp, 4
* mov.u32f32 dst, tmp
*
*/
instr = ir3_SUB_S(block, ctx->frag_coord[comp], 0,
create_immed(block, 8), 0);
instr = ir3_SHR_B(block, instr, 0,
create_immed(block, 4), 0);
instr = ir3_COV(block, instr, TYPE_U32, TYPE_F32);
 
return instr;
case 2: /* .z */
case 3: /* .w */
default:
/* seems that we can use these as-is: */
return ctx->frag_coord[comp];
}
}
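 
/* Worked example for the conversion above, assuming the raw .x/.y
* values arrive as biased unsigned fixed-point: the float coordinate
* is (v - 8) >> 4 converted to float, so a raw value of 168 gives
* (168 - 8) >> 4 = 10, i.e. a pixel coordinate of 10.
*/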
 
static struct ir3_instruction *
create_frag_face(struct ir3_compile *ctx, unsigned comp)
{
struct ir3_block *block = ctx->block;
struct ir3_instruction *instr;
 
switch (comp) {
case 0: /* .x */
compile_assert(ctx, !ctx->frag_face);
 
ctx->frag_face = create_input(block, NULL, 0);
 
/* for faceness, we always get -1 or 0 (int).. but TGSI expects
* positive vs negative float.. and piglit further seems to
* expect -1.0 or 1.0:
*
* mul.s tmp, hr0.x, 2
* add.s tmp, tmp, 1
* mov.s32f32, dst, tmp
*
*/
instr = ir3_MUL_S(block, ctx->frag_face, 0,
create_immed(block, 2), 0);
instr = ir3_ADD_S(block, instr, 0,
create_immed(block, 1), 0);
instr = ir3_COV(block, instr, TYPE_S32, TYPE_F32);
 
return instr;
case 1: /* .y */
case 2: /* .z */
return create_immed(block, fui(0.0));
default:
case 3: /* .w */
return create_immed(block, fui(1.0));
}
}
 
/* helper for instructions that produce multiple consecutive scalar
* outputs which need to have a split/fanout meta instruction inserted
*/
static void
split_dest(struct ir3_block *block, struct ir3_instruction **dst,
struct ir3_instruction *src)
{
struct ir3_instruction *prev = NULL;
for (int i = 0, j = 0; i < 4; i++) {
struct ir3_instruction *split =
ir3_instr_create(block, -1, OPC_META_FO);
ir3_reg_create(split, 0, IR3_REG_SSA);
ir3_reg_create(split, 0, IR3_REG_SSA)->instr = src;
split->fo.off = i;
 
if (prev) {
split->cp.left = prev;
split->cp.left_cnt++;
prev->cp.right = split;
prev->cp.right_cnt++;
}
prev = split;
 
if (src->regs[0]->wrmask & (1 << i))
dst[j++] = split;
}
}
 
/*
* Adreno uses uint rather than having dedicated bool type,
* which (potentially) requires some conversion, in particular
* when using the output of a bool instr as an int input, or vice
* versa.
*
* | Adreno | NIR |
* -------+---------+-------+
* true | 1 | ~0 |
* false | 0 | 0 |
*
* To convert from an adreno bool (uint) to nir, use:
*
* absneg.s dst, (neg)src
*
* To convert back in the other direction:
*
* absneg.s dst, (abs)src
*
* The CP step can clean up the absneg.s that cancel each other
* out, and with a slight bit of extra cleverness (to recognize
* the instructions which produce either a 0 or 1) can eliminate
* the absneg.s's completely when an instruction that wants
* 0/1 consumes the result. For example, when a nir 'bcsel'
* consumes the result of 'feq'. So we should be able to get by
* without a boolean resolve step, and without incurring any
* extra penalty in instruction count.
*/
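 
/* Worked example: NIR true is ~0, i.e. -1 as a signed value, so
* absneg.s with (abs) yields 1 -- the native true.  In the other
* direction native true is 1, and absneg.s with (neg) yields
* -1 == ~0 -- the NIR true.  Zero maps to zero both ways, so false
* needs no fixup.
*/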
 
/* NIR bool -> native (adreno): */
static struct ir3_instruction *
ir3_b2n(struct ir3_block *block, struct ir3_instruction *instr)
{
return ir3_ABSNEG_S(block, instr, IR3_REG_SABS);
}
 
/* native (adreno) -> NIR bool: */
static struct ir3_instruction *
ir3_n2b(struct ir3_block *block, struct ir3_instruction *instr)
{
return ir3_ABSNEG_S(block, instr, IR3_REG_SNEG);
}
 
/*
* alu/sfu instructions:
*/
 
static void
emit_alu(struct ir3_compile *ctx, nir_alu_instr *alu)
{
const nir_op_info *info = &nir_op_infos[alu->op];
struct ir3_instruction **dst, *src[info->num_inputs];
struct ir3_block *b = ctx->block;
 
dst = get_dst(ctx, &alu->dest.dest, MAX2(info->output_size, 1));
 
/* Vectors are special in that they have non-scalarized writemasks,
* and just take the first swizzle channel for each argument in
* order into each writemask channel.
*/
if ((alu->op == nir_op_vec2) ||
(alu->op == nir_op_vec3) ||
(alu->op == nir_op_vec4)) {
 
for (int i = 0; i < info->num_inputs; i++) {
nir_alu_src *asrc = &alu->src[i];
 
compile_assert(ctx, !asrc->abs);
compile_assert(ctx, !asrc->negate);
 
src[i] = get_src(ctx, &asrc->src)[asrc->swizzle[0]];
if (!src[i])
src[i] = create_immed(ctx->block, 0);
dst[i] = ir3_MOV(b, src[i], TYPE_U32);
}
 
return;
}
 
/* General case: We can just grab the one used channel per src. */
for (int i = 0; i < info->num_inputs; i++) {
unsigned chan = ffs(alu->dest.write_mask) - 1;
nir_alu_src *asrc = &alu->src[i];
 
compile_assert(ctx, !asrc->abs);
compile_assert(ctx, !asrc->negate);
 
src[i] = get_src(ctx, &asrc->src)[asrc->swizzle[chan]];
 
compile_assert(ctx, src[i]);
}
 
switch (alu->op) {
case nir_op_f2i:
dst[0] = ir3_COV(b, src[0], TYPE_F32, TYPE_S32);
break;
case nir_op_f2u:
dst[0] = ir3_COV(b, src[0], TYPE_F32, TYPE_U32);
break;
case nir_op_i2f:
dst[0] = ir3_COV(b, src[0], TYPE_S32, TYPE_F32);
break;
case nir_op_u2f:
dst[0] = ir3_COV(b, src[0], TYPE_U32, TYPE_F32);
break;
case nir_op_imov:
dst[0] = ir3_MOV(b, src[0], TYPE_S32);
break;
case nir_op_fmov:
dst[0] = ir3_MOV(b, src[0], TYPE_F32);
break;
case nir_op_f2b:
dst[0] = ir3_CMPS_F(b, src[0], 0, create_immed(b, fui(0.0)), 0);
dst[0]->cat2.condition = IR3_COND_NE;
dst[0] = ir3_n2b(b, dst[0]);
break;
case nir_op_b2f:
dst[0] = ir3_COV(b, ir3_b2n(b, src[0]), TYPE_U32, TYPE_F32);
break;
case nir_op_b2i:
dst[0] = ir3_b2n(b, src[0]);
break;
case nir_op_i2b:
dst[0] = ir3_CMPS_S(b, src[0], 0, create_immed(b, 0), 0);
dst[0]->cat2.condition = IR3_COND_NE;
dst[0] = ir3_n2b(b, dst[0]);
break;
 
case nir_op_fneg:
dst[0] = ir3_ABSNEG_F(b, src[0], IR3_REG_FNEG);
break;
case nir_op_fabs:
dst[0] = ir3_ABSNEG_F(b, src[0], IR3_REG_FABS);
break;
case nir_op_fmax:
dst[0] = ir3_MAX_F(b, src[0], 0, src[1], 0);
break;
case nir_op_fmin:
dst[0] = ir3_MIN_F(b, src[0], 0, src[1], 0);
break;
case nir_op_fmul:
dst[0] = ir3_MUL_F(b, src[0], 0, src[1], 0);
break;
case nir_op_fadd:
dst[0] = ir3_ADD_F(b, src[0], 0, src[1], 0);
break;
case nir_op_fsub:
dst[0] = ir3_ADD_F(b, src[0], 0, src[1], IR3_REG_FNEG);
break;
case nir_op_ffma:
dst[0] = ir3_MAD_F32(b, src[0], 0, src[1], 0, src[2], 0);
break;
case nir_op_fddx:
dst[0] = ir3_DSX(b, src[0], 0);
dst[0]->cat5.type = TYPE_F32;
break;
case nir_op_fddy:
dst[0] = ir3_DSY(b, src[0], 0);
dst[0]->cat5.type = TYPE_F32;
break;
case nir_op_flt:
dst[0] = ir3_CMPS_F(b, src[0], 0, src[1], 0);
dst[0]->cat2.condition = IR3_COND_LT;
dst[0] = ir3_n2b(b, dst[0]);
break;
case nir_op_fge:
dst[0] = ir3_CMPS_F(b, src[0], 0, src[1], 0);
dst[0]->cat2.condition = IR3_COND_GE;
dst[0] = ir3_n2b(b, dst[0]);
break;
case nir_op_feq:
dst[0] = ir3_CMPS_F(b, src[0], 0, src[1], 0);
dst[0]->cat2.condition = IR3_COND_EQ;
dst[0] = ir3_n2b(b, dst[0]);
break;
case nir_op_fne:
dst[0] = ir3_CMPS_F(b, src[0], 0, src[1], 0);
dst[0]->cat2.condition = IR3_COND_NE;
dst[0] = ir3_n2b(b, dst[0]);
break;
case nir_op_fceil:
dst[0] = ir3_CEIL_F(b, src[0], 0);
break;
case nir_op_ffloor:
dst[0] = ir3_FLOOR_F(b, src[0], 0);
break;
case nir_op_ftrunc:
dst[0] = ir3_TRUNC_F(b, src[0], 0);
break;
case nir_op_fround_even:
dst[0] = ir3_RNDNE_F(b, src[0], 0);
break;
case nir_op_fsign:
dst[0] = ir3_SIGN_F(b, src[0], 0);
break;
 
case nir_op_fsin:
dst[0] = ir3_SIN(b, src[0], 0);
break;
case nir_op_fcos:
dst[0] = ir3_COS(b, src[0], 0);
break;
case nir_op_frsq:
dst[0] = ir3_RSQ(b, src[0], 0);
break;
case nir_op_frcp:
dst[0] = ir3_RCP(b, src[0], 0);
break;
case nir_op_flog2:
dst[0] = ir3_LOG2(b, src[0], 0);
break;
case nir_op_fexp2:
dst[0] = ir3_EXP2(b, src[0], 0);
break;
case nir_op_fsqrt:
dst[0] = ir3_SQRT(b, src[0], 0);
break;
 
case nir_op_iabs:
dst[0] = ir3_ABSNEG_S(b, src[0], IR3_REG_SABS);
break;
case nir_op_iadd:
dst[0] = ir3_ADD_U(b, src[0], 0, src[1], 0);
break;
case nir_op_iand:
dst[0] = ir3_AND_B(b, src[0], 0, src[1], 0);
break;
case nir_op_imax:
dst[0] = ir3_MAX_S(b, src[0], 0, src[1], 0);
break;
case nir_op_imin:
dst[0] = ir3_MIN_S(b, src[0], 0, src[1], 0);
break;
case nir_op_imul:
/*
* dst = (al * bl) + (ah * bl << 16) + (al * bh << 16)
* mull.u tmp0, a, b ; mul low, i.e. al * bl
* madsh.m16 tmp1, a, b, tmp0 ; mul-add shift high mix, i.e. ah * bl << 16
* madsh.m16 dst, b, a, tmp1 ; i.e. al * bh << 16
*/
dst[0] = ir3_MADSH_M16(b, src[1], 0, src[0], 0,
ir3_MADSH_M16(b, src[0], 0, src[1], 0,
ir3_MULL_U(b, src[0], 0, src[1], 0), 0), 0);
break;
case nir_op_ineg:
dst[0] = ir3_ABSNEG_S(b, src[0], IR3_REG_SNEG);
break;
case nir_op_inot:
dst[0] = ir3_NOT_B(b, src[0], 0);
break;
case nir_op_ior:
dst[0] = ir3_OR_B(b, src[0], 0, src[1], 0);
break;
case nir_op_ishl:
dst[0] = ir3_SHL_B(b, src[0], 0, src[1], 0);
break;
case nir_op_ishr:
dst[0] = ir3_ASHR_B(b, src[0], 0, src[1], 0);
break;
case nir_op_isign: {
/* maybe this would be sane to lower in nir.. */
struct ir3_instruction *neg, *pos;
 
neg = ir3_CMPS_S(b, src[0], 0, create_immed(b, 0), 0);
neg->cat2.condition = IR3_COND_LT;
 
pos = ir3_CMPS_S(b, src[0], 0, create_immed(b, 0), 0);
pos->cat2.condition = IR3_COND_GT;
 
dst[0] = ir3_SUB_U(b, pos, 0, neg, 0);
 
break;
}
case nir_op_isub:
dst[0] = ir3_SUB_U(b, src[0], 0, src[1], 0);
break;
case nir_op_ixor:
dst[0] = ir3_XOR_B(b, src[0], 0, src[1], 0);
break;
case nir_op_ushr:
dst[0] = ir3_SHR_B(b, src[0], 0, src[1], 0);
break;
case nir_op_ilt:
dst[0] = ir3_CMPS_S(b, src[0], 0, src[1], 0);
dst[0]->cat2.condition = IR3_COND_LT;
dst[0] = ir3_n2b(b, dst[0]);
break;
case nir_op_ige:
dst[0] = ir3_CMPS_S(b, src[0], 0, src[1], 0);
dst[0]->cat2.condition = IR3_COND_GE;
dst[0] = ir3_n2b(b, dst[0]);
break;
case nir_op_ieq:
dst[0] = ir3_CMPS_S(b, src[0], 0, src[1], 0);
dst[0]->cat2.condition = IR3_COND_EQ;
dst[0] = ir3_n2b(b, dst[0]);
break;
case nir_op_ine:
dst[0] = ir3_CMPS_S(b, src[0], 0, src[1], 0);
dst[0]->cat2.condition = IR3_COND_NE;
dst[0] = ir3_n2b(b, dst[0]);
break;
case nir_op_ult:
dst[0] = ir3_CMPS_U(b, src[0], 0, src[1], 0);
dst[0]->cat2.condition = IR3_COND_LT;
dst[0] = ir3_n2b(b, dst[0]);
break;
case nir_op_uge:
dst[0] = ir3_CMPS_U(b, src[0], 0, src[1], 0);
dst[0]->cat2.condition = IR3_COND_GE;
dst[0] = ir3_n2b(b, dst[0]);
break;
 
case nir_op_bcsel:
dst[0] = ir3_SEL_B32(b, src[1], 0, ir3_b2n(b, src[0]), 0, src[2], 0);
break;
 
default:
compile_error(ctx, "Unhandled ALU op: %s\n",
nir_op_infos[alu->op].name);
break;
}
}
 
/* handles direct/indirect UBO reads: */
static void
emit_intrinsic_load_ubo(struct ir3_compile *ctx, nir_intrinsic_instr *intr,
struct ir3_instruction **dst)
{
struct ir3_block *b = ctx->block;
struct ir3_instruction *addr, *src0, *src1;
/* UBO addresses are the first driver params: */
unsigned ubo = regid(ctx->so->first_driver_param, 0);
unsigned off = intr->const_index[0];
 
/* First src is ubo index, which could either be an immed or not: */
src0 = get_src(ctx, &intr->src[0])[0];
if (is_same_type_mov(src0) &&
(src0->regs[1]->flags & IR3_REG_IMMED)) {
addr = create_uniform(ctx, ubo + src0->regs[1]->iim_val);
} else {
addr = create_uniform_indirect(ctx, ubo, get_addr(ctx, src0));
}
 
if (intr->intrinsic == nir_intrinsic_load_ubo_indirect) {
/* For load_ubo_indirect, second src is indirect offset: */
src1 = get_src(ctx, &intr->src[1])[0];
 
/* and add offset to addr: */
addr = ir3_ADD_S(b, addr, 0, src1, 0);
}
 
/* if offset is too large to encode in the ldg, split it out: */
if ((off + (intr->num_components * 4)) > 1024) {
/* split out the minimal amount to improve the odds that
* cp can fit the immediate in the add.s instruction:
*/
unsigned off2 = off + (intr->num_components * 4) - 1024;
addr = ir3_ADD_S(b, addr, 0, create_immed(b, off2), 0);
off -= off2;
}
 
for (int i = 0; i < intr->num_components; i++) {
struct ir3_instruction *load =
ir3_LDG(b, addr, 0, create_immed(b, 1), 0);
load->cat6.type = TYPE_U32;
load->cat6.offset = off + i * 4; /* byte offset */
dst[i] = load;
}
}
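 
/* Worked example of the offset split above, with hypothetical values:
* for off = 1040 and num_components = 4 the read would end at
* 1056 > 1024, so off2 = 1056 - 1024 = 32 is folded into the address
* with add.s and the remaining per-component ldg offsets become 1008,
* 1012, 1016 and 1020, which fit in the encoding.
*/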
 
/* handles array reads: */
static void
emit_intrinisic_load_var(struct ir3_compile *ctx, nir_intrinsic_instr *intr,
struct ir3_instruction **dst)
{
nir_deref_var *dvar = intr->variables[0];
nir_deref_array *darr = nir_deref_as_array(dvar->deref.child);
struct ir3_array *arr = get_var(ctx, dvar->var);
 
compile_assert(ctx, dvar->deref.child &&
(dvar->deref.child->deref_type == nir_deref_type_array));
 
switch (darr->deref_array_type) {
case nir_deref_array_type_direct:
/* direct access does not require anything special: */
for (int i = 0; i < intr->num_components; i++) {
unsigned n = darr->base_offset * 4 + i;
compile_assert(ctx, n < arr->length);
dst[i] = arr->arr[n];
}
break;
case nir_deref_array_type_indirect: {
/* for indirect, we need to collect all the array elements: */
struct ir3_instruction *collect =
create_collect(ctx->block, arr->arr, arr->length);
struct ir3_instruction *addr =
get_addr(ctx, get_src(ctx, &darr->indirect)[0]);
for (int i = 0; i < intr->num_components; i++) {
unsigned n = darr->base_offset * 4 + i;
compile_assert(ctx, n < arr->length);
dst[i] = create_indirect_load(ctx, arr->length, n, addr, collect);
}
break;
}
default:
compile_error(ctx, "Unhandled load deref type: %u\n",
darr->deref_array_type);
break;
}
}
 
/* handles array writes: */
static void
emit_intrinisic_store_var(struct ir3_compile *ctx, nir_intrinsic_instr *intr)
{
nir_deref_var *dvar = intr->variables[0];
nir_deref_array *darr = nir_deref_as_array(dvar->deref.child);
struct ir3_array *arr = get_var(ctx, dvar->var);
struct ir3_instruction **src;
 
compile_assert(ctx, dvar->deref.child &&
(dvar->deref.child->deref_type == nir_deref_type_array));
 
src = get_src(ctx, &intr->src[0]);
 
switch (darr->deref_array_type) {
case nir_deref_array_type_direct:
/* direct access does not require anything special: */
for (int i = 0; i < intr->num_components; i++) {
unsigned n = darr->base_offset * 4 + i;
compile_assert(ctx, n < arr->length);
arr->arr[n] = src[i];
}
break;
case nir_deref_array_type_indirect: {
/* for indirect, create indirect-store and fan that out: */
struct ir3_instruction *collect =
create_collect(ctx->block, arr->arr, arr->length);
struct ir3_instruction *addr =
get_addr(ctx, get_src(ctx, &darr->indirect)[0]);
for (int i = 0; i < intr->num_components; i++) {
struct ir3_instruction *store;
unsigned n = darr->base_offset * 4 + i;
compile_assert(ctx, n < arr->length);
 
store = create_indirect_store(ctx, arr->length,
n, src[i], addr, collect);
 
store->fanin->fi.aid = arr->aid;
 
/* TODO: probably split this out to be used for
* store_output_indirect? or move this into
* create_indirect_store()?
*/
for (int j = i; j < arr->length; j += 4) {
struct ir3_instruction *split;
 
split = ir3_instr_create(ctx->block, -1, OPC_META_FO);
split->fo.off = j;
ir3_reg_create(split, 0, 0);
ir3_reg_create(split, 0, IR3_REG_SSA)->instr = store;
 
arr->arr[j] = split;
}
}
break;
}
default:
compile_error(ctx, "Unhandled store deref type: %u\n",
darr->deref_array_type);
break;
}
}
 
static void add_sysval_input(struct ir3_compile *ctx, unsigned name,
struct ir3_instruction *instr)
{
struct ir3_shader_variant *so = ctx->so;
unsigned r = regid(so->inputs_count, 0);
unsigned n = so->inputs_count++;
 
so->inputs[n].semantic = ir3_semantic_name(name, 0);
so->inputs[n].compmask = 1;
so->inputs[n].regid = r;
so->inputs[n].interpolate = TGSI_INTERPOLATE_CONSTANT;
so->total_in++;
 
ctx->block->ninputs = MAX2(ctx->block->ninputs, r + 1);
ctx->block->inputs[r] = instr;
}
 
static void
emit_intrinisic(struct ir3_compile *ctx, nir_intrinsic_instr *intr)
{
const nir_intrinsic_info *info = &nir_intrinsic_infos[intr->intrinsic];
struct ir3_instruction **dst, **src;
struct ir3_block *b = ctx->block;
unsigned idx = intr->const_index[0];
 
if (info->has_dest) {
dst = get_dst(ctx, &intr->dest, intr->num_components);
}
 
switch (intr->intrinsic) {
case nir_intrinsic_load_uniform:
compile_assert(ctx, intr->const_index[1] == 1);
for (int i = 0; i < intr->num_components; i++) {
unsigned n = idx * 4 + i;
dst[i] = create_uniform(ctx, n);
}
break;
case nir_intrinsic_load_uniform_indirect:
compile_assert(ctx, intr->const_index[1] == 1);
src = get_src(ctx, &intr->src[0]);
for (int i = 0; i < intr->num_components; i++) {
unsigned n = idx * 4 + i;
dst[i] = create_uniform_indirect(ctx, n,
get_addr(ctx, src[0]));
}
break;
case nir_intrinsic_load_ubo:
case nir_intrinsic_load_ubo_indirect:
emit_intrinsic_load_ubo(ctx, intr, dst);
break;
case nir_intrinsic_load_input:
compile_assert(ctx, intr->const_index[1] == 1);
for (int i = 0; i < intr->num_components; i++) {
unsigned n = idx * 4 + i;
dst[i] = b->inputs[n];
}
break;
case nir_intrinsic_load_input_indirect:
compile_assert(ctx, intr->const_index[1] == 1);
src = get_src(ctx, &intr->src[0]);
struct ir3_instruction *collect =
create_collect(b, b->inputs, b->ninputs);
struct ir3_instruction *addr = get_addr(ctx, src[0]);
for (int i = 0; i < intr->num_components; i++) {
unsigned n = idx * 4 + i;
dst[i] = create_indirect_load(ctx, b->ninputs, n, addr, collect);
}
break;
case nir_intrinsic_load_var:
emit_intrinisic_load_var(ctx, intr, dst);
break;
case nir_intrinsic_store_var:
emit_intrinisic_store_var(ctx, intr);
break;
case nir_intrinsic_store_output:
compile_assert(ctx, intr->const_index[1] == 1);
src = get_src(ctx, &intr->src[0]);
for (int i = 0; i < intr->num_components; i++) {
unsigned n = idx * 4 + i;
b->outputs[n] = src[i];
}
break;
case nir_intrinsic_load_base_vertex:
if (!ctx->basevertex) {
/* first four vec4 sysval's reserved for UBOs: */
unsigned r = regid(ctx->so->first_driver_param + 4, 0);
ctx->basevertex = create_uniform(ctx, r);
add_sysval_input(ctx, TGSI_SEMANTIC_BASEVERTEX,
ctx->basevertex);
}
dst[0] = ctx->basevertex;
break;
case nir_intrinsic_load_vertex_id_zero_base:
if (!ctx->vertex_id) {
ctx->vertex_id = create_input(ctx->block, NULL, 0);
add_sysval_input(ctx, TGSI_SEMANTIC_VERTEXID_NOBASE,
ctx->vertex_id);
}
dst[0] = ctx->vertex_id;
break;
case nir_intrinsic_load_instance_id:
if (!ctx->instance_id) {
ctx->instance_id = create_input(ctx->block, NULL, 0);
add_sysval_input(ctx, TGSI_SEMANTIC_INSTANCEID,
ctx->instance_id);
}
dst[0] = ctx->instance_id;
break;
case nir_intrinsic_discard_if:
case nir_intrinsic_discard: {
struct ir3_instruction *cond, *kill;
 
if (intr->intrinsic == nir_intrinsic_discard_if) {
/* conditional discard: */
src = get_src(ctx, &intr->src[0]);
cond = ir3_b2n(b, src[0]);
} else {
/* unconditional discard: */
cond = create_immed(b, 1);
}
 
cond = ir3_CMPS_S(b, cond, 0, create_immed(b, 0), 0);
cond->cat2.condition = IR3_COND_NE;
 
/* condition always goes in predicate register: */
cond->regs[0]->num = regid(REG_P0, 0);
 
kill = ir3_KILL(b, cond, 0);
 
ctx->kill[ctx->kill_count++] = kill;
ctx->so->has_kill = true;
 
break;
}
default:
compile_error(ctx, "Unhandled intrinsic type: %s\n",
nir_intrinsic_infos[intr->intrinsic].name);
break;
}
}
 
static void
emit_load_const(struct ir3_compile *ctx, nir_load_const_instr *instr)
{
struct ir3_instruction **dst = get_dst_ssa(ctx, &instr->def,
instr->def.num_components);
for (int i = 0; i < instr->def.num_components; i++)
dst[i] = create_immed(ctx->block, instr->value.u[i]);
}
 
static void
emit_undef(struct ir3_compile *ctx, nir_ssa_undef_instr *undef)
{
struct ir3_instruction **dst = get_dst_ssa(ctx, &undef->def,
undef->def.num_components);
/* backend doesn't want undefined instructions, so just plug
* in 0.0..
*/
for (int i = 0; i < undef->def.num_components; i++)
dst[i] = create_immed(ctx->block, fui(0.0));
}
 
/*
* texture fetch/sample instructions:
*/
 
static void
tex_info(nir_tex_instr *tex, unsigned *flagsp, unsigned *coordsp)
{
unsigned coords, flags = 0;
 
/* note: would use tex->coord_components.. except txs.. also,
* since array index goes after shadow ref, we don't want to
* count it:
*/
switch (tex->sampler_dim) {
case GLSL_SAMPLER_DIM_1D:
case GLSL_SAMPLER_DIM_BUF:
coords = 1;
break;
case GLSL_SAMPLER_DIM_2D:
case GLSL_SAMPLER_DIM_RECT:
case GLSL_SAMPLER_DIM_EXTERNAL:
case GLSL_SAMPLER_DIM_MS:
coords = 2;
break;
case GLSL_SAMPLER_DIM_3D:
case GLSL_SAMPLER_DIM_CUBE:
coords = 3;
flags |= IR3_INSTR_3D;
break;
}
 
if (tex->is_shadow)
flags |= IR3_INSTR_S;
 
if (tex->is_array)
flags |= IR3_INSTR_A;
 
*flagsp = flags;
*coordsp = coords;
}
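 
/* Example: for a hypothetical 2D array shadow sampler, tex_info()
* gives coords = 2 (x/y only; the shadow ref and array index are not
* counted) and flags = IR3_INSTR_S | IR3_INSTR_A, while a 3D or cube
* texture gets coords = 3 and additionally IR3_INSTR_3D.
*/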
 
static void
emit_tex(struct ir3_compile *ctx, nir_tex_instr *tex)
{
struct ir3_block *b = ctx->block;
struct ir3_instruction **dst, *sam, *src0[12], *src1[4];
struct ir3_instruction **coord, *lod, *compare, *proj, **off, **ddx, **ddy;
bool has_bias = false, has_lod = false, has_proj = false, has_off = false;
unsigned i, coords, flags;
unsigned nsrc0 = 0, nsrc1 = 0;
type_t type;
opc_t opc;
 
/* TODO: might just be one component for gathers? */
dst = get_dst(ctx, &tex->dest, 4);
 
for (unsigned i = 0; i < tex->num_srcs; i++) {
switch (tex->src[i].src_type) {
case nir_tex_src_coord:
coord = get_src(ctx, &tex->src[i].src);
break;
case nir_tex_src_bias:
lod = get_src(ctx, &tex->src[i].src)[0];
has_bias = true;
break;
case nir_tex_src_lod:
lod = get_src(ctx, &tex->src[i].src)[0];
has_lod = true;
break;
case nir_tex_src_comparitor: /* shadow comparator */
compare = get_src(ctx, &tex->src[i].src)[0];
break;
case nir_tex_src_projector:
proj = get_src(ctx, &tex->src[i].src)[0];
has_proj = true;
break;
case nir_tex_src_offset:
off = get_src(ctx, &tex->src[i].src);
has_off = true;
break;
case nir_tex_src_ddx:
ddx = get_src(ctx, &tex->src[i].src);
break;
case nir_tex_src_ddy:
ddy = get_src(ctx, &tex->src[i].src);
break;
default:
compile_error(ctx, "Unhandled NIR tex serc type: %d\n",
tex->src[i].src_type);
return;
}
}
 
switch (tex->op) {
case nir_texop_tex: opc = OPC_SAM; break;
case nir_texop_txb: opc = OPC_SAMB; break;
case nir_texop_txl: opc = OPC_SAML; break;
case nir_texop_txd: opc = OPC_SAMGQ; break;
case nir_texop_txf: opc = OPC_ISAML; break;
case nir_texop_txf_ms:
case nir_texop_txs:
case nir_texop_lod:
case nir_texop_tg4:
case nir_texop_query_levels:
compile_error(ctx, "Unhandled NIR tex type: %d\n", tex->op);
return;
}
 
tex_info(tex, &flags, &coords);
 
/* scale up integer coords for TXF based on the LOD */
if (opc == OPC_ISAML) {
assert(has_lod);
for (i = 0; i < coords; i++)
coord[i] = ir3_SHL_B(b, coord[i], 0, lod, 0);
}
/*
* lay out the first argument in the proper order:
* - actual coordinates first
* - shadow reference
* - array index
* - projection w
* - starting at offset 4, dpdx.xy, dpdy.xy
*
* bias/lod go into the second arg
*/
 
/* insert tex coords: */
for (i = 0; i < coords; i++)
src0[nsrc0++] = coord[i];
 
if (coords == 1) {
/* hw doesn't do 1d, so we treat it as 2d with
* height of 1, and patch up the y coord.
* TODO: y coord should be (int)0 in some cases..
*/
src0[nsrc0++] = create_immed(b, fui(0.5));
}
 
if (tex->is_shadow)
src0[nsrc0++] = compare;
 
if (tex->is_array)
src0[nsrc0++] = coord[coords];
 
if (has_proj) {
src0[nsrc0++] = proj;
flags |= IR3_INSTR_P;
}
 
/* pad to 4, then ddx/ddy: */
if (tex->op == nir_texop_txd) {
while (nsrc0 < 4)
src0[nsrc0++] = create_immed(b, fui(0.0));
for (i = 0; i < coords; i++)
src0[nsrc0++] = ddx[i];
if (coords < 2)
src0[nsrc0++] = create_immed(b, fui(0.0));
for (i = 0; i < coords; i++)
src0[nsrc0++] = ddy[i];
if (coords < 2)
src0[nsrc0++] = create_immed(b, fui(0.0));
}
 
/*
* second argument (if applicable):
* - offsets
* - lod
* - bias
*/
if (has_off | has_lod | has_bias) {
if (has_off) {
for (i = 0; i < coords; i++)
src1[nsrc1++] = off[i];
if (coords < 2)
src1[nsrc1++] = create_immed(b, fui(0.0));
flags |= IR3_INSTR_O;
}
 
if (has_lod | has_bias)
src1[nsrc1++] = lod;
}
 
switch (tex->dest_type) {
case nir_type_invalid:
case nir_type_float:
type = TYPE_F32;
break;
case nir_type_int:
type = TYPE_S32;
break;
case nir_type_unsigned:
case nir_type_bool:
type = TYPE_U32;
break;
}
 
sam = ir3_SAM(b, opc, type, TGSI_WRITEMASK_XYZW,
flags, tex->sampler_index, tex->sampler_index,
create_collect(b, src0, nsrc0),
create_collect(b, src1, nsrc1));
 
split_dest(b, dst, sam);
}
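 
/* Illustrative src0 layout for a hypothetical 2D array shadow sample
* with no projection or derivatives:
*
*   src0 = { x, y, shadow_ref, array_index }
*
* matching the ordering described in emit_tex(): coords, then shadow
* ref, then array index, collected into a single fanin for the sam
* instruction.
*/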
 
static void
emit_tex_query_levels(struct ir3_compile *ctx, nir_tex_instr *tex)
{
struct ir3_block *b = ctx->block;
struct ir3_instruction **dst, *sam;
 
dst = get_dst(ctx, &tex->dest, 1);
 
sam = ir3_SAM(b, OPC_GETINFO, TYPE_U32, TGSI_WRITEMASK_Z, 0,
tex->sampler_index, tex->sampler_index, NULL, NULL);
 
/* even though there is only one component, since it ends
* up in .z rather than .x, we need a split_dest()
*/
split_dest(b, dst, sam);
 
/* The # of levels comes from getinfo.z. We need to add 1 to it, since
* the value in TEX_CONST_0 is zero-based.
*/
if (ctx->levels_add_one)
dst[0] = ir3_ADD_U(b, dst[0], 0, create_immed(b, 1), 0);
}
 
static void
emit_tex_txs(struct ir3_compile *ctx, nir_tex_instr *tex)
{
struct ir3_block *b = ctx->block;
struct ir3_instruction **dst, *sam, *lod;
unsigned flags, coords;
 
tex_info(tex, &flags, &coords);
 
dst = get_dst(ctx, &tex->dest, 4);
 
compile_assert(ctx, tex->num_srcs == 1);
compile_assert(ctx, tex->src[0].src_type == nir_tex_src_lod);
 
lod = get_src(ctx, &tex->src[0].src)[0];
 
sam = ir3_SAM(b, OPC_GETSIZE, TYPE_U32, TGSI_WRITEMASK_XYZW, flags,
tex->sampler_index, tex->sampler_index, lod, NULL);
 
split_dest(b, dst, sam);
 
/* Array size actually ends up in .w rather than .z. This doesn't
* matter for miplevel 0, but for higher mips the value in z is
* minified whereas w stays. Also, the value in TEX_CONST_3_DEPTH is
* returned, which means that we have to add 1 to it for arrays.
*/
if (tex->is_array) {
if (ctx->levels_add_one) {
dst[coords] = ir3_ADD_U(b, dst[3], 0, create_immed(b, 1), 0);
} else {
dst[coords] = ir3_MOV(b, dst[3], TYPE_U32);
}
}
}
 
static void
emit_instr(struct ir3_compile *ctx, nir_instr *instr)
{
switch (instr->type) {
case nir_instr_type_alu:
emit_alu(ctx, nir_instr_as_alu(instr));
break;
case nir_instr_type_intrinsic:
emit_intrinisic(ctx, nir_instr_as_intrinsic(instr));
break;
case nir_instr_type_load_const:
emit_load_const(ctx, nir_instr_as_load_const(instr));
break;
case nir_instr_type_ssa_undef:
emit_undef(ctx, nir_instr_as_ssa_undef(instr));
break;
case nir_instr_type_tex: {
nir_tex_instr *tex = nir_instr_as_tex(instr);
/* a couple of tex instructions get special-cased:
*/
switch (tex->op) {
case nir_texop_txs:
emit_tex_txs(ctx, tex);
break;
case nir_texop_query_levels:
emit_tex_query_levels(ctx, tex);
break;
default:
emit_tex(ctx, tex);
break;
}
break;
}
case nir_instr_type_call:
case nir_instr_type_jump:
case nir_instr_type_phi:
case nir_instr_type_parallel_copy:
compile_error(ctx, "Unhandled NIR instruction type: %d\n", instr->type);
break;
}
}
 
static void
emit_block(struct ir3_compile *ctx, nir_block *block)
{
nir_foreach_instr(block, instr) {
emit_instr(ctx, instr);
if (ctx->error)
return;
}
}
 
static void
emit_function(struct ir3_compile *ctx, nir_function_impl *impl)
{
foreach_list_typed(nir_cf_node, node, node, &impl->body) {
switch (node->type) {
case nir_cf_node_block:
emit_block(ctx, nir_cf_node_as_block(node));
break;
case nir_cf_node_if:
case nir_cf_node_loop:
case nir_cf_node_function:
compile_error(ctx, "TODO\n");
break;
}
if (ctx->error)
return;
}
}
 
static void
setup_input(struct ir3_compile *ctx, nir_variable *in)
{
struct ir3_shader_variant *so = ctx->so;
unsigned array_len = MAX2(glsl_get_length(in->type), 1);
unsigned ncomp = glsl_get_components(in->type);
/* XXX: map loc slots to semantics */
unsigned semantic_name = in->data.location;
unsigned semantic_index = in->data.index;
unsigned n = in->data.driver_location;
 
DBG("; in: %u:%u, len=%ux%u, loc=%u\n",
semantic_name, semantic_index, array_len,
ncomp, n);
 
so->inputs[n].semantic =
ir3_semantic_name(semantic_name, semantic_index);
so->inputs[n].compmask = (1 << ncomp) - 1;
so->inputs[n].inloc = ctx->next_inloc;
so->inputs[n].interpolate = 0;
so->inputs_count = MAX2(so->inputs_count, n + 1);
 
/* the fdN_program_emit() code expects tgsi consts here, so map
* things back to tgsi for now:
*/
switch (in->data.interpolation) {
case INTERP_QUALIFIER_FLAT:
so->inputs[n].interpolate = TGSI_INTERPOLATE_CONSTANT;
break;
case INTERP_QUALIFIER_NOPERSPECTIVE:
so->inputs[n].interpolate = TGSI_INTERPOLATE_LINEAR;
break;
case INTERP_QUALIFIER_SMOOTH:
so->inputs[n].interpolate = TGSI_INTERPOLATE_PERSPECTIVE;
break;
}
 
for (int i = 0; i < ncomp; i++) {
struct ir3_instruction *instr = NULL;
unsigned idx = (n * 4) + i;
 
if (ctx->so->type == SHADER_FRAGMENT) {
if (semantic_name == TGSI_SEMANTIC_POSITION) {
so->inputs[n].bary = false;
so->frag_coord = true;
instr = create_frag_coord(ctx, i);
} else if (semantic_name == TGSI_SEMANTIC_FACE) {
so->inputs[n].bary = false;
so->frag_face = true;
instr = create_frag_face(ctx, i);
} else {
bool use_ldlv = false;
 
/* with NIR, we need to infer TGSI_INTERPOLATE_COLOR
* from the semantic name:
*/
if ((in->data.interpolation == INTERP_QUALIFIER_NONE) &&
((semantic_name == TGSI_SEMANTIC_COLOR) ||
(semantic_name == TGSI_SEMANTIC_BCOLOR)))
so->inputs[n].interpolate = TGSI_INTERPOLATE_COLOR;
 
if (ctx->flat_bypass) {
/* a4xx can't flat-interpolate via bary.f, so flat-shaded
* varyings (and colors, when rasterflat is set) are loaded
* directly with ldlv instead:
*/
switch (so->inputs[n].interpolate) {
case TGSI_INTERPOLATE_COLOR:
if (!ctx->so->key.rasterflat)
break;
/* fallthrough */
case TGSI_INTERPOLATE_CONSTANT:
use_ldlv = true;
break;
}
}
 
so->inputs[n].bary = true;
 
instr = create_frag_input(ctx,
so->inputs[n].inloc + i - 8, use_ldlv);
}
} else {
instr = create_input(ctx->block, NULL, idx);
}
 
ctx->block->inputs[idx] = instr;
}
 
if (so->inputs[n].bary || (ctx->so->type == SHADER_VERTEX)) {
ctx->next_inloc += ncomp;
so->total_in += ncomp;
}
}
 
static void
setup_output(struct ir3_compile *ctx, nir_variable *out)
{
struct ir3_shader_variant *so = ctx->so;
unsigned array_len = MAX2(glsl_get_length(out->type), 1);
unsigned ncomp = glsl_get_components(out->type);
/* XXX: map loc slots to semantics */
unsigned semantic_name = out->data.location;
unsigned semantic_index = out->data.index;
unsigned n = out->data.driver_location;
unsigned comp = 0;
 
DBG("; out: %u:%u, len=%ux%u, loc=%u\n",
semantic_name, semantic_index, array_len,
ncomp, n);
 
if (ctx->so->type == SHADER_VERTEX) {
switch (semantic_name) {
case TGSI_SEMANTIC_POSITION:
so->writes_pos = true;
break;
case TGSI_SEMANTIC_PSIZE:
so->writes_psize = true;
break;
case TGSI_SEMANTIC_COLOR:
case TGSI_SEMANTIC_BCOLOR:
case TGSI_SEMANTIC_GENERIC:
case TGSI_SEMANTIC_FOG:
case TGSI_SEMANTIC_TEXCOORD:
break;
default:
compile_error(ctx, "unknown VS semantic name: %s\n",
tgsi_semantic_names[semantic_name]);
}
} else {
switch (semantic_name) {
case TGSI_SEMANTIC_POSITION:
comp = 2; /* tgsi will write to .z component */
so->writes_pos = true;
break;
case TGSI_SEMANTIC_COLOR:
break;
default:
compile_error(ctx, "unknown FS semantic name: %s\n",
tgsi_semantic_names[semantic_name]);
}
}
 
compile_assert(ctx, n < ARRAY_SIZE(so->outputs));
 
so->outputs[n].semantic =
ir3_semantic_name(semantic_name, semantic_index);
so->outputs[n].regid = regid(n, comp);
so->outputs_count = MAX2(so->outputs_count, n + 1);
 
for (int i = 0; i < ncomp; i++) {
unsigned idx = (n * 4) + i;
 
ctx->block->outputs[idx] = create_immed(ctx->block, fui(0.0));
}
}
 
static void
emit_instructions(struct ir3_compile *ctx)
{
unsigned ninputs = exec_list_length(&ctx->s->inputs) * 4;
unsigned noutputs = exec_list_length(&ctx->s->outputs) * 4;
 
/* we need to allocate a big enough outputs array so that
* we can stuff the kill's at the end. Likewise for vtx
* shaders, we need to leave room for sysvals:
*/
if (ctx->so->type == SHADER_FRAGMENT) {
noutputs += ARRAY_SIZE(ctx->kill);
} else if (ctx->so->type == SHADER_VERTEX) {
ninputs += 8;
}
 
ctx->block = ir3_block_create(ctx->ir, 0, ninputs, noutputs);
 
if (ctx->so->type == SHADER_FRAGMENT) {
ctx->block->noutputs -= ARRAY_SIZE(ctx->kill);
} else if (ctx->so->type == SHADER_VERTEX) {
ctx->block->ninputs -= 8;
}
 
/* for fragment shader, we have a single input register (usually
* r0.xy) which is used as the base for bary.f varying fetch instrs:
*/
if (ctx->so->type == SHADER_FRAGMENT) {
// TODO maybe a helper for fi since we need it a few places..
struct ir3_instruction *instr;
instr = ir3_instr_create(ctx->block, -1, OPC_META_FI);
ir3_reg_create(instr, 0, 0);
ir3_reg_create(instr, 0, IR3_REG_SSA); /* r0.x */
ir3_reg_create(instr, 0, IR3_REG_SSA); /* r0.y */
ctx->frag_pos = instr;
}
 
/* Setup inputs: */
foreach_list_typed(nir_variable, var, node, &ctx->s->inputs) {
setup_input(ctx, var);
}
 
/* Setup outputs: */
foreach_list_typed(nir_variable, var, node, &ctx->s->outputs) {
setup_output(ctx, var);
}
 
/* Setup variables (which should only be arrays): */
foreach_list_typed(nir_variable, var, node, &ctx->s->globals) {
declare_var(ctx, var);
}
 
/* Find the main function and emit the body: */
nir_foreach_overload(ctx->s, overload) {
compile_assert(ctx, strcmp(overload->function->name, "main") == 0);
compile_assert(ctx, overload->impl);
emit_function(ctx, overload->impl);
if (ctx->error)
return;
}
}
 
/* from NIR perspective, we actually have inputs. But most of the "inputs"
* for a fragment shader are just bary.f instructions. The *actual* inputs
* from the hw perspective are the frag_pos and optionally frag_coord and
* frag_face.
*/
static void
fixup_frag_inputs(struct ir3_compile *ctx)
{
struct ir3_shader_variant *so = ctx->so;
struct ir3_block *block = ctx->block;
struct ir3_instruction **inputs;
struct ir3_instruction *instr;
int n, regid = 0;
 
block->ninputs = 0;
 
n = 4; /* always have frag_pos */
n += COND(so->frag_face, 4);
n += COND(so->frag_coord, 4);
 
inputs = ir3_alloc(ctx->ir, n * (sizeof(struct ir3_instruction *)));
 
if (so->frag_face) {
/* this ultimately gets assigned to hr0.x so doesn't conflict
* with frag_coord/frag_pos..
*/
inputs[block->ninputs++] = ctx->frag_face;
ctx->frag_face->regs[0]->num = 0;
 
/* remaining channels not used, but let's avoid confusing
* other parts that expect inputs to come in groups of vec4
*/
inputs[block->ninputs++] = NULL;
inputs[block->ninputs++] = NULL;
inputs[block->ninputs++] = NULL;
}
 
/* since we don't know where to set the regid for frag_coord,
* we have to use r0.x for it. But we don't want to *always*
* use r1.x for frag_pos as that could increase the register
* footprint on simple shaders:
*/
if (so->frag_coord) {
ctx->frag_coord[0]->regs[0]->num = regid++;
ctx->frag_coord[1]->regs[0]->num = regid++;
ctx->frag_coord[2]->regs[0]->num = regid++;
ctx->frag_coord[3]->regs[0]->num = regid++;
 
inputs[block->ninputs++] = ctx->frag_coord[0];
inputs[block->ninputs++] = ctx->frag_coord[1];
inputs[block->ninputs++] = ctx->frag_coord[2];
inputs[block->ninputs++] = ctx->frag_coord[3];
}
 
/* we always have frag_pos: */
so->pos_regid = regid;
 
/* r0.x */
instr = create_input(block, NULL, block->ninputs);
instr->regs[0]->num = regid++;
inputs[block->ninputs++] = instr;
ctx->frag_pos->regs[1]->instr = instr;
 
/* r0.y */
instr = create_input(block, NULL, block->ninputs);
instr->regs[0]->num = regid++;
inputs[block->ninputs++] = instr;
ctx->frag_pos->regs[2]->instr = instr;
 
block->inputs = inputs;
}
 
static void
compile_dump(struct ir3_compile *ctx)
{
const char *name = (ctx->so->type == SHADER_VERTEX) ? "vert" : "frag";
static unsigned n = 0;
char fname[16];
FILE *f;
snprintf(fname, sizeof(fname), "%s-%04u.dot", name, n++);
f = fopen(fname, "w");
if (!f)
return;
ir3_block_depth(ctx->block);
ir3_dump(ctx->ir, name, ctx->block, f);
fclose(f);
}
 
int
ir3_compile_shader_nir(struct ir3_shader_variant *so,
const struct tgsi_token *tokens, struct ir3_shader_key key)
{
struct ir3_compile *ctx;
struct ir3_block *block;
struct ir3_instruction **inputs;
unsigned i, j, actual_in;
int ret = 0, max_bary;
 
assert(!so->ir);
 
so->ir = ir3_create();
 
assert(so->ir);
 
ctx = compile_init(so, tokens);
if (!ctx) {
DBG("INIT failed!");
ret = -1;
goto out;
}
 
emit_instructions(ctx);
 
if (ctx->error) {
DBG("EMIT failed!");
ret = -1;
goto out;
}
 
block = ctx->block;
so->ir->block = block;
 
/* keep track of the inputs from TGSI perspective.. */
inputs = block->inputs;
 
/* but fixup actual inputs for frag shader: */
if (so->type == SHADER_FRAGMENT)
fixup_frag_inputs(ctx);
 
/* at this point, for binning pass, throw away unneeded outputs: */
if (key.binning_pass) {
for (i = 0, j = 0; i < so->outputs_count; i++) {
unsigned name = sem2name(so->outputs[i].semantic);
unsigned idx = sem2idx(so->outputs[i].semantic);
 
/* throw away everything but first position/psize */
if ((idx == 0) && ((name == TGSI_SEMANTIC_POSITION) ||
(name == TGSI_SEMANTIC_PSIZE))) {
if (i != j) {
so->outputs[j] = so->outputs[i];
block->outputs[(j*4)+0] = block->outputs[(i*4)+0];
block->outputs[(j*4)+1] = block->outputs[(i*4)+1];
block->outputs[(j*4)+2] = block->outputs[(i*4)+2];
block->outputs[(j*4)+3] = block->outputs[(i*4)+3];
}
j++;
}
}
so->outputs_count = j;
block->noutputs = j * 4;
}
 
/* if we want half-precision outputs, mark the output registers
* as half:
*/
if (key.half_precision) {
for (i = 0; i < block->noutputs; i++) {
if (!block->outputs[i])
continue;
block->outputs[i]->regs[0]->flags |= IR3_REG_HALF;
}
}
 
/* at this point, we want the kill's in the outputs array too,
* so that they get scheduled (since they have no dst).. we've
* already ensured that the array is big enough in push_block():
*/
if (so->type == SHADER_FRAGMENT) {
for (i = 0; i < ctx->kill_count; i++)
block->outputs[block->noutputs++] = ctx->kill[i];
}
 
if (fd_mesa_debug & FD_DBG_OPTDUMP)
compile_dump(ctx);
 
if (fd_mesa_debug & FD_DBG_OPTMSGS) {
printf("BEFORE CP:\n");
ir3_dump_instr_list(block->head);
}
 
ir3_block_depth(block);
 
ir3_block_cp(block);
 
if (fd_mesa_debug & FD_DBG_OPTMSGS) {
printf("BEFORE GROUPING:\n");
ir3_dump_instr_list(block->head);
}
 
/* Group left/right neighbors, inserting mov's where needed to
* solve conflicts:
*/
ir3_block_group(block);
 
if (fd_mesa_debug & FD_DBG_OPTDUMP)
compile_dump(ctx);
 
ir3_block_depth(block);
 
if (fd_mesa_debug & FD_DBG_OPTMSGS) {
printf("AFTER DEPTH:\n");
ir3_dump_instr_list(block->head);
}
 
ret = ir3_block_sched(block);
if (ret) {
DBG("SCHED failed!");
goto out;
}
 
if (fd_mesa_debug & FD_DBG_OPTMSGS) {
printf("AFTER SCHED:\n");
ir3_dump_instr_list(block->head);
}
 
ret = ir3_block_ra(block, so->type, so->frag_coord, so->frag_face);
if (ret) {
DBG("RA failed!");
goto out;
}
 
if (fd_mesa_debug & FD_DBG_OPTMSGS) {
printf("AFTER RA:\n");
ir3_dump_instr_list(block->head);
}
 
ir3_block_legalize(block, &so->has_samp, &max_bary);
 
/* fixup input/outputs: */
for (i = 0; i < so->outputs_count; i++) {
so->outputs[i].regid = block->outputs[i*4]->regs[0]->num;
/* preserve hack for depth output.. tgsi writes depth to .z,
* but what we give the hw is the scalar register:
*/
if ((so->type == SHADER_FRAGMENT) &&
(sem2name(so->outputs[i].semantic) == TGSI_SEMANTIC_POSITION))
so->outputs[i].regid += 2;
}
 
/* Note that some or all channels of an input may be unused: */
actual_in = 0;
for (i = 0; i < so->inputs_count; i++) {
unsigned j, regid = ~0, compmask = 0;
so->inputs[i].ncomp = 0;
for (j = 0; j < 4; j++) {
struct ir3_instruction *in = inputs[(i*4) + j];
if (in) {
compmask |= (1 << j);
regid = in->regs[0]->num - j;
actual_in++;
so->inputs[i].ncomp++;
}
}
so->inputs[i].regid = regid;
so->inputs[i].compmask = compmask;
}
 
/* fragment shader always gets full vec4's even if it doesn't
* fetch all components, but for the vertex shader we need to update
* with the actual number of components fetched, otherwise things
* will hang due to a mismatch between VFD_DECODE's and
* TOTALATTRTOVS
*/
if (so->type == SHADER_VERTEX)
so->total_in = actual_in;
else
so->total_in = align(max_bary + 1, 4);
 
out:
if (ret) {
ir3_destroy(so->ir);
so->ir = NULL;
}
compile_free(ctx);
 
return ret;
}
/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/freedreno/ir3/ir3_cp.c
0,0 → 1,415
/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */
 
/*
* Copyright (C) 2014 Rob Clark <robclark@freedesktop.org>
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Authors:
* Rob Clark <robclark@freedesktop.org>
*/
 
#include "freedreno_util.h"
 
#include "ir3.h"
 
/*
* Copy Propagate:
*/
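
/* Rough illustration (added for readability, not in the original source):
* an eligible same-type mov is folded into its consumer, e.g.
*
* mov.f32f32 r0.x, c0.x
* add.f r1.x, r0.x, r2.x
*
* effectively becomes
*
* add.f r1.x, c0.x, r2.x
*
* provided valid_flags() below says the consumer can accept the folded
* src (const/immed/abs/neg/relative) in that slot.
*/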
 
/* is it a type preserving mov, with ok flags? */
static bool is_eligible_mov(struct ir3_instruction *instr, bool allow_flags)
{
if (is_same_type_mov(instr)) {
struct ir3_register *dst = instr->regs[0];
struct ir3_register *src = instr->regs[1];
struct ir3_instruction *src_instr = ssa(src);
if (dst->flags & (IR3_REG_ADDR | IR3_REG_RELATIV))
return false;
if (src->flags & IR3_REG_RELATIV)
return false;
if (!allow_flags)
if (src->flags & (IR3_REG_FABS | IR3_REG_FNEG |
IR3_REG_SABS | IR3_REG_SNEG | IR3_REG_BNOT))
return false;
if (!src_instr)
return false;
/* TODO: remove this hack: */
if (is_meta(src_instr) && (src_instr->opc == OPC_META_FO))
return false;
return true;
}
return false;
}
 
static unsigned cp_flags(unsigned flags)
{
/* only considering these flags (at least for now): */
flags &= (IR3_REG_CONST | IR3_REG_IMMED |
IR3_REG_FNEG | IR3_REG_FABS |
IR3_REG_SNEG | IR3_REG_SABS |
IR3_REG_BNOT | IR3_REG_RELATIV);
return flags;
}
 
static bool valid_flags(struct ir3_instruction *instr, unsigned n,
unsigned flags)
{
unsigned valid_flags;
flags = cp_flags(flags);
 
/* clear flags that are 'ok' */
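/* note added for readability (mapping believed from the ir3 ISA):
* cat1=mov/cov, cat2=two-src alu, cat3=three-src alu (mad/sel),
* cat4=sfu, cat5=tex, cat6=load/store
*/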
switch (instr->category) {
case 1:
valid_flags = IR3_REG_IMMED | IR3_REG_RELATIV;
if (flags & ~valid_flags)
return false;
break;
case 5:
/* no flags allowed */
if (flags)
return false;
break;
case 6:
valid_flags = IR3_REG_IMMED;
if (flags & ~valid_flags)
return false;
break;
case 2:
valid_flags = ir3_cat2_absneg(instr->opc) |
IR3_REG_CONST | IR3_REG_RELATIV;
 
if (ir3_cat2_int(instr->opc))
valid_flags |= IR3_REG_IMMED;
 
if (flags & ~valid_flags)
return false;
 
if (flags & (IR3_REG_CONST | IR3_REG_IMMED)) {
unsigned m = (n ^ 1) + 1;
/* cannot deal w/ const in both srcs:
* (note that some cat2 actually only have a single src)
*/
if (m < instr->regs_count) {
struct ir3_register *reg = instr->regs[m];
if ((flags & IR3_REG_CONST) && (reg->flags & IR3_REG_CONST))
return false;
if ((flags & IR3_REG_IMMED) && (reg->flags & IR3_REG_IMMED))
return false;
}
/* cannot be const + ABS|NEG: */
if (flags & (IR3_REG_FABS | IR3_REG_FNEG |
IR3_REG_SABS | IR3_REG_SNEG | IR3_REG_BNOT))
return false;
}
break;
case 3:
valid_flags = ir3_cat3_absneg(instr->opc) |
IR3_REG_CONST | IR3_REG_RELATIV;
 
if (flags & ~valid_flags)
return false;
 
if (flags & (IR3_REG_CONST | IR3_REG_RELATIV)) {
/* cannot deal w/ const/relativ in 2nd src: */
if (n == 1)
return false;
}
 
if (flags & IR3_REG_CONST) {
/* cannot be const + ABS|NEG: */
if (flags & (IR3_REG_FABS | IR3_REG_FNEG |
IR3_REG_SABS | IR3_REG_SNEG | IR3_REG_BNOT))
return false;
}
break;
case 4:
/* seems like blob compiler avoids const as src.. */
/* TODO double check if this is still the case on a4xx */
if (flags & IR3_REG_CONST)
return false;
if (flags & (IR3_REG_SABS | IR3_REG_SNEG))
return false;
break;
}
 
return true;
}
 
/* propagate register flags from src to dst.. negates need special
* handling to cancel each other out.
*/
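/* For example (illustrative): folding a (neg) src into a use that already
* carries (neg) toggles the flag off (-(-x) == x), while a (neg) folded
* into a use that already has (abs) is simply dropped (|-x| == |x|),
* which is what the xor/mask logic below implements.
*/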
static void combine_flags(unsigned *dstflags, unsigned srcflags)
{
/* if what we are combining into already has (abs) flags,
* we can drop (neg) from src:
*/
if (*dstflags & IR3_REG_FABS)
srcflags &= ~IR3_REG_FNEG;
if (*dstflags & IR3_REG_SABS)
srcflags &= ~IR3_REG_SNEG;
 
if (srcflags & IR3_REG_FABS)
*dstflags |= IR3_REG_FABS;
if (srcflags & IR3_REG_SABS)
*dstflags |= IR3_REG_SABS;
if (srcflags & IR3_REG_FNEG)
*dstflags ^= IR3_REG_FNEG;
if (srcflags & IR3_REG_SNEG)
*dstflags ^= IR3_REG_SNEG;
if (srcflags & IR3_REG_BNOT)
*dstflags ^= IR3_REG_BNOT;
}
 
static struct ir3_instruction * instr_cp(struct ir3_instruction *instr, unsigned *flags);
 
/* the "plain" MAD's (ie. the ones that don't shift first src prior to
* multiply) can swap their first two srcs if src[0] is !CONST and
* src[1] is CONST:
*/
static bool is_valid_mad(struct ir3_instruction *instr)
{
return (instr->category == 3) && is_mad(instr->opc);
}
 
/**
* Handle cp for a given src register. This additionally handles
* the cases of collapsing immediate/const (which replaces the src
* register with a non-ssa src) or collapsing mov's from a relative
* src (which also needs to fix up the address src referenced by the
* instruction).
*/
static void
reg_cp(struct ir3_instruction *instr, struct ir3_register *reg, unsigned n)
{
unsigned src_flags = 0, new_flags;
struct ir3_instruction *src_instr;
 
if (is_meta(instr)) {
/* meta instructions cannot fold up register
* flags.. they are usually src for texture
* fetch, etc, where we cannot specify abs/neg
*/
reg->instr = instr_cp(reg->instr, NULL);
return;
}
 
src_instr = instr_cp(reg->instr, &src_flags);
 
new_flags = reg->flags;
combine_flags(&new_flags, src_flags);
 
reg->flags = new_flags;
reg->instr = src_instr;
 
if (!valid_flags(instr, n, reg->flags)) {
/* insert an absneg.f */
if (reg->flags & (IR3_REG_SNEG | IR3_REG_SABS | IR3_REG_BNOT)) {
debug_assert(!(reg->flags & (IR3_REG_FNEG | IR3_REG_FABS)));
reg->instr = ir3_ABSNEG_S(instr->block,
reg->instr, cp_flags(src_flags));
} else {
debug_assert(!(reg->flags & (IR3_REG_SNEG | IR3_REG_SABS | IR3_REG_BNOT)));
reg->instr = ir3_ABSNEG_F(instr->block,
reg->instr, cp_flags(src_flags));
}
reg->flags &= ~cp_flags(src_flags);
debug_assert(valid_flags(instr, n, reg->flags));
/* send it through instr_cp() again since
* the absneg src might be a mov from const
* that could be cleaned up:
*/
reg->instr = instr_cp(reg->instr, NULL);
return;
}
 
if (is_same_type_mov(reg->instr)) {
struct ir3_register *src_reg = reg->instr->regs[1];
unsigned new_flags = src_reg->flags;
 
combine_flags(&new_flags, reg->flags);
 
if (!valid_flags(instr, n, new_flags)) {
/* special case for "normal" mad instructions, we can
* try swapping the first two args if that fits better.
*/
if ((n == 1) && is_valid_mad(instr) &&
!(instr->regs[0 + 1]->flags & (IR3_REG_CONST | IR3_REG_RELATIV)) &&
valid_flags(instr, 0, new_flags)) {
/* swap src[0] and src[1]: */
struct ir3_register *tmp;
tmp = instr->regs[0 + 1];
instr->regs[0 + 1] = instr->regs[1 + 1];
instr->regs[1 + 1] = tmp;
n = 0;
} else {
return;
}
}
 
/* Here we handle the special case of mov from
* CONST and/or RELATIV. These need to be handled
* specially, because in the case of move from CONST
* there is no src ir3_instruction so we need to
* replace the ir3_register. And in the case of
* RELATIV we need to handle the address register
* dependency.
*/
if (src_reg->flags & IR3_REG_CONST) {
/* an instruction cannot reference two different
* address registers:
*/
if ((src_reg->flags & IR3_REG_RELATIV) &&
conflicts(instr->address, reg->instr->address))
return;
 
src_reg->flags = new_flags;
instr->regs[n+1] = src_reg;
 
if (src_reg->flags & IR3_REG_RELATIV)
instr->address = reg->instr->address;
 
return;
}
 
if ((src_reg->flags & IR3_REG_RELATIV) &&
!conflicts(instr->address, reg->instr->address)) {
src_reg->flags = new_flags;
instr->regs[n+1] = src_reg;
instr->address = reg->instr->address;
 
return;
}
 
/* NOTE: seems we can only do immed integers, so don't
* need to care about float. But we do need to handle
* abs/neg *before* checking that the immediate requires
* few enough bits to encode:
*
* TODO: do we need to do something to avoid accidentally
* catching a float immed?
*/
if (src_reg->flags & IR3_REG_IMMED) {
int32_t iim_val = src_reg->iim_val;
 
debug_assert((instr->category == 1) ||
(instr->category == 6) ||
((instr->category == 2) &&
ir3_cat2_int(instr->opc)));
 
if (new_flags & IR3_REG_SABS)
iim_val = abs(iim_val);
 
if (new_flags & IR3_REG_SNEG)
iim_val = -iim_val;
 
if (new_flags & IR3_REG_BNOT)
iim_val = ~iim_val;
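
/* added note: the collapse below only happens if the adjusted value
* still fits in the low 10 bits (no bits set outside 0x3ff):
*/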
 
if (!(iim_val & ~0x3ff)) {
new_flags &= ~(IR3_REG_SABS | IR3_REG_SNEG | IR3_REG_BNOT);
src_reg->flags = new_flags;
src_reg->iim_val = iim_val;
instr->regs[n+1] = src_reg;
}
 
return;
}
}
}
 
/**
* Given an SSA src (instruction), return the one with extraneous
* mov's removed, ie. for example (using NIR-like syntax):
*
* vec1 ssa1 = fadd <something>, <somethingelse>
* vec1 ssa2 = fabs ssa1
* vec1 ssa3 = fneg ssa2
*
* then calling instr_cp(ssa3, &flags) would return ssa1 with
* (IR3_REG_FABS | IR3_REG_FNEG) in flags. If flags is NULL,
* then disallow eliminating copies which would require flag
* propagation (for example, we cannot propagate abs/neg into
* an output).
*/
static struct ir3_instruction *
instr_cp(struct ir3_instruction *instr, unsigned *flags)
{
struct ir3_register *reg;
 
/* stay within the block.. don't try to operate across
* basic block boundaries or we'll have problems when
* dealing with multiple basic blocks:
*/
if (is_meta(instr) && (instr->opc == OPC_META_INPUT))
return instr;
 
if (is_eligible_mov(instr, !!flags)) {
struct ir3_register *reg = instr->regs[1];
struct ir3_instruction *src_instr = ssa(reg);
if (flags)
combine_flags(flags, reg->flags);
return instr_cp(src_instr, flags);
}
 
/* Check termination condition before walking children (rather
* than before checking eligible-mov). A mov instruction may
* appear as ssa-src for multiple other instructions, and we
* want to consider it for removal for each, rather than just
* the first one. (But regardless of how many places it shows
* up as a src, we only need to recursively walk the children
* once.)
*/
if (ir3_instr_check_mark(instr))
return instr;
 
/* walk down the graph from each src: */
foreach_src_n(reg, n, instr) {
if (!(reg->flags & IR3_REG_SSA))
continue;
 
reg_cp(instr, reg, n);
}
 
if (instr->address)
instr->address = instr_cp(instr->address, NULL);
 
return instr;
}
 
static void block_cp(struct ir3_block *block)
{
unsigned i;
 
for (i = 0; i < block->noutputs; i++) {
if (block->outputs[i]) {
struct ir3_instruction *out =
instr_cp(block->outputs[i], NULL);
 
block->outputs[i] = out;
}
}
}
 
void ir3_block_cp(struct ir3_block *block)
{
ir3_clear_mark(block->shader);
block_cp(block);
}
/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/freedreno/ir3/ir3_depth.c
0,0 → 1,169
/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */
 
/*
* Copyright (C) 2014 Rob Clark <robclark@freedesktop.org>
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Authors:
* Rob Clark <robclark@freedesktop.org>
*/
 
#include "util/u_math.h"
 
#include "ir3.h"
 
/*
* Instruction Depth:
*
* Calculates weighted instruction depth, ie. the sum of # of needed
* instructions plus delay slots back to original input (ie INPUT or
* CONST). That is to say, an instruction's depth is:
*
* depth(instr) {
* d = 0;
* // for each src register:
* foreach (src in instr->regs[1..n])
* d = max(d, delayslots(src->instr, n) + depth(src->instr));
* return d + 1;
* }
*
* After an instruction's depth is calculated, it is inserted into the
* blocks depth sorted list, which is used by the scheduling pass.
*/
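
/* Worked example (illustrative, not from the original source): if alu
* instruction B has alu instruction A as its only (or deepest) src,
* delayslots(A, B) is 3 (see ir3_delayslots() below), so
* depth(B) = depth(A) + 3 + 1.
*/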
 
/* calculate required # of delay slots between the instruction that
* assigns a value and the one that consumes it
*/
int ir3_delayslots(struct ir3_instruction *assigner,
struct ir3_instruction *consumer, unsigned n)
{
/* worst case is cat1-3 (alu) -> cat4/5 needing 6 cycles, normal
* alu -> alu needs 3 cycles, cat4 -> alu and texture fetch
* handled with sync bits
*/
 
if (is_meta(assigner))
return 0;
 
if (writes_addr(assigner))
return 6;
 
/* handled via sync flags: */
if (is_sfu(assigner) || is_tex(assigner) || is_mem(assigner))
return 0;
 
/* assigner must be alu: */
if (is_flow(consumer) || is_sfu(consumer) || is_tex(consumer) ||
is_mem(consumer)) {
return 6;
} else if ((consumer->category == 3) &&
(is_mad(consumer->opc) || is_madsh(consumer->opc)) &&
(n == 2)) {
/* special case, 3rd src to cat3 not required on first cycle */
return 1;
} else {
return 3;
}
}
 
static void insert_by_depth(struct ir3_instruction *instr)
{
struct ir3_block *block = instr->block;
struct ir3_instruction *n = block->head;
struct ir3_instruction *p = NULL;
 
while (n && (n != instr) && (n->depth > instr->depth)) {
p = n;
n = n->next;
}
 
instr->next = n;
if (p)
p->next = instr;
else
block->head = instr;
}
 
static void ir3_instr_depth(struct ir3_instruction *instr)
{
struct ir3_instruction *src;
 
/* if we've already visited this instruction, bail now: */
if (ir3_instr_check_mark(instr))
return;
 
instr->depth = 0;
 
foreach_ssa_src_n(src, i, instr) {
unsigned sd;
 
/* visit child to compute its depth: */
ir3_instr_depth(src);
 
sd = ir3_delayslots(src, instr, i) + src->depth;
 
instr->depth = MAX2(instr->depth, sd);
}
 
/* meta-instructions don't add cycles, other than PHI.. which
* might translate to a real instruction..
*
* well, not entirely true, fan-in/out, etc might need to generate
* some extra mov's in edge cases, etc.. we probably want to do the
* depth calculation considering the worst
* case for these??
*/
if (!is_meta(instr))
instr->depth++;
 
insert_by_depth(instr);
}
 
void ir3_block_depth(struct ir3_block *block)
{
unsigned i;
 
block->head = NULL;
 
ir3_clear_mark(block->shader);
for (i = 0; i < block->noutputs; i++)
if (block->outputs[i])
ir3_instr_depth(block->outputs[i]);
 
/* mark un-used instructions: */
for (i = 0; i < block->shader->instrs_count; i++) {
struct ir3_instruction *instr = block->shader->instrs[i];
 
/* just consider instructions within this block: */
if (instr->block != block)
continue;
 
if (!ir3_instr_check_mark(instr))
instr->depth = DEPTH_UNUSED;
}
 
/* cleanup unused inputs: */
for (i = 0; i < block->ninputs; i++) {
struct ir3_instruction *in = block->inputs[i];
if (in && (in->depth == DEPTH_UNUSED))
block->inputs[i] = NULL;
}
}
/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/freedreno/ir3/ir3_dump.c
0,0 → 1,456
/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */
 
/*
* Copyright (C) 2014 Rob Clark <robclark@freedesktop.org>
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Authors:
* Rob Clark <robclark@freedesktop.org>
*/
 
#include <stdarg.h>
 
#include "ir3.h"
 
#define PTRID(x) ((unsigned long)(x))
 
struct ir3_dump_ctx {
FILE *f;
bool verbose;
};
 
static void dump_instr_name(struct ir3_dump_ctx *ctx,
struct ir3_instruction *instr)
{
/* for debugging: */
if (ctx->verbose) {
#ifdef DEBUG
fprintf(ctx->f, "%04u:", instr->serialno);
#endif
fprintf(ctx->f, "%03u: ", instr->depth);
}
 
if (instr->flags & IR3_INSTR_SY)
fprintf(ctx->f, "(sy)");
if (instr->flags & IR3_INSTR_SS)
fprintf(ctx->f, "(ss)");
 
if (is_meta(instr)) {
switch(instr->opc) {
case OPC_META_PHI:
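/* note: "&#934;" is the numeric character reference for the Greek
* capital letter phi, intended to label the node as a phi in the
* dot output:
*/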
fprintf(ctx->f, "&#934;");
break;
default:
/* shouldn't hit here.. just for debugging: */
switch (instr->opc) {
case OPC_META_INPUT: fprintf(ctx->f, "_meta:in"); break;
case OPC_META_OUTPUT: fprintf(ctx->f, "_meta:out"); break;
case OPC_META_FO: fprintf(ctx->f, "_meta:fo"); break;
case OPC_META_FI: fprintf(ctx->f, "_meta:fi"); break;
case OPC_META_FLOW: fprintf(ctx->f, "_meta:flow"); break;
 
default: fprintf(ctx->f, "_meta:%d", instr->opc); break;
}
break;
}
} else if (instr->category == 1) {
static const char *type[] = {
[TYPE_F16] = "f16",
[TYPE_F32] = "f32",
[TYPE_U16] = "u16",
[TYPE_U32] = "u32",
[TYPE_S16] = "s16",
[TYPE_S32] = "s32",
[TYPE_U8] = "u8",
[TYPE_S8] = "s8",
};
if (instr->cat1.src_type == instr->cat1.dst_type)
fprintf(ctx->f, "mov");
else
fprintf(ctx->f, "cov");
fprintf(ctx->f, ".%s%s", type[instr->cat1.src_type], type[instr->cat1.dst_type]);
} else {
fprintf(ctx->f, "%s", ir3_instr_name(instr));
if (instr->flags & IR3_INSTR_3D)
fprintf(ctx->f, ".3d");
if (instr->flags & IR3_INSTR_A)
fprintf(ctx->f, ".a");
if (instr->flags & IR3_INSTR_O)
fprintf(ctx->f, ".o");
if (instr->flags & IR3_INSTR_P)
fprintf(ctx->f, ".p");
if (instr->flags & IR3_INSTR_S)
fprintf(ctx->f, ".s");
if (instr->flags & IR3_INSTR_S2EN)
fprintf(ctx->f, ".s2en");
}
}
 
static void dump_reg_name(struct ir3_dump_ctx *ctx,
struct ir3_register *reg, bool followssa)
{
if ((reg->flags & (IR3_REG_FABS | IR3_REG_SABS)) &&
(reg->flags & (IR3_REG_FNEG | IR3_REG_SNEG | IR3_REG_BNOT)))
fprintf(ctx->f, "(absneg)");
else if (reg->flags & (IR3_REG_FNEG | IR3_REG_SNEG | IR3_REG_BNOT))
fprintf(ctx->f, "(neg)");
else if (reg->flags & (IR3_REG_FABS | IR3_REG_SABS))
fprintf(ctx->f, "(abs)");
 
if (reg->flags & IR3_REG_IMMED) {
fprintf(ctx->f, "imm[%f,%d,0x%x]", reg->fim_val, reg->iim_val, reg->iim_val);
} else if (reg->flags & IR3_REG_SSA) {
if (ctx->verbose) {
fprintf(ctx->f, "_");
if (followssa) {
fprintf(ctx->f, "[");
dump_instr_name(ctx, reg->instr);
fprintf(ctx->f, "]");
}
}
} else if (reg->flags & IR3_REG_RELATIV) {
if (reg->flags & IR3_REG_HALF)
fprintf(ctx->f, "h");
if (reg->flags & IR3_REG_CONST)
fprintf(ctx->f, "c<a0.x + %u>", reg->num);
else
fprintf(ctx->f, "\x1b[0;31mr<a0.x + %u>\x1b[0m (%u)", reg->num, reg->size);
} else {
if (reg->flags & IR3_REG_HALF)
fprintf(ctx->f, "h");
if (reg->flags & IR3_REG_CONST)
fprintf(ctx->f, "c%u.%c", reg_num(reg), "xyzw"[reg_comp(reg)]);
else
fprintf(ctx->f, "\x1b[0;31mr%u.%c\x1b[0m", reg_num(reg), "xyzw"[reg_comp(reg)]);
}
}
 
static void ir3_instr_dump(struct ir3_dump_ctx *ctx,
struct ir3_instruction *instr);
static void ir3_block_dump(struct ir3_dump_ctx *ctx,
struct ir3_block *block, const char *name);
 
static void dump_instr(struct ir3_dump_ctx *ctx,
struct ir3_instruction *instr)
{
/* if we've already visited this instruction, bail now: */
if (ir3_instr_check_mark(instr))
return;
 
/* some meta-instructions need to be handled specially: */
if (is_meta(instr)) {
if ((instr->opc == OPC_META_FO) ||
(instr->opc == OPC_META_FI)) {
struct ir3_instruction *src;
foreach_ssa_src(src, instr)
dump_instr(ctx, src);
} else if (instr->opc == OPC_META_FLOW) {
struct ir3_register *reg = instr->regs[1];
ir3_block_dump(ctx, instr->flow.if_block, "if");
if (instr->flow.else_block)
ir3_block_dump(ctx, instr->flow.else_block, "else");
if (reg->flags & IR3_REG_SSA)
dump_instr(ctx, reg->instr);
} else if (instr->opc == OPC_META_PHI) {
/* treat like a normal instruction: */
ir3_instr_dump(ctx, instr);
}
} else {
ir3_instr_dump(ctx, instr);
}
}
 
/* arrarraggh! if link is to something outside of the current block, we
* need to defer emitting the link until the end of the block, since the
* edge triggers pre-creation of the node it links to inside the cluster,
* even though it is meant to be outside..
*/
static struct {
char buf[40960];
unsigned n;
} edge_buf;
 
/* helper to print or defer: */
static void printdef(struct ir3_dump_ctx *ctx,
bool defer, const char *fmt, ...)
{
va_list ap;
va_start(ap, fmt);
if (defer) {
unsigned n = edge_buf.n;
n += vsnprintf(&edge_buf.buf[n], sizeof(edge_buf.buf) - n,
fmt, ap);
edge_buf.n = n;
} else {
vfprintf(ctx->f, fmt, ap);
}
va_end(ap);
}
 
static void dump_link2(struct ir3_dump_ctx *ctx,
struct ir3_instruction *instr, const char *target, bool defer)
{
/* some meta-instructions need to be handled specially: */
if (is_meta(instr)) {
if (instr->opc == OPC_META_INPUT) {
printdef(ctx, defer, "input%lx:<in%u>:w -> %s",
PTRID(instr->inout.block),
instr->regs[0]->num, target);
} else if (instr->opc == OPC_META_FO) {
struct ir3_register *reg = instr->regs[1];
dump_link2(ctx, reg->instr, target, defer);
printdef(ctx, defer, "[label=\".%c\"]",
"xyzw"[instr->fo.off & 0x3]);
} else if (instr->opc == OPC_META_FI) {
struct ir3_instruction *src;
 
foreach_ssa_src_n(src, i, instr) {
dump_link2(ctx, src, target, defer);
printdef(ctx, defer, "[label=\".%c\"]",
"xyzw"[i & 0x3]);
}
} else if (instr->opc == OPC_META_OUTPUT) {
printdef(ctx, defer, "output%lx:<out%u>:w -> %s",
PTRID(instr->inout.block),
instr->regs[0]->num, target);
} else if (instr->opc == OPC_META_PHI) {
/* treat like a normal instruction: */
printdef(ctx, defer, "instr%lx:<dst0> -> %s", PTRID(instr), target);
}
} else {
printdef(ctx, defer, "instr%lx:<dst0> -> %s", PTRID(instr), target);
}
}
 
static void dump_link(struct ir3_dump_ctx *ctx,
struct ir3_instruction *instr,
struct ir3_block *block, const char *target)
{
bool defer = instr->block != block;
dump_link2(ctx, instr, target, defer);
printdef(ctx, defer, "\n");
}
 
static struct ir3_register *follow_flow(struct ir3_register *reg)
{
if (reg->flags & IR3_REG_SSA) {
struct ir3_instruction *instr = reg->instr;
/* go with the flow.. */
if (is_meta(instr) && (instr->opc == OPC_META_FLOW))
return instr->regs[1];
}
return reg;
}
 
static void ir3_instr_dump(struct ir3_dump_ctx *ctx,
struct ir3_instruction *instr)
{
struct ir3_register *src;
 
fprintf(ctx->f, "instr%lx [shape=record,style=filled,fillcolor=lightgrey,label=\"{",
PTRID(instr));
dump_instr_name(ctx, instr);
 
/* destination register: */
fprintf(ctx->f, "|<dst0>");
 
/* source register(s): */
foreach_src_n(src, i, instr) {
struct ir3_register *reg = follow_flow(src);
 
fprintf(ctx->f, "|");
 
if (reg->flags & IR3_REG_SSA)
fprintf(ctx->f, "<src%u> ", i);
 
dump_reg_name(ctx, reg, true);
}
 
fprintf(ctx->f, "}\"];\n");
 
/* and recursively dump dependent instructions: */
foreach_src_n(src, i, instr) {
struct ir3_register *reg = follow_flow(src);
char target[32]; /* link target */
 
if (!(reg->flags & IR3_REG_SSA))
continue;
 
snprintf(target, sizeof(target), "instr%lx:<src%u>",
PTRID(instr), i);
 
dump_instr(ctx, reg->instr);
dump_link(ctx, reg->instr, instr->block, target);
}
}
 
static void ir3_block_dump(struct ir3_dump_ctx *ctx,
struct ir3_block *block, const char *name)
{
unsigned i, n;
 
n = edge_buf.n;
 
fprintf(ctx->f, "subgraph cluster%lx {\n", PTRID(block));
fprintf(ctx->f, "label=\"%s\";\n", name);
 
/* draw inputs: */
fprintf(ctx->f, "input%lx [shape=record,label=\"inputs", PTRID(block));
for (i = 0; i < block->ninputs; i++)
if (block->inputs[i])
fprintf(ctx->f, "|<in%u> i%u.%c", i, (i >> 2), "xyzw"[i & 0x3]);
fprintf(ctx->f, "\"];\n");
 
/* draw instruction graph: */
for (i = 0; i < block->noutputs; i++)
if (block->outputs[i])
dump_instr(ctx, block->outputs[i]);
 
/* draw outputs: */
fprintf(ctx->f, "output%lx [shape=record,label=\"outputs", PTRID(block));
for (i = 0; i < block->noutputs; i++)
fprintf(ctx->f, "|<out%u> o%u.%c", i, (i >> 2), "xyzw"[i & 0x3]);
fprintf(ctx->f, "\"];\n");
 
/* and links to outputs: */
for (i = 0; i < block->noutputs; i++) {
char target[32]; /* link target */
 
/* NOTE: there could be outputs that are never assigned,
* so skip them
*/
if (!block->outputs[i])
continue;
 
snprintf(target, sizeof(target), "output%lx:<out%u>:e",
PTRID(block), i);
 
dump_link(ctx, block->outputs[i], block, target);
}
 
fprintf(ctx->f, "}\n");
 
/* and links to inputs: */
if (block->parent) {
for (i = 0; i < block->ninputs; i++) {
char target[32]; /* link target */
 
if (!block->inputs[i])
continue;
 
dump_instr(ctx, block->inputs[i]);
 
snprintf(target, sizeof(target), "input%lx:<in%u>:e",
PTRID(block), i);
 
dump_link(ctx, block->inputs[i], block, target);
}
}
 
/* dump deferred edges: */
if (edge_buf.n > n) {
fprintf(ctx->f, "%*s", edge_buf.n - n, &edge_buf.buf[n]);
edge_buf.n = n;
}
}
 
void ir3_dump(struct ir3 *shader, const char *name,
struct ir3_block *block /* XXX maybe 'block' ptr should move to ir3? */,
FILE *f)
{
struct ir3_dump_ctx ctx = {
.f = f,
};
ir3_clear_mark(shader);
fprintf(ctx.f, "digraph G {\n");
fprintf(ctx.f, "rankdir=RL;\n");
fprintf(ctx.f, "nodesep=0.25;\n");
fprintf(ctx.f, "ranksep=1.5;\n");
ir3_block_dump(&ctx, block, name);
fprintf(ctx.f, "}\n");
}
 
/*
* For Debugging:
*/
 
void
ir3_dump_instr_single(struct ir3_instruction *instr)
{
struct ir3_dump_ctx ctx = {
.f = stdout,
.verbose = true,
};
unsigned i;
 
dump_instr_name(&ctx, instr);
for (i = 0; i < instr->regs_count; i++) {
struct ir3_register *reg = instr->regs[i];
printf(i ? ", " : " ");
dump_reg_name(&ctx, reg, !!i);
}
 
if (instr->address) {
fprintf(ctx.f, ", address=_");
fprintf(ctx.f, "[");
dump_instr_name(&ctx, instr->address);
fprintf(ctx.f, "]");
}
 
if (instr->fanin) {
fprintf(ctx.f, ", fanin=_");
fprintf(ctx.f, "[");
dump_instr_name(&ctx, instr->fanin);
fprintf(ctx.f, "]");
}
 
if (is_meta(instr)) {
if (instr->opc == OPC_META_FO) {
printf(", off=%d", instr->fo.off);
} else if ((instr->opc == OPC_META_FI) && instr->fi.aid) {
printf(", aid=%d", instr->fi.aid);
}
}
 
printf("\n");
}
 
void
ir3_dump_instr_list(struct ir3_instruction *instr)
{
struct ir3_block *block = instr->block;
unsigned n = 0;
 
while (instr) {
ir3_dump_instr_single(instr);
if (!is_meta(instr))
n++;
instr = instr->next;
}
printf("%u instructions\n", n);
 
for (n = 0; n < block->noutputs; n++) {
if (!block->outputs[n])
continue;
printf("out%d: ", n);
ir3_dump_instr_single(block->outputs[n]);
}
}
/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/freedreno/ir3/ir3_flatten.c
0,0 → 1,152
/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */
 
/*
* Copyright (C) 2014 Rob Clark <robclark@freedesktop.org>
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Authors:
* Rob Clark <robclark@freedesktop.org>
*/
 
#include <stdarg.h>
 
#include "ir3.h"
 
/*
* Flatten: flatten out legs of if/else, etc
*
* TODO probably should use some heuristic to decide to not flatten
* if one side or the other is too large / deeply nested / whatever?
*/
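
/* Rough sketch of the transform (added for readability): a PHI that
* merges values written on both legs of an if/else becomes a
* sel.b16/sel.b32 on the condition, while a PHI with only one leg
* written becomes a plain mov (see ir3_instr_flatten() below).
*/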
 
struct ir3_flatten_ctx {
struct ir3_block *block;
unsigned cnt;
};
 
static struct ir3_register *unwrap(struct ir3_register *reg)
{
 
if (reg->flags & IR3_REG_SSA) {
struct ir3_instruction *instr = reg->instr;
if (is_meta(instr)) {
switch (instr->opc) {
case OPC_META_OUTPUT:
case OPC_META_FLOW:
if (instr->regs_count > 1)
return instr->regs[1];
return NULL;
default:
break;
}
}
}
return reg;
}
 
static void ir3_instr_flatten(struct ir3_flatten_ctx *ctx,
struct ir3_instruction *instr)
{
struct ir3_instruction *src;
 
/* if we've already visited this instruction, bail now: */
if (ir3_instr_check_mark(instr))
return;
 
instr->block = ctx->block;
 
/* TODO: maybe some threshold to decide whether to
* flatten or not??
*/
if (is_meta(instr)) {
if (instr->opc == OPC_META_PHI) {
struct ir3_register *cond, *t, *f;
 
cond = unwrap(instr->regs[1]);
t = unwrap(instr->regs[2]); /* true val */
f = unwrap(instr->regs[3]); /* false val */
 
/* must have cond, but t or f may be null if only written
* on one side of the if/else (in which case we can just
* convert the PHI to a simple move).
*/
assert(cond);
assert(t || f);
 
if (t && f) {
/* convert the PHI instruction to sel.{b16,b32} */
instr->category = 3;
 
/* instruction type based on dst size: */
if (instr->regs[0]->flags & IR3_REG_HALF)
instr->opc = OPC_SEL_B16;
else
instr->opc = OPC_SEL_B32;
 
instr->regs[1] = t;
instr->regs[2] = cond;
instr->regs[3] = f;
} else {
/* convert to simple mov: */
instr->category = 1;
instr->cat1.dst_type = TYPE_F32;
instr->cat1.src_type = TYPE_F32;
instr->regs_count = 2;
instr->regs[1] = t ? t : f;
}
 
ctx->cnt++;
} else if ((instr->opc == OPC_META_INPUT) &&
(instr->regs_count == 2)) {
type_t ftype;
 
if (instr->regs[0]->flags & IR3_REG_HALF)
ftype = TYPE_F16;
else
ftype = TYPE_F32;
 
/* convert meta:input to mov: */
instr->category = 1;
instr->cat1.src_type = ftype;
instr->cat1.dst_type = ftype;
}
}
 
/* recursively visit children: */
foreach_ssa_src(src, instr)
ir3_instr_flatten(ctx, src);
}
 
/* return >= 0 is # of phi's flattened, < 0 is error */
int ir3_block_flatten(struct ir3_block *block)
{
struct ir3_flatten_ctx ctx = {
.block = block,
};
unsigned i;
 
ir3_clear_mark(block->shader);
for(i = 0; i < block->noutputs; i++)
if (block->outputs[i])
ir3_instr_flatten(&ctx, block->outputs[i]);
 
return ctx.cnt;
}
/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/freedreno/ir3/ir3_group.c
0,0 → 1,266
/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */
 
/*
* Copyright (C) 2014 Rob Clark <robclark@freedesktop.org>
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Authors:
* Rob Clark <robclark@freedesktop.org>
*/
 
#include "freedreno_util.h"
 
#include "ir3.h"
 
/*
* Find/group instruction neighbors:
*/
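
/* Added note: "neighbors" here are instructions whose results must end up
* in consecutive registers (shader inputs/outputs and fanin/collect srcs).
* group_n() records left/right neighbor pointers for later register
* assignment, and inserts an extra mov when an instruction's existing
* neighbor pointers conflict or it appears more than once in a group.
*/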
 
/* stop condition for iteration: */
static bool check_stop(struct ir3_instruction *instr)
{
if (ir3_instr_check_mark(instr))
return true;
 
/* stay within the block.. don't try to operate across
* basic block boundaries or we'll have problems when
* dealing with multiple basic blocks:
*/
if (is_meta(instr) && (instr->opc == OPC_META_INPUT))
return true;
 
return false;
}
 
static struct ir3_instruction * create_mov(struct ir3_instruction *instr)
{
struct ir3_instruction *mov;
 
mov = ir3_instr_create(instr->block, 1, 0);
mov->cat1.src_type = TYPE_F32;
mov->cat1.dst_type = TYPE_F32;
ir3_reg_create(mov, 0, 0); /* dst */
ir3_reg_create(mov, 0, IR3_REG_SSA)->instr = instr;
 
return mov;
}
 
/* bleh.. we need to do the same group_n() thing for both inputs/outputs
* (where we have a simple instr[] array), and fanin nodes (where we have
* an extra indirection via reg->instr).
*/
struct group_ops {
struct ir3_instruction *(*get)(void *arr, int idx);
void (*insert_mov)(void *arr, int idx, struct ir3_instruction *instr);
};
 
static struct ir3_instruction *arr_get(void *arr, int idx)
{
return ((struct ir3_instruction **)arr)[idx];
}
static void arr_insert_mov_out(void *arr, int idx, struct ir3_instruction *instr)
{
((struct ir3_instruction **)arr)[idx] = create_mov(instr);
}
static void arr_insert_mov_in(void *arr, int idx, struct ir3_instruction *instr)
{
/* so, we can't insert a mov in front of a meta:in.. and the downstream
* instruction already has a pointer to 'instr'. So we cheat a bit and
* morph the meta:in instruction into a mov and insert a new meta:in
* in front.
*/
struct ir3_instruction *in;
 
debug_assert(instr->regs_count == 1);
 
in = ir3_instr_create(instr->block, -1, OPC_META_INPUT);
in->inout.block = instr->block;
ir3_reg_create(in, instr->regs[0]->num, 0);
 
/* create src reg for meta:in and fixup to now be a mov: */
ir3_reg_create(instr, 0, IR3_REG_SSA)->instr = in;
instr->category = 1;
instr->opc = 0;
instr->cat1.src_type = TYPE_F32;
instr->cat1.dst_type = TYPE_F32;
 
((struct ir3_instruction **)arr)[idx] = in;
}
static struct group_ops arr_ops_out = { arr_get, arr_insert_mov_out };
static struct group_ops arr_ops_in = { arr_get, arr_insert_mov_in };
 
static struct ir3_instruction *instr_get(void *arr, int idx)
{
return ssa(((struct ir3_instruction *)arr)->regs[idx+1]);
}
static void instr_insert_mov(void *arr, int idx, struct ir3_instruction *instr)
{
((struct ir3_instruction *)arr)->regs[idx+1]->instr = create_mov(instr);
}
static struct group_ops instr_ops = { instr_get, instr_insert_mov };
 
 
static void group_n(struct group_ops *ops, void *arr, unsigned n)
{
unsigned i, j;
 
/* first pass, figure out what has conflicts and needs a mov
* inserted. Do this up front, before starting to set up
* left/right neighbor pointers. Trying to do it in a single
* pass could result in a situation where we can't even set up
* the mov's right neighbor ptr if the next instr also needs
* a mov.
*/
restart:
for (i = 0; i < n; i++) {
struct ir3_instruction *instr = ops->get(arr, i);
if (instr) {
struct ir3_instruction *left = (i > 0) ? ops->get(arr, i - 1) : NULL;
struct ir3_instruction *right = (i < (n-1)) ? ops->get(arr, i + 1) : NULL;
bool conflict;
 
/* check for left/right neighbor conflicts: */
conflict = conflicts(instr->cp.left, left) ||
conflicts(instr->cp.right, right);
 
/* we also can't have an instr twice in the group: */
for (j = i + 1; (j < n) && !conflict; j++)
if (ops->get(arr, j) == instr)
conflict = true;
 
if (conflict) {
ops->insert_mov(arr, i, instr);
/* inserting the mov may have caused a conflict
* against the previous:
*/
goto restart;
}
}
}
 
/* second pass, now that we've inserted mov's, fixup left/right
* neighbors. This is guaranteed to succeed, since by definition
* the newly inserted mov's cannot conflict with anything.
*/
for (i = 0; i < n; i++) {
struct ir3_instruction *instr = ops->get(arr, i);
if (instr) {
struct ir3_instruction *left = (i > 0) ? ops->get(arr, i - 1) : NULL;
struct ir3_instruction *right = (i < (n-1)) ? ops->get(arr, i + 1) : NULL;
 
debug_assert(!conflicts(instr->cp.left, left));
if (left) {
instr->cp.left_cnt++;
instr->cp.left = left;
}
 
debug_assert(!conflicts(instr->cp.right, right));
if (right) {
instr->cp.right_cnt++;
instr->cp.right = right;
}
}
}
}
 
static void instr_find_neighbors(struct ir3_instruction *instr)
{
struct ir3_instruction *src;
 
if (check_stop(instr))
return;
 
if (is_meta(instr) && (instr->opc == OPC_META_FI))
group_n(&instr_ops, instr, instr->regs_count - 1);
 
foreach_ssa_src(src, instr)
instr_find_neighbors(src);
}
 
/* a bit of sadness.. we can't have "holes" in inputs from PoV of
* register assignment, they still need to be grouped together. So
* we need to insert a dummy/padding instruction for grouping, and
* then take it back out again before anyone notices.
*/
static void pad_and_group_input(struct ir3_instruction **input, unsigned n)
{
int i, mask = 0;
struct ir3_block *block = NULL;
 
for (i = n - 1; i >= 0; i--) {
struct ir3_instruction *instr = input[i];
if (instr) {
block = instr->block;
} else if (block) {
instr = ir3_instr_create(block, 0, OPC_NOP);
ir3_reg_create(instr, 0, IR3_REG_SSA); /* dst */
input[i] = instr;
mask |= (1 << i);
}
}
 
group_n(&arr_ops_in, input, n);
 
for (i = 0; i < n; i++) {
if (mask & (1 << i))
input[i] = NULL;
}
}
 
static void block_find_neighbors(struct ir3_block *block)
{
unsigned i;
 
for (i = 0; i < block->noutputs; i++) {
if (block->outputs[i]) {
struct ir3_instruction *instr = block->outputs[i];
instr_find_neighbors(instr);
}
}
 
/* shader inputs/outputs themselves must be contiguous as well:
*/
if (!block->parent) {
/* NOTE: group inputs first, since we only insert mov's
* *before* the conflicted instr (and that would go badly
* for inputs). By doing inputs first, we should never
* have a conflict on inputs.. pushing any conflict to
* resolve to the outputs, for stuff like:
*
* MOV OUT[n], IN[m].wzyx
*
* NOTE: we assume here inputs/outputs are grouped in vec4.
* This logic won't quite cut it if we don't align smaller
* vectors on vec4 boundaries
*/
for (i = 0; i < block->ninputs; i += 4)
pad_and_group_input(&block->inputs[i], 4);
for (i = 0; i < block->noutputs; i += 4)
group_n(&arr_ops_out, &block->outputs[i], 4);
 
}
}
 
void ir3_block_group(struct ir3_block *block)
{
ir3_clear_mark(block->shader);
block_find_neighbors(block);
}
/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/freedreno/ir3/ir3_legalize.c
0,0 → 1,248
/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */
 
/*
* Copyright (C) 2014 Rob Clark <robclark@freedesktop.org>
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Authors:
* Rob Clark <robclark@freedesktop.org>
*/
 
#include "pipe/p_shader_tokens.h"
#include "util/u_math.h"
 
#include "freedreno_util.h"
 
#include "ir3.h"
 
/*
* Legalize:
*
* We currently require that scheduling ensures that we have enough nop's
* in all the right places. The legalize step mostly handles fixing up
* instruction flags ((ss)/(sy)/(ei)), and collapses sequences of nop's
* into fewer nop's w/ rpt flag.
*/
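
/* Illustrative example (added): a result produced by an sfu instruction
* and consumed by a later instruction gets (ss) set on the consumer,
* while a result coming back from a tex/mem instruction sets (sy) on the
* consumer -- this is what the needs_ss/needs_sy regmasks below track.
*/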
 
struct ir3_legalize_ctx {
struct ir3_block *block;
bool has_samp;
int max_bary;
};
 
static void legalize(struct ir3_legalize_ctx *ctx)
{
struct ir3_block *block = ctx->block;
struct ir3_instruction *n;
struct ir3 *shader = block->shader;
struct ir3_instruction *end =
ir3_instr_create(block, 0, OPC_END);
struct ir3_instruction *last_input = NULL;
struct ir3_instruction *last_rel = NULL;
regmask_t needs_ss_war; /* write after read */
regmask_t needs_ss;
regmask_t needs_sy;
 
regmask_init(&needs_ss_war);
regmask_init(&needs_ss);
regmask_init(&needs_sy);
 
shader->instrs_count = 0;
 
for (n = block->head; n; n = n->next) {
struct ir3_register *reg;
unsigned i;
 
if (is_meta(n))
continue;
 
if (is_input(n)) {
struct ir3_register *inloc = n->regs[1];
assert(inloc->flags & IR3_REG_IMMED);
ctx->max_bary = MAX2(ctx->max_bary, inloc->iim_val);
}
 
/* NOTE: consider dst register too.. it could happen that
* a texture sample instruction (for example) writes some
* components which are unused. A subsequent instruction
* that writes the same register can race w/ the sam instr
* resulting in undefined results:
*/
for (i = 0; i < n->regs_count; i++) {
reg = n->regs[i];
 
if (reg_gpr(reg)) {
 
/* TODO: we probably only need (ss) for alu
* instr consuming sfu result.. need to make
* some tests for both this and (sy)..
*/
if (regmask_get(&needs_ss, reg)) {
n->flags |= IR3_INSTR_SS;
regmask_init(&needs_ss);
}
 
if (regmask_get(&needs_sy, reg)) {
n->flags |= IR3_INSTR_SY;
regmask_init(&needs_sy);
}
}
 
/* TODO: is it valid to have address reg loaded from a
* relative src (ie. mova a0, c<a0.x+4>)? If so, the
* last_rel check below should be moved ahead of this:
*/
if (reg->flags & IR3_REG_RELATIV)
last_rel = n;
}
 
if (n->regs_count > 0) {
reg = n->regs[0];
if (regmask_get(&needs_ss_war, reg)) {
n->flags |= IR3_INSTR_SS;
regmask_init(&needs_ss_war); // ??? I assume?
}
 
if (last_rel && (reg->num == regid(REG_A0, 0))) {
last_rel->flags |= IR3_INSTR_UL;
last_rel = NULL;
}
}
 
/* cat5+ does not have an (ss) bit, so if needed we insert a
* nop to carry the sync flag. Would be kinda
* clever if we were aware of this during scheduling, but
* this should be a pretty rare case:
*/
if ((n->flags & IR3_INSTR_SS) && (n->category >= 5)) {
struct ir3_instruction *nop;
nop = ir3_instr_create(block, 0, OPC_NOP);
nop->flags |= IR3_INSTR_SS;
n->flags &= ~IR3_INSTR_SS;
}
 
/* need to be able to set (ss) on first instruction: */
if ((shader->instrs_count == 0) && (n->category >= 5))
ir3_instr_create(block, 0, OPC_NOP);
 
if (is_nop(n) && shader->instrs_count) {
struct ir3_instruction *last =
shader->instrs[shader->instrs_count-1];
if (is_nop(last) && (last->repeat < 5)) {
last->repeat++;
last->flags |= n->flags;
continue;
}
}
 
shader->instrs[shader->instrs_count++] = n;
 
if (is_sfu(n))
regmask_set(&needs_ss, n->regs[0]);
 
if (is_tex(n)) {
/* this ends up being the # of samp instructions.. but that
* is ok, everything else only cares whether it is zero or
* not. We do this here, rather than when we encounter a
* SAMP decl, because (especially in binning pass shader)
* the samp instruction(s) could get eliminated if the
* result is not used.
*/
ctx->has_samp = true;
regmask_set(&needs_sy, n->regs[0]);
} else if (is_mem(n)) {
regmask_set(&needs_sy, n->regs[0]);
}
 
/* both tex/sfu appear to not always immediately consume
* their src register(s):
*/
if (is_tex(n) || is_sfu(n) || is_mem(n)) {
foreach_src(reg, n) {
if (reg_gpr(reg))
regmask_set(&needs_ss_war, reg);
}
}
 
if (is_input(n))
last_input = n;
}
 
if (last_input) {
/* special hack.. if using ldlv to bypass interpolation,
* we need to insert a dummy bary.f on which we can set
* the (ei) flag:
*/
if (is_mem(last_input) && (last_input->opc == OPC_LDLV)) {
int i, cnt;
 
/* note that ir3_instr_create() inserts into
* shader->instrs[] and increments the count..
* so we need to bump up the cnt initially (to
* avoid it clobbering the last real instr) and
* restore it after.
*/
cnt = ++shader->instrs_count;
 
/* inserting instructions would be a bit nicer if list.. */
for (i = cnt - 2; i >= 0; i--) {
if (shader->instrs[i] == last_input) {
 
/* (ss)bary.f (ei)r63.x, 0, r0.x */
last_input = ir3_instr_create(block, 2, OPC_BARY_F);
last_input->flags |= IR3_INSTR_SS;
ir3_reg_create(last_input, regid(63, 0), 0);
ir3_reg_create(last_input, 0, IR3_REG_IMMED)->iim_val = 0;
ir3_reg_create(last_input, regid(0, 0), 0);
 
shader->instrs[i + 1] = last_input;
 
break;
}
shader->instrs[i + 1] = shader->instrs[i];
}
 
shader->instrs_count = cnt;
}
last_input->regs[0]->flags |= IR3_REG_EI;
}
 
if (last_rel)
last_rel->flags |= IR3_INSTR_UL;
 
shader->instrs[shader->instrs_count++] = end;
 
shader->instrs[0]->flags |= IR3_INSTR_SS | IR3_INSTR_SY;
}
 
void ir3_block_legalize(struct ir3_block *block,
bool *has_samp, int *max_bary)
{
struct ir3_legalize_ctx ctx = {
.block = block,
.max_bary = -1,
};
 
legalize(&ctx);
 
*has_samp = ctx.has_samp;
*max_bary = ctx.max_bary;
}
/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/freedreno/ir3/ir3_nir.h
0,0 → 1,36
/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */
 
/*
* Copyright (C) 2015 Rob Clark <robclark@freedesktop.org>
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Authors:
* Rob Clark <robclark@freedesktop.org>
*/
 
#ifndef IR3_NIR_H_
#define IR3_NIR_H_
 
#include "glsl/nir/nir.h"
 
bool ir3_nir_lower_if_else(nir_shader *shader);
 
#endif /* IR3_NIR_H_ */
/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/freedreno/ir3/ir3_nir_lower_if_else.c
0,0 → 1,337
/*
* Copyright © 2014 Intel Corporation
* Copyright © 2015 Red Hat
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*
* Authors:
* Jason Ekstrand (jason@jlekstrand.net)
* Rob Clark (robclark@freedesktop.org)
*
*/
 
#include "ir3_nir.h"
#include "glsl/nir/nir_builder.h"
 
/* Based on nir_opt_peephole_select, and hacked up to more aggressively
* flatten anything that can be flattened
*
* This *might* be something that other drivers could use. On the other
* hand, I think most other hw has predicated instructions or similar
* to select which side of the if/else writes back the result (and
* therefore does not have to assign unique registers to both sides
* of the if/else).
* (And hopefully those drivers don't also have crazy scheduling reqs
* and can more easily do this in their backend.)
*
* TODO eventually when we have proper flow control in the backend:
*
* + Probably weight differently normal ALUs vs SFUs (cos/rcp/exp)
* since executing extra SFUs for the branch-not-taken path will
* generally be much more expensive.
*
* Possibly what constitutes an ALU vs SFU differs between hw
* backends.. but that seems doubtful.
*
* + Account for texture fetch and memory accesses (incl UBOs)
* since these will be more expensive..
*
* + When if-condition is const (or uniform) or we have some way
* to know that all threads in the warp take the same branch
* then we should prefer to not flatten the if/else..
*/
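
/* Rough illustration (added, NIR-like syntax): after flattening, a
* construct like
*
* if (cond) { x = a; } else { x = b; }
*
* with a phi merging x turns into a single block where the phi is
* replaced by
*
* x = bcsel cond, a, b
*
* (see the nir_op_bcsel creation in lower_if_else_block() below).
*/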
 
struct lower_state {
nir_builder b;
void *mem_ctx;
bool progress;
};
 
static bool
valid_dest(nir_block *block, nir_dest *dest)
{
/* It must be SSA */
if (!dest->is_ssa)
return false;
 
/* We only lower blocks that do not contain other blocks
* (so this is run iteratively in a loop). Therefore if
* we get this far, it should not have any if_uses:
*/
assert(list_empty(&dest->ssa.if_uses));
 
/* The only uses of this definition must be phi's in the
* successor or in the current block
*/
nir_foreach_use(&dest->ssa, use) {
nir_instr *dest_instr = use->parent_instr;
if (dest_instr->block == block)
continue;
if ((dest_instr->type == nir_instr_type_phi) &&
(dest_instr->block == block->successors[0]))
continue;
return false;
}
 
return true;
}
 
static bool
block_check_for_allowed_instrs(nir_block *block)
{
nir_foreach_instr(block, instr) {
switch (instr->type) {
case nir_instr_type_intrinsic: {
nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
const nir_intrinsic_info *info =
&nir_intrinsic_infos[intr->intrinsic];
 
switch (intr->intrinsic) {
case nir_intrinsic_discard_if:
/* to simplify things, we want discard_if src in ssa: */
if (!intr->src[0].is_ssa)
return false;
/* fallthrough */
case nir_intrinsic_discard:
/* discard/discard_if can be reordered, but only
* with some special care
*/
break;
case nir_intrinsic_store_output:
/* TODO technically, if both if and else store
* the same output, we can hoist that out to
* the end of the block w/ a phi..
* In practice, the tgsi shaders we already get
* do this for us, so I think we don't need to
*/
default:
if (!(info->flags & NIR_INTRINSIC_CAN_REORDER))
return false;
}
 
break;
}
 
case nir_instr_type_tex: {
nir_tex_instr *tex = nir_instr_as_tex(instr);
if (!valid_dest(block, &tex->dest))
return false;
break;
}
case nir_instr_type_phi: {
nir_phi_instr *phi = nir_instr_as_phi(instr);
if (!valid_dest(block, &phi->dest))
return false;
break;
}
case nir_instr_type_alu: {
nir_alu_instr *alu = nir_instr_as_alu(instr);
if (!valid_dest(block, &alu->dest.dest))
return false;
break;
}
 
case nir_instr_type_load_const:
case nir_instr_type_ssa_undef:
break; /* always ssa dest */
 
default:
return false;
}
}
 
return true;
}
 
/* flatten a then or else block: */
static void
flatten_block(nir_builder *bld, nir_block *if_block, nir_block *prev_block,
nir_ssa_def *condition, bool invert)
{
nir_foreach_instr_safe(if_block, instr) {
if (instr->type == nir_instr_type_intrinsic) {
nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
if ((intr->intrinsic == nir_intrinsic_discard) ||
(intr->intrinsic == nir_intrinsic_discard_if)) {
nir_ssa_def *discard_cond;
 
nir_builder_insert_after_instr(bld,
nir_block_last_instr(prev_block));
 
if (invert) {
condition = nir_inot(bld, condition);
invert = false;
}
 
if (intr->intrinsic == nir_intrinsic_discard) {
discard_cond = condition;
} else {
assert(intr->src[0].is_ssa);
/* discard_if gets re-written w/ src and'd: */
discard_cond = nir_iand(bld, condition, intr->src[0].ssa);
}
 
nir_intrinsic_instr *discard_if =
nir_intrinsic_instr_create(bld->shader,
nir_intrinsic_discard_if);
discard_if->src[0] = nir_src_for_ssa(discard_cond);
 
nir_instr_insert_after(nir_block_last_instr(prev_block),
&discard_if->instr);
nir_instr_remove(instr);
instr = NULL;
}
}
/* if not handled specially, just move to prev block: */
if (instr) {
/* NOTE: exec_node_remove() is safe here (vs nir_instr_remove())
* since we are re-adding the instruction back into the prev
* block (so no dangling SSA uses)
*/
exec_node_remove(&instr->node);
instr->block = prev_block;
exec_list_push_tail(&prev_block->instr_list, &instr->node);
}
}
}
 
static bool
lower_if_else_block(nir_block *block, void *void_state)
{
struct lower_state *state = void_state;
 
/* If the block is empty, then it certainly doesn't have any phi nodes,
* so we can skip it. This also ensures that we do an early skip on the
* end block of the function which isn't actually attached to the CFG.
*/
if (exec_list_is_empty(&block->instr_list))
return true;
 
if (nir_cf_node_is_first(&block->cf_node))
return true;
 
nir_cf_node *prev_node = nir_cf_node_prev(&block->cf_node);
if (prev_node->type != nir_cf_node_if)
return true;
 
nir_if *if_stmt = nir_cf_node_as_if(prev_node);
nir_cf_node *then_node = nir_if_first_then_node(if_stmt);
nir_cf_node *else_node = nir_if_first_else_node(if_stmt);
 
/* We can only have one block in each side ... */
if (nir_if_last_then_node(if_stmt) != then_node ||
nir_if_last_else_node(if_stmt) != else_node)
return true;
 
nir_block *then_block = nir_cf_node_as_block(then_node);
nir_block *else_block = nir_cf_node_as_block(else_node);
 
/* ... and those blocks must only contain "allowed" instructions. */
if (!block_check_for_allowed_instrs(then_block) ||
!block_check_for_allowed_instrs(else_block))
return true;
 
/* condition should be ssa too, which simplifies flatten_block: */
if (!if_stmt->condition.is_ssa)
return true;
 
/* At this point, we know that the previous CFG node is an if-then
* statement containing only moves to phi nodes in this block. We can
* just remove that entire CF node and replace all of the phi nodes with
* selects.
*/
 
nir_block *prev_block = nir_cf_node_as_block(nir_cf_node_prev(prev_node));
assert(prev_block->cf_node.type == nir_cf_node_block);
 
/* First, we move the remaining instructions from the blocks to the
* block before. There are a few things that need handling specially
* like discard/discard_if.
*/
flatten_block(&state->b, then_block, prev_block,
if_stmt->condition.ssa, false);
flatten_block(&state->b, else_block, prev_block,
if_stmt->condition.ssa, true);
 
nir_foreach_instr_safe(block, instr) {
if (instr->type != nir_instr_type_phi)
break;
 
nir_phi_instr *phi = nir_instr_as_phi(instr);
nir_alu_instr *sel = nir_alu_instr_create(state->mem_ctx, nir_op_bcsel);
nir_src_copy(&sel->src[0].src, &if_stmt->condition, state->mem_ctx);
/* Splat the condition to all channels */
memset(sel->src[0].swizzle, 0, sizeof sel->src[0].swizzle);
 
assert(exec_list_length(&phi->srcs) == 2);
nir_foreach_phi_src(phi, src) {
assert(src->pred == then_block || src->pred == else_block);
assert(src->src.is_ssa);
 
unsigned idx = src->pred == then_block ? 1 : 2;
nir_src_copy(&sel->src[idx].src, &src->src, state->mem_ctx);
}
 
nir_ssa_dest_init(&sel->instr, &sel->dest.dest,
phi->dest.ssa.num_components, phi->dest.ssa.name);
sel->dest.write_mask = (1 << phi->dest.ssa.num_components) - 1;
 
nir_ssa_def_rewrite_uses(&phi->dest.ssa,
nir_src_for_ssa(&sel->dest.dest.ssa),
state->mem_ctx);
 
nir_instr_insert_before(&phi->instr, &sel->instr);
nir_instr_remove(&phi->instr);
}
 
nir_cf_node_remove(&if_stmt->cf_node);
state->progress = true;
 
return true;
}
 
static bool
lower_if_else_impl(nir_function_impl *impl)
{
struct lower_state state;
 
state.mem_ctx = ralloc_parent(impl);
state.progress = false;
nir_builder_init(&state.b, impl);
 
nir_foreach_block(impl, lower_if_else_block, &state);
 
if (state.progress)
nir_metadata_preserve(impl, nir_metadata_none);
 
return state.progress;
}
 
bool
ir3_nir_lower_if_else(nir_shader *shader)
{
bool progress = false;
 
nir_foreach_overload(shader, overload) {
if (overload->impl)
progress |= lower_if_else_impl(overload->impl);
}
 
return progress;
}
/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/freedreno/ir3/ir3_ra.c
0,0 → 1,659
/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */
 
/*
* Copyright (C) 2014 Rob Clark <robclark@freedesktop.org>
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Authors:
* Rob Clark <robclark@freedesktop.org>
*/
 
#include "pipe/p_shader_tokens.h"
#include "util/u_math.h"
 
#include "ir3.h"
 
/*
* Register Assignment:
*
* NOTE: currently only works on a single basic block.. need to think
* about how multiple basic blocks are going to get scheduled. But
* I think I want to re-arrange how blocks work, ie. get rid of the
* block nesting thing..
*
* NOTE: we could do register coalescing (eliminate moves) as part of
* the RA step.. OTOH I think we need to do scheduling before register
* assignment, and removing a mov affects scheduling (unless
* we leave a placeholder nop, which seems lame), so I'm not really
* sure how practical it is to do both in a single stage. But OTOH
* I don't really see a sane way for the CP stage to realize when it
* cannot remove a mov due to multi-register constraints..
*
* NOTE: http://scopesconf.org/scopes-01/paper/session1_2.ps.gz has
* some ideas to handle array allocation with a more conventional
* graph coloring algorithm for register assignment, which might be
* a good alternative to the current algo. However afaict it cannot
* handle overlapping arrays, which is a scenario that we have to
* deal with
*/
 
struct ir3_ra_ctx {
struct ir3_block *block;
enum shader_t type;
bool frag_coord;
bool frag_face;
int cnt;
bool error;
struct {
unsigned base;
unsigned size;
} arrays[MAX_ARRAYS];
};
 
#ifdef DEBUG
# include "freedreno_util.h"
# define ra_debug (fd_mesa_debug & FD_DBG_OPTMSGS)
#else
# define ra_debug 0
#endif
 
#define ra_dump_list(msg, n) do { \
if (ra_debug) { \
debug_printf("-- " msg); \
ir3_dump_instr_list(n); \
} \
} while (0)
 
#define ra_dump_instr(msg, n) do { \
if (ra_debug) { \
debug_printf(">> " msg); \
ir3_dump_instr_single(n); \
} \
} while (0)
 
#define ra_assert(ctx, x) do { \
debug_assert(x); \
if (!(x)) { \
debug_printf("RA: failed assert: %s\n", #x); \
(ctx)->error = true; \
}; \
} while (0)
 
 
/* sorta ugly way to retrofit half-precision support.. rather than
* passing extra param around, just OR in a high bit. All the low
* value arithmetic (ie. +/- offset within a contiguous vec4, etc)
* will continue to work as long as you don't underflow (and that
* would go badly anyways).
*/
#define REG_HALF 0x8000
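 
/* e.g. (illustrative, assuming the usual regid() encoding of
* ((num << 2) | comp)): a half-precision register like hr2.y would be
* tracked here as
*
*    name = regid(2, 1) | REG_HALF;     (ie. 0x8009)
*
* and reg_assign() below strips the flag again with (name & ~REG_HALF)
* while setting IR3_REG_HALF on the register.
*/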
 
#define REG(n, wm, f) (struct ir3_register){ \
.flags = (f), \
.num = (n), \
.wrmask = TGSI_WRITEMASK_ ## wm, \
}
 
/* check that the register exists, is a GPR and is not special (a0/p0) */
static struct ir3_register * reg_check(struct ir3_instruction *instr, unsigned n)
{
if ((n < instr->regs_count) && reg_gpr(instr->regs[n]) &&
!(instr->regs[n]->flags & IR3_REG_SSA))
return instr->regs[n];
return NULL;
}
 
/* figure out if an unassigned src register points back to the instr we
* are assigning:
*/
static bool instr_used_by(struct ir3_instruction *instr,
struct ir3_register *src)
{
struct ir3_instruction *src_instr = ssa(src);
unsigned i;
if (instr == src_instr)
return true;
if (src_instr && is_meta(src_instr))
for (i = 1; i < src_instr->regs_count; i++)
if (instr_used_by(instr, src_instr->regs[i]))
return true;
 
return false;
}
 
static bool instr_is_output(struct ir3_instruction *instr)
{
struct ir3_block *block = instr->block;
unsigned i;
 
for (i = 0; i < block->noutputs; i++)
if (instr == block->outputs[i])
return true;
 
return false;
}
 
static void mark_sources(struct ir3_instruction *instr,
struct ir3_instruction *n, regmask_t *liveregs, regmask_t *written)
{
unsigned i;
 
for (i = 1; i < n->regs_count; i++) {
struct ir3_register *r = reg_check(n, i);
if (r)
regmask_set_if_not(liveregs, r, written);
 
/* if any src points back to the instruction(s) in
* the block of neighbors that we are assigning then
* mark any written (clobbered) registers as live:
*/
if (instr_used_by(instr, n->regs[i]))
regmask_or(liveregs, liveregs, written);
}
 
}
 
/* live means read before written */
static void compute_liveregs(struct ir3_ra_ctx *ctx,
struct ir3_instruction *instr, regmask_t *liveregs)
{
struct ir3_block *block = instr->block;
struct ir3_instruction *n;
regmask_t written;
unsigned i;
 
regmask_init(&written);
 
for (n = instr->next; n; n = n->next) {
struct ir3_register *r;
 
if (is_meta(n))
continue;
 
/* check first src's read: */
mark_sources(instr, n, liveregs, &written);
 
/* for instructions that write to an array, we need to
* capture the dependency on the array elements:
*/
if (n->fanin)
mark_sources(instr, n->fanin, liveregs, &written);
 
/* meta-instructions don't actually get scheduled,
* so don't let its write confuse us.. what we
* really care about is when the src to the meta
* instr was written:
*/
if (is_meta(n))
continue;
 
/* then dst written (if assigned already): */
r = reg_check(n, 0);
if (r) {
/* if an instruction *is* an output, then it is live */
if (!instr_is_output(n))
regmask_set(&written, r);
}
 
}
 
/* be sure to account for output registers too: */
for (i = 0; i < block->noutputs; i++) {
struct ir3_register *r;
if (!block->outputs[i])
continue;
r = reg_check(block->outputs[i], 0);
if (r)
regmask_set_if_not(liveregs, r, &written);
}
 
/* if the instruction is an output, we need a reg that isn't written
* before the end.. equiv to the instr_used_by() check above
* in the loop body
* TODO maybe should follow fanin/fanout?
*/
if (instr_is_output(instr))
regmask_or(liveregs, liveregs, &written);
}
 
static int find_available(regmask_t *liveregs, int size, bool half)
{
unsigned i;
unsigned f = half ? IR3_REG_HALF : 0;
for (i = 0; i < MAX_REG - size; i++) {
if (!regmask_get(liveregs, &REG(i, X, f))) {
unsigned start = i++;
for (; (i < MAX_REG) && ((i - start) < size); i++)
if (regmask_get(liveregs, &REG(i, X, f)))
break;
if ((i - start) >= size)
return start;
}
}
assert(0);
return -1;
}
 
static int alloc_block(struct ir3_ra_ctx *ctx,
struct ir3_instruction *instr, int size)
{
struct ir3_register *dst = instr->regs[0];
struct ir3_instruction *n;
regmask_t liveregs;
unsigned name;
 
/* should only ever be called w/ head of neighbor list: */
debug_assert(!instr->cp.left);
 
regmask_init(&liveregs);
 
for (n = instr; n; n = n->cp.right)
compute_liveregs(ctx, n, &liveregs);
 
/* because we do assignment on fanout nodes for wrmask!=0x1, we
* need to handle this special case, where the fanout nodes all
* appear after one or more of the consumers of the src node:
*
* 0098:009: sam _, r2.x
* 0028:010: mul.f r3.z, r4.x, c13.x
* ; we start assigning here for '0098:009: sam'.. but
* ; would miss the usage at '0028:010: mul.f'
* 0101:009: _meta:fo _, _[0098:009: sam], off=2
*/
if (is_meta(instr) && (instr->opc == OPC_META_FO))
compute_liveregs(ctx, instr->regs[1]->instr, &liveregs);
 
name = find_available(&liveregs, size,
!!(dst->flags & IR3_REG_HALF));
 
if (dst->flags & IR3_REG_HALF)
name |= REG_HALF;
 
return name;
}
 
static type_t half_type(type_t type)
{
switch (type) {
case TYPE_F32: return TYPE_F16;
case TYPE_U32: return TYPE_U16;
case TYPE_S32: return TYPE_S16;
/* instructions may already be fixed up: */
case TYPE_F16:
case TYPE_U16:
case TYPE_S16:
return type;
default:
assert(0);
return ~0;
}
}
 
/* some instructions need fix-up if dst register is half precision: */
static void fixup_half_instr_dst(struct ir3_instruction *instr)
{
switch (instr->category) {
case 1: /* move instructions */
instr->cat1.dst_type = half_type(instr->cat1.dst_type);
break;
case 3:
switch (instr->opc) {
case OPC_MAD_F32:
instr->opc = OPC_MAD_F16;
break;
case OPC_SEL_B32:
instr->opc = OPC_SEL_B16;
break;
case OPC_SEL_S32:
instr->opc = OPC_SEL_S16;
break;
case OPC_SEL_F32:
instr->opc = OPC_SEL_F16;
break;
case OPC_SAD_S32:
instr->opc = OPC_SAD_S16;
break;
/* instructions may already be fixed up: */
case OPC_MAD_F16:
case OPC_SEL_B16:
case OPC_SEL_S16:
case OPC_SEL_F16:
case OPC_SAD_S16:
break;
default:
assert(0);
break;
}
break;
case 5:
instr->cat5.type = half_type(instr->cat5.type);
break;
}
}
/* some instructions need fix-up if src register is half precision: */
static void fixup_half_instr_src(struct ir3_instruction *instr)
{
switch (instr->category) {
case 1: /* move instructions */
instr->cat1.src_type = half_type(instr->cat1.src_type);
break;
}
}
 
static void reg_assign(struct ir3_instruction *instr,
unsigned r, unsigned name)
{
struct ir3_register *reg = instr->regs[r];
 
reg->flags &= ~IR3_REG_SSA;
reg->num = name & ~REG_HALF;
 
if (name & REG_HALF) {
reg->flags |= IR3_REG_HALF;
/* if dst reg being assigned, patch up the instr: */
if (reg == instr->regs[0])
fixup_half_instr_dst(instr);
else
fixup_half_instr_src(instr);
}
}
 
static void instr_assign(struct ir3_ra_ctx *ctx,
struct ir3_instruction *instr, unsigned name);
 
static void instr_assign_src(struct ir3_ra_ctx *ctx,
struct ir3_instruction *instr, unsigned r, unsigned name)
{
struct ir3_register *reg = instr->regs[r];
 
if (reg->flags & IR3_REG_RELATIV)
name += reg->offset;
 
reg_assign(instr, r, name);
 
if (is_meta(instr)) {
switch (instr->opc) {
case OPC_META_INPUT:
/* shader-input does not have a src, only block input: */
debug_assert(instr->regs_count == 2);
instr_assign(ctx, instr, name);
return;
case OPC_META_FO:
instr_assign(ctx, instr, name + instr->fo.off);
return;
case OPC_META_FI:
instr_assign(ctx, instr, name - (r - 1));
return;
default:
break;
}
}
}
 
static void instr_assign_srcs(struct ir3_ra_ctx *ctx,
struct ir3_instruction *instr, unsigned name)
{
struct ir3_instruction *n, *src;
 
for (n = instr->next; n && !ctx->error; n = n->next) {
foreach_ssa_src_n(src, i, n) {
unsigned r = i + 1;
 
/* skip address / etc (non real sources): */
if (r >= n->regs_count)
continue;
 
if (src == instr)
instr_assign_src(ctx, n, r, name);
}
}
}
 
static void instr_assign(struct ir3_ra_ctx *ctx,
struct ir3_instruction *instr, unsigned name)
{
struct ir3_register *reg = instr->regs[0];
 
if (reg->flags & IR3_REG_RELATIV)
return;
 
/* check if already assigned: */
if (!(reg->flags & IR3_REG_SSA)) {
/* ... and if so, sanity check: */
ra_assert(ctx, reg->num == (name & ~REG_HALF));
return;
}
 
/* rename this instruction's dst register: */
reg_assign(instr, 0, name);
 
/* and rename any subsequent use of result of this instr: */
instr_assign_srcs(ctx, instr, name);
 
/* To simplify the neighbor logic, and to "avoid" dealing with
* instructions which write more than one output, we actually
* do register assignment for instructions that produce multiple
* outputs on the fanout nodes and propagate up the assignment
* to the actual instruction:
*/
if (is_meta(instr) && (instr->opc == OPC_META_FO)) {
struct ir3_instruction *src;
 
debug_assert(name >= instr->fo.off);
 
foreach_ssa_src(src, instr)
instr_assign(ctx, src, name - instr->fo.off);
}
}
 
/* check neighbor list to see if it is already partially (or completely)
* assigned, in which case register block is already allocated and we
* just need to complete the assignment:
*/
static int check_partial_assignment(struct ir3_ra_ctx *ctx,
struct ir3_instruction *instr)
{
struct ir3_instruction *n;
int off = 0;
 
debug_assert(!instr->cp.left);
 
for (n = instr; n; n = n->cp.right) {
struct ir3_register *dst = n->regs[0];
if ((n->depth != DEPTH_UNUSED) &&
!(dst->flags & IR3_REG_SSA)) {
int name = dst->num - off;
debug_assert(name >= 0);
return name;
}
off++;
}
 
return -1;
}
 
/* allocate register name(s) for a list of neighboring instructions;
* instr should point to leftmost neighbor (head of list)
*/
static void instr_alloc_and_assign(struct ir3_ra_ctx *ctx,
struct ir3_instruction *instr)
{
struct ir3_instruction *n;
struct ir3_register *dst;
int name;
 
debug_assert(!instr->cp.left);
 
if (instr->regs_count == 0)
return;
 
dst = instr->regs[0];
 
/* For indirect dst, take the register assignment from the
* fanin and propagate it forward.
*/
if (dst->flags & IR3_REG_RELATIV) {
/* NOTE can be grouped, if for example outputs:
* for now disable cp if indirect writes
*/
instr_alloc_and_assign(ctx, instr->fanin);
 
dst->num += instr->fanin->regs[0]->num;
dst->flags &= ~IR3_REG_SSA;
 
instr_assign_srcs(ctx, instr, instr->fanin->regs[0]->num);
 
return;
}
 
/* for instructions w/ fanouts, do the actual register assignment
* on the group of fanout neighbor nodes and propagate the reg
* name back up to the texture instruction.
*/
if (dst->wrmask != 0x1)
return;
 
name = check_partial_assignment(ctx, instr);
 
/* allocate register(s): */
if (name >= 0) {
/* already partially assigned, just finish the job */
} else if (reg_gpr(dst)) {
int size;
/* number of consecutive registers to assign: */
size = ir3_neighbor_count(instr);
if (dst->wrmask != 0x1)
size = MAX2(size, ffs(~dst->wrmask) - 1);
name = alloc_block(ctx, instr, size);
} else if (dst->flags & IR3_REG_ADDR) {
debug_assert(!instr->cp.right);
dst->flags &= ~IR3_REG_ADDR;
name = regid(REG_A0, 0) | REG_HALF;
} else {
debug_assert(!instr->cp.right);
/* predicate register (p0).. etc */
name = regid(REG_P0, 0);
debug_assert(dst->num == name);
}
 
ra_assert(ctx, name >= 0);
 
for (n = instr; n && !ctx->error; n = n->cp.right) {
instr_assign(ctx, n, name);
name++;
}
}
 
static void instr_assign_array(struct ir3_ra_ctx *ctx,
struct ir3_instruction *instr)
{
struct ir3_instruction *src;
int name, aid = instr->fi.aid;
 
if (ctx->arrays[aid].base == ~0) {
int size = instr->regs_count - 1;
ctx->arrays[aid].base = alloc_block(ctx, instr, size);
ctx->arrays[aid].size = size;
}
 
name = ctx->arrays[aid].base;
 
foreach_ssa_src_n(src, i, instr) {
unsigned r = i + 1;
 
/* skip address / etc (non real sources): */
if (r >= instr->regs_count)
break;
 
instr_assign(ctx, src, name);
name++;
}
 
}
 
static int block_ra(struct ir3_ra_ctx *ctx, struct ir3_block *block)
{
struct ir3_instruction *n;
 
/* frag shader inputs get pre-assigned, since we have some
* constraints/unknowns about setup for some of these regs:
*/
if ((ctx->type == SHADER_FRAGMENT) && !block->parent) {
unsigned i = 0, j;
if (ctx->frag_face && (i < block->ninputs) && block->inputs[i]) {
/* if we have frag_face, it gets hr0.x */
instr_assign(ctx, block->inputs[i], REG_HALF | 0);
i += 4;
}
for (j = 0; i < block->ninputs; i++, j++)
if (block->inputs[i])
instr_assign(ctx, block->inputs[i], j);
}
 
ra_dump_list("-------\n", block->head);
 
/* first pass, assign arrays: */
for (n = block->head; n && !ctx->error; n = n->next) {
if (is_meta(n) && (n->opc == OPC_META_FI) && n->fi.aid) {
debug_assert(!n->cp.left); /* don't think this should happen */
ra_dump_instr("ASSIGN ARRAY: ", n);
instr_assign_array(ctx, n);
ra_dump_list("-------\n", block->head);
}
}
 
for (n = block->head; n && !ctx->error; n = n->next) {
ra_dump_instr("ASSIGN: ", n);
instr_alloc_and_assign(ctx, ir3_neighbor_first(n));
ra_dump_list("-------\n", block->head);
}
 
return ctx->error ? -1 : 0;
}
 
int ir3_block_ra(struct ir3_block *block, enum shader_t type,
bool frag_coord, bool frag_face)
{
struct ir3_instruction *n;
struct ir3_ra_ctx ctx = {
.block = block,
.type = type,
.frag_coord = frag_coord,
.frag_face = frag_face,
};
int ret;
 
memset(&ctx.arrays, ~0, sizeof(ctx.arrays));
 
/* mark dst registers w/ SSA flag so we can see which
* have been assigned so far:
* NOTE: we really should set SSA flag consistently on
* every dst register in the frontend.
*/
for (n = block->head; n; n = n->next)
if (n->regs_count > 0)
n->regs[0]->flags |= IR3_REG_SSA;
 
ir3_clear_mark(block->shader);
ret = block_ra(&ctx, block);
 
return ret;
}
/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/freedreno/ir3/ir3_sched.c
0,0 → 1,471
/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */
 
/*
* Copyright (C) 2014 Rob Clark <robclark@freedesktop.org>
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Authors:
* Rob Clark <robclark@freedesktop.org>
*/
 
 
#include "util/u_math.h"
 
#include "ir3.h"
 
enum {
SCHEDULED = -1,
DELAYED = -2,
};
 
/*
* Instruction Scheduling:
*
* Using the depth sorted list from depth pass, attempt to recursively
* schedule deepest unscheduled path. The first instruction that cannot
* be scheduled, returns the required delay slots it needs, at which
* point we return back up to the top and attempt to schedule by next
* highest depth. After a sufficient number of instructions have been
* scheduled, return back to beginning of list and start again. If you
* reach the end of depth sorted list without being able to insert any
* instruction, insert nop's. Repeat until no more unscheduled
* instructions.
*
* There are a few special cases that need to be handled, since sched
* is currently independent of register allocation. Usages of address
* register (a0.x) or predicate register (p0.x) must be serialized. Ie.
* if you have two pairs of instructions that write the same special
* register and then read it, then those pairs cannot be interleaved.
* To solve this, when we are in such a scheduling "critical section",
* and we encounter a conflicting write to a special register, we try
* to schedule any remaining instructions that use that value first.
*/
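 
/* A simplified sketch of the main loop (see block_sched() below for the
* exact control flow):
*
*    while (unscheduled instructions remain) {
*       cnt = trysched(deepest remaining instr);  // recurses into srcs first
*       while (more delay slots needed)
*          trysched(next shallower instr);        // fill slots w/ real work
*       while (still not enough scheduled)
*          schedule(nop);                         // pad with nops
*    }
*/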
 
struct ir3_sched_ctx {
struct ir3_instruction *scheduled; /* last scheduled instr */
struct ir3_instruction *addr; /* current a0.x user, if any */
struct ir3_instruction *pred; /* current p0.x user, if any */
unsigned cnt;
bool error;
};
 
static struct ir3_instruction *
deepest(struct ir3_instruction **srcs, unsigned nsrcs)
{
struct ir3_instruction *d = NULL;
unsigned i = 0, id = 0;
 
while ((i < nsrcs) && !(d = srcs[id = i]))
i++;
 
if (!d)
return NULL;
 
for (; i < nsrcs; i++)
if (srcs[i] && (srcs[i]->depth > d->depth))
d = srcs[id = i];
 
srcs[id] = NULL;
 
return d;
}
 
static unsigned distance(struct ir3_sched_ctx *ctx,
struct ir3_instruction *instr, unsigned maxd)
{
struct ir3_instruction *n = ctx->scheduled;
unsigned d = 0;
while (n && (n != instr) && (d < maxd)) {
if (is_alu(n) || is_flow(n))
d++;
n = n->next;
}
return d;
}
 
/* TODO maybe we want double linked list? */
static struct ir3_instruction * prev(struct ir3_instruction *instr)
{
struct ir3_instruction *p = instr->block->head;
while (p && (p->next != instr))
p = p->next;
return p;
}
 
static bool is_sfu_or_mem(struct ir3_instruction *instr)
{
return is_sfu(instr) || is_mem(instr);
}
 
static void schedule(struct ir3_sched_ctx *ctx,
struct ir3_instruction *instr, bool remove)
{
struct ir3_block *block = instr->block;
 
/* maybe there is a better way to handle this than just stuffing
* a nop.. ideally we'd know about this constraint in the
* scheduling and depth calculation..
*/
if (ctx->scheduled && is_sfu_or_mem(ctx->scheduled) && is_sfu_or_mem(instr))
schedule(ctx, ir3_instr_create(block, 0, OPC_NOP), false);
 
/* remove from depth list:
*/
if (remove) {
struct ir3_instruction *p = prev(instr);
 
/* NOTE: this can happen for inputs which are not
* read.. in that case there is no need to schedule
* the input, so just bail:
*/
if (instr != (p ? p->next : block->head))
return;
 
if (p)
p->next = instr->next;
else
block->head = instr->next;
}
 
if (writes_addr(instr)) {
assert(ctx->addr == NULL);
ctx->addr = instr;
}
 
if (writes_pred(instr)) {
assert(ctx->pred == NULL);
ctx->pred = instr;
}
 
instr->flags |= IR3_INSTR_MARK;
 
instr->next = ctx->scheduled;
ctx->scheduled = instr;
 
ctx->cnt++;
}
 
/*
* Delay-slot calculation. Follows fanin/fanout.
*/
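 
/* For example (illustrative numbers): if ir3_delayslots() says a consumer
* needs 6 slots after its assigner, and distance() finds 4 alu/flow
* instructions already scheduled in between, then 2 more slots still have
* to be filled before the consumer can be scheduled.
*/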
 
/* calculate delay for specified src: */
static unsigned delay_calc_srcn(struct ir3_sched_ctx *ctx,
struct ir3_instruction *assigner,
struct ir3_instruction *consumer, unsigned srcn)
{
unsigned delay = 0;
 
if (is_meta(assigner)) {
struct ir3_instruction *src;
foreach_ssa_src(src, assigner) {
unsigned d = delay_calc_srcn(ctx, src, consumer, srcn);
delay = MAX2(delay, d);
}
} else {
delay = ir3_delayslots(assigner, consumer, srcn);
delay -= distance(ctx, assigner, delay);
}
 
return delay;
}
 
/* calculate delay for instruction (maximum of delay for all srcs): */
static unsigned delay_calc(struct ir3_sched_ctx *ctx,
struct ir3_instruction *instr)
{
unsigned delay = 0;
struct ir3_instruction *src;
 
foreach_ssa_src_n(src, i, instr) {
unsigned d = delay_calc_srcn(ctx, src, instr, i);
delay = MAX2(delay, d);
}
 
return delay;
}
 
/* A negative return value signals that an instruction has been newly
* SCHEDULED (or DELAYED due to address or predicate register already
* in use); in that case return back up the stack (to block_sched())
*/
static int trysched(struct ir3_sched_ctx *ctx,
struct ir3_instruction *instr)
{
struct ir3_instruction *srcs[64];
struct ir3_instruction *src;
unsigned delay, nsrcs = 0;
 
/* if already scheduled: */
if (instr->flags & IR3_INSTR_MARK)
return 0;
 
/* figure out our src's, copy 'em out into an array for sorting: */
foreach_ssa_src(src, instr) {
debug_assert(nsrcs < ARRAY_SIZE(srcs));
srcs[nsrcs++] = src;
}
 
/* for each src register in sorted order:
*/
delay = 0;
while ((src = deepest(srcs, nsrcs))) {
delay = trysched(ctx, src);
if (delay)
return delay;
}
 
/* all our dependents are scheduled, figure out if
* we have enough delay slots to schedule ourself:
*/
delay = delay_calc(ctx, instr);
if (delay)
return delay;
 
/* if the instruction is a kill, we need to ensure *every*
* bary.f is scheduled. The hw seems unhappy if the thread
* gets killed before the end-input (ei) flag is hit.
*
* We could do this by adding each bary.f instruction as
* virtual ssa src for the kill instruction. But we have
* fixed length instr->regs[].
*
* TODO this wouldn't be quite right if we had multiple
* basic blocks and any block was conditional. We'd need
* to schedule the bary.f's outside of any conditional
* block which contained a kill.. I think..
*/
if (is_kill(instr)) {
struct ir3 *ir = instr->block->shader;
unsigned i;
 
for (i = 0; i < ir->baryfs_count; i++) {
struct ir3_instruction *baryf = ir->baryfs[i];
if (baryf->depth == DEPTH_UNUSED)
continue;
delay = trysched(ctx, baryf);
if (delay)
return delay;
}
}
 
/* if this is a write to address/predicate register, and that
* register is currently in use, we need to defer until it is
* free:
*/
if (writes_addr(instr) && ctx->addr) {
assert(ctx->addr != instr);
return DELAYED;
}
if (writes_pred(instr) && ctx->pred) {
assert(ctx->pred != instr);
return DELAYED;
}
 
schedule(ctx, instr, true);
return SCHEDULED;
}
 
static struct ir3_instruction * reverse(struct ir3_instruction *instr)
{
struct ir3_instruction *reversed = NULL;
while (instr) {
struct ir3_instruction *next = instr->next;
instr->next = reversed;
reversed = instr;
instr = next;
}
return reversed;
}
 
static bool uses_current_addr(struct ir3_sched_ctx *ctx,
struct ir3_instruction *instr)
{
return instr->address && (ctx->addr == instr->address);
}
 
static bool uses_current_pred(struct ir3_sched_ctx *ctx,
struct ir3_instruction *instr)
{
struct ir3_instruction *src;
foreach_ssa_src(src, instr)
if (ctx->pred == src)
return true;
return false;
}
 
/* when we encounter an instruction that writes to the address register
* while it is in use, we delay that instruction and try to schedule all
* other instructions using the current address register:
*/
static int block_sched_undelayed(struct ir3_sched_ctx *ctx,
struct ir3_block *block)
{
struct ir3_instruction *instr = block->head;
bool addr_in_use = false;
bool pred_in_use = false;
bool all_delayed = true;
unsigned cnt = ~0, attempted = 0;
 
while (instr) {
struct ir3_instruction *next = instr->next;
bool addr = uses_current_addr(ctx, instr);
bool pred = uses_current_pred(ctx, instr);
 
if (addr || pred) {
int ret = trysched(ctx, instr);
 
if (ret != DELAYED)
all_delayed = false;
 
if (ret == SCHEDULED)
cnt = 0;
else if (ret > 0)
cnt = MIN2(cnt, ret);
if (addr)
addr_in_use = true;
if (pred)
pred_in_use = true;
 
attempted++;
}
 
instr = next;
}
 
if (!addr_in_use)
ctx->addr = NULL;
 
if (!pred_in_use)
ctx->pred = NULL;
 
/* detect if we've gotten ourselves into an impossible situation
* and bail if needed
*/
if (all_delayed && (attempted > 0)) {
if (pred_in_use) {
/* TODO we probably need to keep a list of instructions
* that reference predicate, similar to indirects
*/
ctx->error = true;
return DELAYED;
}
if (addr_in_use) {
struct ir3 *ir = ctx->addr->block->shader;
struct ir3_instruction *new_addr =
ir3_instr_clone(ctx->addr);
unsigned i;
 
/* original addr is scheduled, but new one isn't: */
new_addr->flags &= ~IR3_INSTR_MARK;
 
for (i = 0; i < ir->indirects_count; i++) {
struct ir3_instruction *indirect = ir->indirects[i];
 
/* skip instructions already scheduled: */
if (indirect->flags & IR3_INSTR_MARK)
continue;
 
/* remap remaining instructions using current addr
* to new addr:
*/
if (indirect->address == ctx->addr)
indirect->address = new_addr;
}
 
/* all remaining indirects remapped to new addr: */
ctx->addr = NULL;
 
/* not really, but this will trigger us to go back to
* main trysched() loop now that we've resolved the
* conflict by duplicating the instr that writes to
* the address register.
*/
return SCHEDULED;
}
}
 
return cnt;
}
 
static void block_sched(struct ir3_sched_ctx *ctx, struct ir3_block *block)
{
struct ir3_instruction *instr;
 
/* schedule all the shader inputs (meta-instr) first so that
* the RA step sees that the input registers contain a value
* from the start of the shader:
*/
if (!block->parent) {
unsigned i;
for (i = 0; i < block->ninputs; i++) {
struct ir3_instruction *in = block->inputs[i];
if (in)
schedule(ctx, in, true);
}
}
 
while ((instr = block->head) && !ctx->error) {
/* NOTE: always grab next *before* trysched(), in case the
* instruction is actually scheduled (and therefore moved
* from depth list into scheduled list)
*/
struct ir3_instruction *next = instr->next;
int cnt = trysched(ctx, instr);
 
if (cnt == DELAYED)
cnt = block_sched_undelayed(ctx, block);
 
/* -1 is signal to return up stack, but to us means same as 0: */
cnt = MAX2(0, cnt);
cnt += ctx->cnt;
instr = next;
 
/* if the deepest remaining instruction cannot be scheduled, try
* increasingly shallower instructions until the needed number
* of delay slots is filled:
*/
while (instr && (cnt > ctx->cnt)) {
next = instr->next;
trysched(ctx, instr);
instr = next;
}
 
/* and if we run out of instructions that can be scheduled,
* then it is time for nop's:
*/
while (cnt > ctx->cnt)
schedule(ctx, ir3_instr_create(block, 0, OPC_NOP), false);
}
 
/* at this point, scheduled list is in reverse order, so fix that: */
block->head = reverse(ctx->scheduled);
}
 
int ir3_block_sched(struct ir3_block *block)
{
struct ir3_sched_ctx ctx = {0};
ir3_clear_mark(block->shader);
block_sched(&ctx, block);
if (ctx.error)
return -1;
return 0;
}
/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/freedreno/ir3/ir3_shader.c
0,0 → 1,293
/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */
 
/*
* Copyright (C) 2014 Rob Clark <robclark@freedesktop.org>
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Authors:
* Rob Clark <robclark@freedesktop.org>
*/
 
#include "pipe/p_state.h"
#include "util/u_string.h"
#include "util/u_memory.h"
#include "util/u_inlines.h"
#include "util/u_format.h"
#include "tgsi/tgsi_dump.h"
#include "tgsi/tgsi_parse.h"
 
#include "freedreno_context.h"
#include "freedreno_util.h"
 
#include "ir3_shader.h"
#include "ir3_compiler.h"
 
 
static void
delete_variant(struct ir3_shader_variant *v)
{
if (v->ir)
ir3_destroy(v->ir);
fd_bo_del(v->bo);
free(v);
}
 
/* for vertex shader, the inputs are loaded into registers before the shader
* is executed, so max_regs from the shader instructions might not properly
* reflect the # of registers actually used, especially in the case of
* passthrough varyings.
*
* Likewise, for fragment shader, we can have some regs which are passed
* input values but never touched by the resulting shader (ie. as a result
* of dead code elimination, or simply because we don't know how to turn
* the reg off).
*/
static void
fixup_regfootprint(struct ir3_shader_variant *v)
{
if (v->type == SHADER_VERTEX) {
unsigned i;
for (i = 0; i < v->inputs_count; i++) {
/* skip frag inputs fetched via bary.f since their regs are
* not written by the gpu before the shader starts (and in fact
* the regids might not even be valid)
*/
if (v->inputs[i].bary)
continue;
 
if (v->inputs[i].compmask) {
int32_t regid = (v->inputs[i].regid + 3) >> 2;
v->info.max_reg = MAX2(v->info.max_reg, regid);
}
}
for (i = 0; i < v->outputs_count; i++) {
int32_t regid = (v->outputs[i].regid + 3) >> 2;
v->info.max_reg = MAX2(v->info.max_reg, regid);
}
} else if (v->type == SHADER_FRAGMENT) {
/* NOTE: not sure how to turn pos_regid off.. but this could
* be, for example, r1.x while max reg used by the shader is
* r0.*, in which case we need to fixup the reg footprint:
*/
v->info.max_reg = MAX2(v->info.max_reg, v->pos_regid >> 2);
if (v->frag_coord)
debug_assert(v->info.max_reg >= 0); /* hard coded r0.x */
if (v->frag_face)
debug_assert(v->info.max_half_reg >= 0); /* hr0.x */
}
}
 
/* wrapper for ir3_assemble() which does some info fixup based on
* shader state. Non-static since used by ir3_cmdline too.
*/
void * ir3_shader_assemble(struct ir3_shader_variant *v, uint32_t gpu_id)
{
void *bin;
 
bin = ir3_assemble(v->ir, &v->info, gpu_id);
if (!bin)
return NULL;
 
if (gpu_id >= 400) {
v->instrlen = v->info.sizedwords / (2 * 16);
} else {
v->instrlen = v->info.sizedwords / (2 * 4);
}
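 
/* (illustrative: a 96 instruction shader is 192 dwords, giving
* instrlen 6 on a4xx (groups of 16 instructions) vs instrlen 24
* on a3xx (groups of 4 instructions))
*/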
 
/* NOTE: if relative addressing is used, we set constlen in
* the compiler (to worst-case value) since we don't know in
* the assembler what the max addr reg value can be:
*/
v->constlen = MIN2(255, MAX2(v->constlen, v->info.max_const + 1));
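 
/* (illustrative: if the highest const vec4 referenced is c12, max_const
* is 12 and constlen becomes at least 13, capped at 255 by the MIN2()
* above)
*/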
 
fixup_regfootprint(v);
 
return bin;
}
 
static void
assemble_variant(struct ir3_shader_variant *v)
{
struct fd_context *ctx = fd_context(v->shader->pctx);
uint32_t gpu_id = ir3_shader_gpuid(v->shader);
uint32_t sz, *bin;
 
bin = ir3_shader_assemble(v, gpu_id);
sz = v->info.sizedwords * 4;
 
v->bo = fd_bo_new(ctx->dev, sz,
DRM_FREEDRENO_GEM_CACHE_WCOMBINE |
DRM_FREEDRENO_GEM_TYPE_KMEM);
 
memcpy(fd_bo_map(v->bo), bin, sz);
 
free(bin);
 
/* no need to keep the ir around beyond this point: */
ir3_destroy(v->ir);
v->ir = NULL;
}
 
/* reset before attempting to compile again.. */
static void reset_variant(struct ir3_shader_variant *v, const char *msg)
{
debug_error(msg);
v->inputs_count = 0;
v->outputs_count = 0;
v->total_in = 0;
v->has_samp = false;
v->immediates_count = 0;
}
 
static struct ir3_shader_variant *
create_variant(struct ir3_shader *shader, struct ir3_shader_key key)
{
struct ir3_shader_variant *v = CALLOC_STRUCT(ir3_shader_variant);
const struct tgsi_token *tokens = shader->tokens;
int ret;
 
if (!v)
return NULL;
 
v->shader = shader;
v->key = key;
v->type = shader->type;
 
if (fd_mesa_debug & FD_DBG_DISASM) {
DBG("dump tgsi: type=%d, k={bp=%u,cts=%u,hp=%u}", shader->type,
key.binning_pass, key.color_two_side, key.half_precision);
tgsi_dump(tokens, 0);
}
 
if (fd_mesa_debug & FD_DBG_NIR) {
ret = ir3_compile_shader_nir(v, tokens, key);
if (ret)
reset_variant(v, "NIR compiler failed, fallback to TGSI!");
} else {
ret = -1;
}
 
if (ret) {
ret = ir3_compile_shader(v, tokens, key, true);
if (ret) {
reset_variant(v, "new compiler failed, trying without copy propagation!");
ret = ir3_compile_shader(v, tokens, key, false);
}
}
 
if (ret) {
debug_error("compile failed!");
goto fail;
}
 
assemble_variant(v);
if (!v->bo) {
debug_error("assemble failed!");
goto fail;
}
 
if (fd_mesa_debug & FD_DBG_DISASM) {
DBG("disassemble: type=%d, k={bp=%u,cts=%u,hp=%u}", v->type,
key.binning_pass, key.color_two_side, key.half_precision);
disasm_a3xx(fd_bo_map(v->bo), v->info.sizedwords, 0, v->type);
}
 
return v;
 
fail:
delete_variant(v);
return NULL;
}
 
uint32_t
ir3_shader_gpuid(struct ir3_shader *shader)
{
struct fd_context *ctx = fd_context(shader->pctx);
return ctx->screen->gpu_id;
}
 
struct ir3_shader_variant *
ir3_shader_variant(struct ir3_shader *shader, struct ir3_shader_key key)
{
struct ir3_shader_variant *v;
 
/* some shader key values only apply to vertex or frag shader,
* so normalize the key to avoid constructing multiple identical
* variants:
*/
switch (shader->type) {
case SHADER_FRAGMENT:
case SHADER_COMPUTE:
key.binning_pass = false;
if (key.has_per_samp) {
key.vsaturate_s = 0;
key.vsaturate_t = 0;
key.vsaturate_r = 0;
}
break;
case SHADER_VERTEX:
key.color_two_side = false;
key.half_precision = false;
key.rasterflat = false;
if (key.has_per_samp) {
key.fsaturate_s = 0;
key.fsaturate_t = 0;
key.fsaturate_r = 0;
}
break;
}
 
for (v = shader->variants; v; v = v->next)
if (ir3_shader_key_equal(&key, &v->key))
return v;
 
/* compile new variant if it doesn't exist already: */
v = create_variant(shader, key);
v->next = shader->variants;
shader->variants = v;
 
return v;
}
 
 
void
ir3_shader_destroy(struct ir3_shader *shader)
{
struct ir3_shader_variant *v, *t;
for (v = shader->variants; v; ) {
t = v;
v = v->next;
delete_variant(t);
}
free((void *)shader->tokens);
free(shader);
}
 
struct ir3_shader *
ir3_shader_create(struct pipe_context *pctx, const struct tgsi_token *tokens,
enum shader_t type)
{
struct ir3_shader *shader = CALLOC_STRUCT(ir3_shader);
shader->pctx = pctx;
shader->type = type;
shader->tokens = tgsi_dup_tokens(tokens);
return shader;
}
/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/freedreno/ir3/ir3_shader.h
0,0 → 1,277
/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */
 
/*
* Copyright (C) 2014 Rob Clark <robclark@freedesktop.org>
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Authors:
* Rob Clark <robclark@freedesktop.org>
*/
 
#ifndef IR3_SHADER_H_
#define IR3_SHADER_H_
 
#include "ir3.h"
#include "disasm.h"
 
typedef uint16_t ir3_semantic; /* semantic name + index */
static inline ir3_semantic
ir3_semantic_name(uint8_t name, uint16_t index)
{
return (name << 8) | (index & 0xff);
}
 
static inline uint8_t sem2name(ir3_semantic sem)
{
return sem >> 8;
}
 
static inline uint16_t sem2idx(ir3_semantic sem)
{
return sem & 0xff;
}
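 
/* e.g. ir3_semantic_name(TGSI_SEMANTIC_GENERIC, 3) packs the name into
* the high byte and the index into the low byte, so sem2name()/sem2idx()
* recover TGSI_SEMANTIC_GENERIC and 3 (indices are truncated to 8 bits).
*/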
 
/* Configuration key used to identify a shader variant.. different
* shader variants can be used to implement features not supported
* in hw (two sided color), binning-pass vertex shader, etc.
*/
struct ir3_shader_key {
union {
struct {
/* do we need to check {v,f}saturate_{s,t,r}? */
unsigned has_per_samp : 1;
 
/*
* Vertex shader variant parameters:
*/
unsigned binning_pass : 1;
 
/*
* Fragment shader variant parameters:
*/
unsigned color_two_side : 1;
unsigned half_precision : 1;
/* used when shader needs to handle flat varyings (a4xx),
* for TGSI_INTERPOLATE_COLOR:
*/
unsigned rasterflat : 1;
};
uint32_t global;
};
 
/* bitmask of samplers which need coords clamped for vertex
* shader:
*/
uint16_t vsaturate_s, vsaturate_t, vsaturate_r;
 
/* bitmask of samplers which need coords clamped for frag
* shader:
*/
uint16_t fsaturate_s, fsaturate_t, fsaturate_r;
 
/* bitmask of samplers which produce integer outputs:
*/
uint16_t vinteger_s, finteger_s;
};
 
static inline bool
ir3_shader_key_equal(struct ir3_shader_key *a, struct ir3_shader_key *b)
{
/* slow-path if we need to check {v,f}saturate_{s,t,r} */
if (a->has_per_samp || b->has_per_samp)
return memcmp(a, b, sizeof(struct ir3_shader_key)) == 0;
return a->global == b->global;
}
 
struct ir3_shader_variant {
struct fd_bo *bo;
 
struct ir3_shader_key key;
 
struct ir3_info info;
struct ir3 *ir;
 
/* the instructions length is in units of instruction groups
* (4 instructions for a3xx, 16 instructions for a4xx.. each
* instruction is 2 dwords):
*/
unsigned instrlen;
 
/* the constants length is in units of vec4's, and is the sum of
* the uniforms and the built-in compiler constants
*/
unsigned constlen;
 
/* About Linkage:
* + Let the frag shader determine the position/compmask for the
* varyings, since it is the place where we know if the varying
* is actually used, and if so, which components are used. So
* what the hw calls "outloc" is taken from the "inloc" of the
* frag shader.
* + From the vert shader, we only need the output regid
*/
 
/* for frag shader, pos_regid holds the frag_pos, ie. what is passed
* to bary.f instructions
*/
uint8_t pos_regid;
bool frag_coord, frag_face, color0_mrt;
 
/* varyings/outputs: */
unsigned outputs_count;
struct {
ir3_semantic semantic;
uint8_t regid;
} outputs[16 + 2]; /* +POSITION +PSIZE */
bool writes_pos, writes_psize;
 
/* vertices/inputs: */
unsigned inputs_count;
struct {
ir3_semantic semantic;
uint8_t regid;
uint8_t compmask;
uint8_t ncomp;
/* In theory inloc of fs should match outloc of vs. Or
* rather the outloc of the vs is 8 plus the offset passed
* to bary.f. Presumably that +8 is to account for
* gl_Position/gl_PointSize?
*
* NOTE inloc is currently aligned to 4 (we don't try
* to pack varyings). Changing this would likely break
* assumptions in a few places (like setting up of flat
* shading in fd3_program) so be sure to check all the
* spots where inloc is used.
*/
uint8_t inloc;
uint8_t bary;
uint8_t interpolate;
} inputs[16 + 2]; /* +POSITION +FACE */
 
unsigned total_in; /* sum of inputs (scalar) */
 
/* do we have one or more texture sample instructions: */
bool has_samp;
 
/* do we have kill instructions: */
bool has_kill;
 
/* const reg # of first immediate, ie. 1 == c1
* (not regid, because TGSI thinks in terms of vec4 registers,
* not scalar registers)
*/
unsigned first_driver_param;
unsigned first_immediate;
unsigned immediates_count;
struct {
uint32_t val[4];
} immediates[64];
 
/* shader variants form a linked list: */
struct ir3_shader_variant *next;
 
/* replicated here to avoid passing extra ptrs everywhere: */
enum shader_t type;
struct ir3_shader *shader;
};
 
struct ir3_shader {
enum shader_t type;
 
struct pipe_context *pctx;
const struct tgsi_token *tokens;
 
struct ir3_shader_variant *variants;
 
/* so far, only used for blit_prog shader.. values for
* VPC_VARYING_PS_REPL[i].MODE
*/
uint32_t vpsrepl[8];
};
 
void * ir3_shader_assemble(struct ir3_shader_variant *v, uint32_t gpu_id);
 
struct ir3_shader * ir3_shader_create(struct pipe_context *pctx,
const struct tgsi_token *tokens, enum shader_t type);
void ir3_shader_destroy(struct ir3_shader *shader);
uint32_t ir3_shader_gpuid(struct ir3_shader *shader);
struct ir3_shader_variant * ir3_shader_variant(struct ir3_shader *shader,
struct ir3_shader_key key);
 
/*
* Helper/util:
*/
 
static inline int
ir3_find_output(const struct ir3_shader_variant *so, ir3_semantic semantic)
{
int j;
 
for (j = 0; j < so->outputs_count; j++)
if (so->outputs[j].semantic == semantic)
return j;
 
/* it seems optional to have an OUT.BCOLOR[n] for each OUT.COLOR[n]
* in the vertex shader.. but the fragment shader doesn't know this,
* so it will always have both IN.COLOR[n] and IN.BCOLOR[n]. So
* at link time, if there is no matching OUT.BCOLOR[n], we must map
* OUT.COLOR[n] to IN.BCOLOR[n]. And vice versa if there is only
* an OUT.BCOLOR[n] but no matching OUT.COLOR[n]
*/
if (sem2name(semantic) == TGSI_SEMANTIC_BCOLOR) {
unsigned idx = sem2idx(semantic);
semantic = ir3_semantic_name(TGSI_SEMANTIC_COLOR, idx);
} else if (sem2name(semantic) == TGSI_SEMANTIC_COLOR) {
unsigned idx = sem2idx(semantic);
semantic = ir3_semantic_name(TGSI_SEMANTIC_BCOLOR, idx);
} else {
return 0;
}
 
for (j = 0; j < so->outputs_count; j++)
if (so->outputs[j].semantic == semantic)
return j;
 
debug_assert(0);
 
return 0;
}
 
static inline int
ir3_next_varying(const struct ir3_shader_variant *so, int i)
{
while (++i < so->inputs_count)
if (so->inputs[i].compmask && so->inputs[i].bary)
break;
return i;
}
 
static inline uint32_t
ir3_find_output_regid(const struct ir3_shader_variant *so, ir3_semantic semantic)
{
int j;
for (j = 0; j < so->outputs_count; j++)
if (so->outputs[j].semantic == semantic)
return so->outputs[j].regid;
return regid(63, 0);
}
 
#endif /* IR3_SHADER_H_ */